Files
window-axis-innovators-box/src/main/java/org/tzd/lm/LM.java

107 lines
3.2 KiB
Java
Raw Normal View History

2025-02-06 20:49:46 +08:00
package org.tzd.lm;
import com.axis.innovators.box.tools.FolderCreator;
import com.axis.innovators.box.tools.LibraryLoad;
/**
 * Native (JNI) entry points for language-model inference.
 *
 * <p>Loading this class loads the ggml/llama native libraries followed by the
 * {@code LM} library that implements the {@code native} methods declared here.
 *
 * @author tzdwindows 7
 */
public class LM {
    /**
     * Selects which set of native libraries is loaded when this class is initialized:
     * {@code true} loads the {@code cuda/*} libraries, {@code false} (the default) loads
     * the {@code cpu/*} libraries.
     *
     * <p>The value is taken from the JVM system property {@code org.tzd.lm.cuda}
     * (for example {@code -Dorg.tzd.lm.cuda=true}). Assigning this field from other code
     * cannot influence library selection: touching the field forces class initialization,
     * so the static initializer below has already run by the time the assignment happens.
     */
    public static boolean CUDA = Boolean.getBoolean("org.tzd.lm.cuda");

    /** Path of the bundled DeepSeek-R1 distilled model file inside the model folder. */
    public final static String DEEP_SEEK = FolderCreator.getModelFolder() + "//DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf";

    static {
        // NOTE(review): the load order below is kept exactly as it was; presumably the
        // later libraries depend on the earlier ones - confirm before reordering.
        if (!CUDA) {
            LibraryLoad.loadLibrary("cpu/ggml-base");
            LibraryLoad.loadLibrary("cpu/ggml-cpu");
            LibraryLoad.loadLibrary("cpu/ggml");
            LibraryLoad.loadLibrary("cpu/llama");
        } else {
            LibraryLoad.loadLibrary("cuda/ggml-base");
            LibraryLoad.loadLibrary("cuda/ggml-cpu");
            LibraryLoad.loadLibrary("cuda/ggml-rpc");
            // CUDA build: make sure the cuda-cu12.4-x64 runtime is available on this machine.
            LibraryLoad.loadLibrary("cuda/ggml-cuda");
            LibraryLoad.loadLibrary("cuda/ggml");
            LibraryLoad.loadLibrary("cuda/llama");
        }
        // The JNI library that backs the native methods of this class.
        LibraryLoad.loadLibrary("LM");
    }

    /**
     * Loads a model from a file.
     *
     * @param pathModel path of the model file
     * @return the model handle
     */
    public static native long llamaLoadModelFromFile(String pathModel);

    /**
     * Releases the resources held by a model.
     *
     * @param modelHandle the model handle
     */
    public static native void llamaFreeModel(long modelHandle);

    /**
     * Creates an inference context for a model.
     *
     * @param modelHandle the model handle
     * @return the context handle
     */
    public static native long createContext(long modelHandle);

    /**
     * Releases the resources held by a context.
     *
     * @param ctxHandle the context handle
     */
    public static native void llamaFreeContext(long ctxHandle);

    /**
     * Runs inference on a prompt.
     *
     * @param modelHandle     the model handle
     * @param ctxHandle       the context handle of that model
     * @param temperature     the sampling temperature
     * @param prompt          the question / prompt text
     * @param messageCallback callback that receives output messages
     * @return the final content
     */
    public static native String inference(long modelHandle,
                                          long ctxHandle,
                                          float temperature,
                                          String prompt,
                                          MessageCallback messageCallback);

    /**
     * Callback through which {@link #inference} reports output.
     */
    public interface MessageCallback {
        /**
         * Receives one message.
         *
         * @param message the message text
         */
        void onMessage(String message);
    }

    /**
     * Demo entry point: loads the {@link #DEEP_SEEK} model, runs two prompts against one
     * context while echoing callback output to standard output, then releases the context
     * and the model.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Echo every callback message to stdout; one instance serves both prompts.
        MessageCallback printer = new MessageCallback() {
            @Override
            public void onMessage(String message) {
                System.out.print(message);
            }
        };

        // Load the model.
        // NOTE(review): how the native side reports a failed load (exception vs. sentinel
        // handle) is not visible from this file - confirm and add a guard if needed.
        long modelHandle = llamaLoadModelFromFile(DEEP_SEEK);
        try {
            // Create a new context.
            long ctxHandle = createContext(modelHandle);
            try {
                // Run both prompts against the same context.
                inference(modelHandle, ctxHandle, 0.2f, "写一个ai", printer);
                inference(modelHandle, ctxHandle, 0.2f, "谢谢你", printer);
            } finally {
                // Release the context even if inference throws.
                llamaFreeContext(ctxHandle);
            }
        } finally {
            // Release the model after its context; previously it was never freed.
            llamaFreeModel(modelHandle);
        }
    }
}