package org.tzd.lm;

import com.axis.innovators.box.tools.FolderCreator;
import com.axis.innovators.box.tools.LibraryLoad;

/**
 * LM inference class.
 *
 * @author tzdwindows 7
 */
public class LM {

    public static boolean CUDA = false;
    public final static String DEEP_SEEK = FolderCreator.getModelFolder() + "/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf";
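
    // The static initializer below loads the native libraries in dependency
    // order: ggml-base and the backend libraries first, then llama, and the
    // JNI wrapper library ("LM") last. Note that CUDA is read during class
    // initialization, so it must be set here in source: assigning LM.CUDA
    // from another class would trigger this static block before the write.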
    static {
        if (!CUDA) {
            LibraryLoad.loadLibrary("cpu/ggml-base");
            LibraryLoad.loadLibrary("cpu/ggml-cpu");
            LibraryLoad.loadLibrary("cpu/ggml");
            LibraryLoad.loadLibrary("cpu/llama");
        } else {
            LibraryLoad.loadLibrary("cuda/ggml-base");
            LibraryLoad.loadLibrary("cuda/ggml-cpu");
            LibraryLoad.loadLibrary("cuda/ggml-rpc");
            // CUDA build: cuda-cu12.4-x64 (make sure you have it)
            LibraryLoad.loadLibrary("cuda/ggml-cuda");
            LibraryLoad.loadLibrary("cuda/ggml");
            LibraryLoad.loadLibrary("cuda/llama");
        }
        LibraryLoad.loadLibrary("LM");
    }

    /**
     * Loads a model.
     *
     * @param pathModel model path
     * @return model handle
     */
    public static native long llamaLoadModelFromFile(String pathModel);

    /**
     * Releases model resources.
     *
     * @param modelHandle model handle
     */
    public static native void llamaFreeModel(long modelHandle);

    /**
     * Creates a context.
     *
     * @param modelHandle model handle
     * @return context handle
     */
    public static native long createContext(long modelHandle);

    /**
     * Releases context resources.
     *
     * @param ctxHandle context handle
     */
    public static native void llamaFreeContext(long ctxHandle);

    /**
     * Runs model inference.
     *
     * @param modelHandle model handle
     * @param ctxHandle model context handle
     * @param temperature sampling temperature
     * @param prompt the prompt
     * @param messageCallback callback interface
     * @return the final content
     */
    public static native String inference(long modelHandle,
                                          long ctxHandle,
                                          float temperature,
                                          String prompt,
                                          MessageCallback messageCallback);
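
    // Typical usage (see main below): load a model once, create a context,
    // call inference as needed, then free the context and the model.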

    /**
     * Callback interface.
     */
    public interface MessageCallback {
        /**
         * Callback method.
         *
         * @param message the message
         */
        void onMessage(String message);
    }
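
    // MessageCallback has a single abstract method, so call sites may pass a
    // lambda instead of an anonymous class, e.g. msg -> System.out.print(msg).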

    public static void main(String[] args) {
        // Load the model
        long modelHandle = llamaLoadModelFromFile(DEEP_SEEK);
        // Create a new context
        long ctxHandle = createContext(modelHandle);
        inference(modelHandle, ctxHandle, 0.2f, "写一个ai", new MessageCallback() {
            @Override
            public void onMessage(String message) {
                // Print the streamed callback output
                System.out.print(message);
            }
        });
        // Run inference again on the same context
        inference(modelHandle, ctxHandle, 0.2f, "谢谢你", new MessageCallback() {
            @Override
            public void onMessage(String message) {
                // Print the streamed callback output
                System.out.print(message);
            }
        });
        // Clean up the context
        llamaFreeContext(ctxHandle);
        // Release the model resources
        llamaFreeModel(modelHandle);
    }
}