第二次提交

2025-02-06 20:49:46 +08:00
parent d06b32a92f
commit 4e2253df81
4 changed files with 150 additions and 0 deletions
--- a/src/main/java/org/tzd/lm/LM.java
+++ b/src/main/java/org/tzd/lm/LM.java
@@ -0,0 +1,106 @@
+package org.tzd.lm;
+
+import com.axis.innovators.box.tools.FolderCreator;
+import com.axis.innovators.box.tools.LibraryLoad;
+
+/**
+ * LM inference class: static native bindings backed by the {@code LM} library.
+ * @author tzdwindows 7
+ */
+public class LM {
+    /**
+     * Selects the CUDA native libraries over the CPU ones. The static initializer reads it
+     * during class initialization, before any caller can assign it, so it is seeded from the
+     * {@code org.tzd.lm.cuda} system property (default {@code false}); later writes load nothing.
+     */
+    public static boolean CUDA = Boolean.getBoolean("org.tzd.lm.cuda");
+    /** Model file path: the DeepSeek-R1 distilled GGUF file under the model folder. */
+    public final static String DEEP_SEEK = FolderCreator.getModelFolder() + "//DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf";
+
+    static {
+        // Libraries are loaded in exactly the listed order; the "LM" library always goes last.
+        String[] backend = CUDA
+                // CUDA build: cuda-cu12.4-x64 (make sure you have it).
+                ? new String[] {"cuda/ggml-base", "cuda/ggml-cpu", "cuda/ggml-rpc", "cuda/ggml-cuda", "cuda/ggml", "cuda/llama"}
+                : new String[] {"cpu/ggml-base", "cpu/ggml-cpu", "cpu/ggml", "cpu/llama"};
+        for (String library : backend) {
+            LibraryLoad.loadLibrary(library);
+        }
+        LibraryLoad.loadLibrary("LM");
+    }
+
+    /**
+     * Loads a model.
+     * @param pathModel path of the model
+     * @return model handle
+     */
+    public static native long llamaLoadModelFromFile(String pathModel);
+
+    /**
+     * Releases the resources of a model.
+     * @param modelHandle model handle
+     */
+    public static native void llamaFreeModel(long modelHandle);
+
+    /**
+     * Creates a context.
+     * @param modelHandle model handle
+     * @return context handle
+     */
+    public static native long createContext(long modelHandle);
+
+    /**
+     * Releases the resources of a context.
+     * @param ctxHandle context handle
+     */
+    public static native void llamaFreeContext(long ctxHandle);
+
+    /**
+     * Runs inference on a model.
+     * @param modelHandle model handle
+     * @param ctxHandle context handle of the model
+     * @param temperature temperature
+     * @param prompt the question
+     * @param messageCallback callback interface
+     * @return the final content
+     */
+    public static native String inference(long modelHandle, long ctxHandle, float temperature,
+                                          String prompt, MessageCallback messageCallback);
+
+    /** Callback interface. */
+    public interface MessageCallback {
+        /**
+         * Callback invoked with a message.
+         * @param message the message
+         */
+        void onMessage(String message);
+    }
+
+    /**
+     * Demo: loads {@link #DEEP_SEEK}, runs two prompts and prints every callback message.
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        // One callback serves both prompts: it prints each message without a line break.
+        MessageCallback printer = new MessageCallback() {
+            @Override
+            public void onMessage(String message) {
+                System.out.print(message);
+            }
+        };
+        long modelHandle = llamaLoadModelFromFile(DEEP_SEEK);
+        try {
+            long ctxHandle = createContext(modelHandle);
+            try {
+                // Both prompts run against the same context handle.
+                inference(modelHandle, ctxHandle, 0.2f, "写一个ai", printer);
+                inference(modelHandle, ctxHandle, 0.2f, "谢谢你", printer);
+            } finally {
+                llamaFreeContext(ctxHandle);
+            }
+        } finally {
+            // Release the model as well, after the context that was created from it.
+            llamaFreeModel(modelHandle);
+        }
+    }
+}