mirror of
https://gitee.com/270580156/weiyu.git
synced 2026-05-16 20:27:50 +00:00
update
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-28 11:44:03
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-16 15:10:31
|
||||
* @LastEditTime: 2025-08-21 12:46:15
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -31,6 +31,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@@ -82,7 +83,7 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
// 记录开始时间和初始化token使用统计
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = { false };
|
||||
final TokenUsage[] tokenUsage = { new TokenUsage(0, 0, 0) };
|
||||
final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) };
|
||||
// 用于累积所有响应文本
|
||||
final StringBuilder[] fullResponseText = { new StringBuilder() };
|
||||
|
||||
@@ -118,7 +119,7 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
// 如果token提取失败,使用累积的完整响应文本来估算token
|
||||
if (tokenUsage[0].getTotalTokens() == 0 && fullResponseText[0].length() > 0) {
|
||||
log.info("Baidu API using accumulated response text for token estimation: {}", fullResponseText[0].toString());
|
||||
TokenUsage estimatedUsage = estimateBaiduTokenUsageFromText(fullResponseText[0].toString());
|
||||
ChatTokenUsage estimatedUsage = estimateBaiduTokenUsageFromText(fullResponseText[0].toString());
|
||||
tokenUsage[0] = estimatedUsage;
|
||||
log.info("Baidu API final estimated token usage: {}", estimatedUsage);
|
||||
}
|
||||
@@ -138,7 +139,7 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
log.info("SpringAIBaiduService processPromptSync with full prompt content: {}", fullPromptContent);
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
try {
|
||||
if (baiduChatModel == null) {
|
||||
@@ -212,7 +213,7 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
// 记录开始时间和初始化token使用统计
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = { false };
|
||||
final TokenUsage[] tokenUsage = { new TokenUsage(0, 0, 0) };
|
||||
final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) };
|
||||
// 用于累积所有响应文本
|
||||
final StringBuilder[] fullResponseText = { new StringBuilder() };
|
||||
|
||||
@@ -255,7 +256,7 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
// 如果token提取失败,使用累积的完整响应文本来估算token
|
||||
if (tokenUsage[0].getTotalTokens() == 0 && fullResponseText[0].length() > 0) {
|
||||
log.info("Baidu API using accumulated response text for token estimation: {}", fullResponseText[0].toString());
|
||||
TokenUsage estimatedUsage = estimateBaiduTokenUsageFromText(fullResponseText[0].toString());
|
||||
ChatTokenUsage estimatedUsage = estimateBaiduTokenUsageFromText(fullResponseText[0].toString());
|
||||
tokenUsage[0] = estimatedUsage;
|
||||
log.info("Baidu API final estimated token usage: {}", estimatedUsage);
|
||||
}
|
||||
@@ -283,17 +284,17 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
* @param response ChatResponse对象
|
||||
* @return TokenUsage对象
|
||||
*/
|
||||
private TokenUsage extractBaiduTokenUsage(org.springframework.ai.chat.model.ChatResponse response) {
|
||||
private ChatTokenUsage extractBaiduTokenUsage(org.springframework.ai.chat.model.ChatResponse response) {
|
||||
try {
|
||||
if (response == null) {
|
||||
log.warn("Baidu API response is null");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
var metadata = response.getMetadata();
|
||||
if (metadata == null) {
|
||||
log.warn("Baidu API response metadata is null");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
log.info("Baidu API manual token extraction - metadata: {}", metadata);
|
||||
@@ -390,7 +391,7 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
log.info("Baidu API manual token extraction result - prompt: {}, completion: {}, total: {}",
|
||||
promptTokens, completionTokens, totalTokens);
|
||||
|
||||
return new TokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
return new ChatTokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -404,7 +405,7 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
|
||||
// 方法3: 如果手动提取失败,尝试使用原始的extractTokenUsage方法作为后备
|
||||
log.info("Baidu API manual extraction failed, trying original extractTokenUsage method");
|
||||
TokenUsage fallbackUsage = extractTokenUsage(response);
|
||||
ChatTokenUsage fallbackUsage = extractTokenUsage(response);
|
||||
if (fallbackUsage.getTotalTokens() > 0) {
|
||||
log.info("Baidu API fallback extraction successful: {}", fallbackUsage);
|
||||
return fallbackUsage;
|
||||
@@ -412,13 +413,13 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
|
||||
// 方法4: 如果所有方法都失败,尝试估算token使用量
|
||||
log.info("Baidu API all extraction methods failed, attempting to estimate token usage");
|
||||
TokenUsage estimatedUsage = estimateBaiduTokenUsageFromResponse(response);
|
||||
ChatTokenUsage estimatedUsage = estimateBaiduTokenUsageFromResponse(response);
|
||||
log.info("Baidu API estimated token usage: {}", estimatedUsage);
|
||||
return estimatedUsage;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Error in manual Baidu token extraction", e);
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -428,10 +429,10 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
* @param response ChatResponse对象
|
||||
* @return 估算的TokenUsage对象
|
||||
*/
|
||||
private TokenUsage estimateBaiduTokenUsageFromResponse(org.springframework.ai.chat.model.ChatResponse response) {
|
||||
private ChatTokenUsage estimateBaiduTokenUsageFromResponse(org.springframework.ai.chat.model.ChatResponse response) {
|
||||
try {
|
||||
if (response == null || response.getResults() == null || response.getResults().isEmpty()) {
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
// 获取输出文本
|
||||
@@ -446,7 +447,7 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Error estimating Baidu token usage", e);
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -456,10 +457,10 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
* @param outputText 完整的输出文本
|
||||
* @return 估算的TokenUsage对象
|
||||
*/
|
||||
private TokenUsage estimateBaiduTokenUsageFromText(String outputText) {
|
||||
private ChatTokenUsage estimateBaiduTokenUsageFromText(String outputText) {
|
||||
try {
|
||||
if (outputText == null || outputText.isEmpty()) {
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
// 估算token使用量
|
||||
@@ -474,11 +475,11 @@ public class SpringAIBaiduService extends BaseSpringAIService {
|
||||
"Baidu API estimated tokens - output: {} chars -> {} tokens, estimated prompt: {} tokens, total: {} tokens",
|
||||
outputText.length(), completionTokens, promptTokens, totalTokens);
|
||||
|
||||
return new TokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
return new ChatTokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Error estimating Baidu token usage from text", e);
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-28 11:44:03
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-18 17:04:41
|
||||
* @LastEditTime: 2025-08-21 12:46:21
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -33,6 +33,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@@ -87,7 +88,7 @@ public class SpringAIDashscopeService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
final StringBuilder[] fullResponseText = {new StringBuilder()};
|
||||
|
||||
// 使用同一个ChatModel实例,但传入不同的选项
|
||||
@@ -132,7 +133,7 @@ public class SpringAIDashscopeService extends BaseSpringAIService {
|
||||
protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
log.info("Dashscope API sync fullPromptContent: {}", fullPromptContent);
|
||||
|
||||
@@ -202,7 +203,7 @@ public class SpringAIDashscopeService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
final StringBuilder[] fullResponseText = {new StringBuilder()};
|
||||
|
||||
bytedeskDashscopeChatModel.stream(requestPrompt).subscribe(
|
||||
@@ -259,17 +260,17 @@ public class SpringAIDashscopeService extends BaseSpringAIService {
|
||||
* @param response ChatResponse对象
|
||||
* @return TokenUsage对象
|
||||
*/
|
||||
private TokenUsage extractDashscopeTokenUsage(ChatResponse response) {
|
||||
private ChatTokenUsage extractDashscopeTokenUsage(ChatResponse response) {
|
||||
try {
|
||||
if (response == null) {
|
||||
log.warn("Dashscope API response is null");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
var metadata = response.getMetadata();
|
||||
if (metadata == null) {
|
||||
log.warn("Dashscope API response metadata is null");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
log.info("Dashscope API token extraction - metadata: {}", metadata);
|
||||
@@ -286,18 +287,18 @@ public class SpringAIDashscopeService extends BaseSpringAIService {
|
||||
promptTokens, completionTokens, totalTokens);
|
||||
|
||||
if (totalTokens > 0) {
|
||||
return new TokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
return new ChatTokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug("Could not get usage via getUsage() method: {}", e.getMessage());
|
||||
}
|
||||
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Error in Dashscope token extraction", e);
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-28 11:44:03
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-18 17:04:22
|
||||
* @LastEditTime: 2025-08-21 12:46:25
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -34,6 +34,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@@ -87,7 +88,7 @@ public class SpringAIDeepseekService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
// 使用同一个ChatModel实例,但传入不同的选项
|
||||
deepseekChatModel.stream(requestPrompt).subscribe(
|
||||
@@ -126,7 +127,7 @@ public class SpringAIDeepseekService extends BaseSpringAIService {
|
||||
log.info("SpringAIDeepseekService processPromptSync with full prompt content: {}", fullPromptContent);
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
try {
|
||||
if (deepseekChatModel == null) {
|
||||
@@ -195,7 +196,7 @@ public class SpringAIDeepseekService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
deepseekChatModel.stream(requestPrompt).subscribe(
|
||||
response -> {
|
||||
@@ -245,17 +246,17 @@ public class SpringAIDeepseekService extends BaseSpringAIService {
|
||||
* @param response ChatResponse对象
|
||||
* @return TokenUsage对象
|
||||
*/
|
||||
private TokenUsage extractDeepSeekTokenUsage(ChatResponse response) {
|
||||
private ChatTokenUsage extractDeepSeekTokenUsage(ChatResponse response) {
|
||||
try {
|
||||
if (response == null) {
|
||||
log.warn("Deepseek API response is null");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
var metadata = response.getMetadata();
|
||||
if (metadata == null) {
|
||||
log.warn("Deepseek API response metadata is null");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
log.info("Deepseek API token extraction - metadata: {}", metadata);
|
||||
@@ -272,18 +273,18 @@ public class SpringAIDeepseekService extends BaseSpringAIService {
|
||||
promptTokens, completionTokens, totalTokens);
|
||||
|
||||
if (totalTokens > 0) {
|
||||
return new TokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
return new ChatTokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug("Could not get usage via getUsage() method: {}", e.getMessage());
|
||||
}
|
||||
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Error in Deepseek token extraction", e);
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-28 11:44:03
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-16 13:23:44
|
||||
* @LastEditTime: 2025-08-21 12:46:30
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -32,6 +32,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@@ -84,7 +85,7 @@ public class SpringAIGiteeService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
// 使用同一个ChatModel实例,但传入不同的选项
|
||||
giteeChatModel.stream(requestPrompt).subscribe(
|
||||
@@ -123,7 +124,7 @@ public class SpringAIGiteeService extends BaseSpringAIService {
|
||||
log.info("SpringAIGiteeService processPromptSync with full prompt content: {}", fullPromptContent);
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
try {
|
||||
if (giteeChatModel == null) {
|
||||
@@ -193,7 +194,7 @@ public class SpringAIGiteeService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
giteeChatModel.stream(requestPrompt).subscribe(
|
||||
response -> {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-28 11:44:03
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-22 08:34:28
|
||||
* @LastEditTime: 2025-08-21 12:46:36
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -34,6 +34,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@@ -87,7 +88,7 @@ public class SpringAIMinimaxService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
// 使用同一个ChatModel实例,但传入不同的选项
|
||||
minimaxChatModel.stream(requestPrompt).subscribe(
|
||||
@@ -126,7 +127,7 @@ public class SpringAIMinimaxService extends BaseSpringAIService {
|
||||
log.info("SpringAIMinimaxService processPromptSync with full prompt content: {}", fullPromptContent);
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
try {
|
||||
if (minimaxChatModel == null) {
|
||||
@@ -195,7 +196,7 @@ public class SpringAIMinimaxService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
minimaxChatModel.stream(requestPrompt).subscribe(
|
||||
response -> {
|
||||
@@ -245,17 +246,17 @@ public class SpringAIMinimaxService extends BaseSpringAIService {
|
||||
* @param response ChatResponse对象
|
||||
* @return TokenUsage对象
|
||||
*/
|
||||
private TokenUsage extractDeepSeekTokenUsage(ChatResponse response) {
|
||||
private ChatTokenUsage extractDeepSeekTokenUsage(ChatResponse response) {
|
||||
try {
|
||||
if (response == null) {
|
||||
log.warn("Minimax API response is null");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
var metadata = response.getMetadata();
|
||||
if (metadata == null) {
|
||||
log.warn("Minimax API response metadata is null");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
log.info("Minimax API token extraction - metadata: {}", metadata);
|
||||
@@ -272,18 +273,18 @@ public class SpringAIMinimaxService extends BaseSpringAIService {
|
||||
promptTokens, completionTokens, totalTokens);
|
||||
|
||||
if (totalTokens > 0) {
|
||||
return new TokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
return new ChatTokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug("Could not get usage via getUsage() method: {}", e.getMessage());
|
||||
}
|
||||
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Error in Minimax token extraction", e);
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2024-05-31 10:24:39
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-08-20 21:25:13
|
||||
* @LastEditTime: 2025-08-21 12:39:58
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -13,67 +13,67 @@
|
||||
*/
|
||||
package com.bytedesk.ai.springai.providers.ollama;
|
||||
|
||||
// import org.springframework.ai.chat.client.ChatClient;
|
||||
// import org.springframework.ai.chat.client.advisor.SimpleLoggerAdvisor;
|
||||
// import org.springframework.ai.ollama.OllamaChatModel;
|
||||
// import org.springframework.ai.ollama.api.OllamaApi;
|
||||
// import org.springframework.ai.ollama.api.OllamaOptions;
|
||||
// import org.springframework.beans.factory.annotation.Value;
|
||||
// import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
// import org.springframework.context.annotation.Bean;
|
||||
// import org.springframework.context.annotation.Configuration;
|
||||
// import lombok.Data;
|
||||
// import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.ai.chat.client.ChatClient;
|
||||
import org.springframework.ai.chat.client.advisor.SimpleLoggerAdvisor;
|
||||
import org.springframework.ai.ollama.OllamaChatModel;
|
||||
import org.springframework.ai.ollama.api.OllamaApi;
|
||||
import org.springframework.ai.ollama.api.OllamaOptions;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
// /**
|
||||
// * https://ollama.com/
|
||||
// * https://www.promptingguide.ai/
|
||||
// * Ollama Chat Configuration
|
||||
// */
|
||||
// @Slf4j
|
||||
// @Data
|
||||
// @Configuration
|
||||
// @ConditionalOnProperty(prefix = "spring.ai.ollama.chat", name = "enabled", havingValue = "true", matchIfMissing = false)
|
||||
// public class SpringAIOllamaChatConfig {
|
||||
/**
|
||||
* https://ollama.com/
|
||||
* https://www.promptingguide.ai/
|
||||
* Ollama Chat Configuration
|
||||
*/
|
||||
@Slf4j
|
||||
@Data
|
||||
@Configuration
|
||||
@ConditionalOnProperty(prefix = "spring.ai.ollama.chat", name = "enabled", havingValue = "true", matchIfMissing = false)
|
||||
public class SpringAIOllamaChatConfig {
|
||||
|
||||
// @Value("${spring.ai.ollama.base-url:http://host.docker.internal:11434}")
|
||||
// private String ollamaBaseUrl;
|
||||
@Value("${spring.ai.ollama.base-url:http://host.docker.internal:11434}")
|
||||
private String ollamaBaseUrl;
|
||||
|
||||
// @Value("${spring.ai.ollama.chat.options.model:qwen3:0.6b}")
|
||||
// private String ollamaChatOptionsModel;
|
||||
@Value("${spring.ai.ollama.chat.options.model:qwen3:0.6b}")
|
||||
private String ollamaChatOptionsModel;
|
||||
|
||||
// @Bean("bytedeskOllamaApi")
|
||||
// OllamaApi bytedeskOllamaApi() {
|
||||
// return OllamaApi.builder()
|
||||
// .baseUrl(ollamaBaseUrl)
|
||||
// .build();
|
||||
// }
|
||||
@Bean("bytedeskOllamaApi")
|
||||
OllamaApi bytedeskOllamaApi() {
|
||||
return OllamaApi.builder()
|
||||
.baseUrl(ollamaBaseUrl)
|
||||
.build();
|
||||
}
|
||||
|
||||
// @Bean("bytedeskOllamaChatOptions")
|
||||
// OllamaOptions bytedeskOllamaChatOptions() {
|
||||
// return OllamaOptions.builder()
|
||||
// .model(ollamaChatOptionsModel)
|
||||
// // 使用keepAlive而不是timeout来设置超时
|
||||
// // .keepAlive("30s") // 使用30秒的超时设置
|
||||
// // .numKeep(0) // 不保留历史对话上下文
|
||||
// // .useNUMA(ollamaChatOptionsNuma) // 使用正确的方法名useNUMA而不是numa
|
||||
// .build();
|
||||
// }
|
||||
@Bean("bytedeskOllamaChatOptions")
|
||||
OllamaOptions bytedeskOllamaChatOptions() {
|
||||
return OllamaOptions.builder()
|
||||
.model(ollamaChatOptionsModel)
|
||||
// 使用keepAlive而不是timeout来设置超时
|
||||
// .keepAlive("30s") // 使用30秒的超时设置
|
||||
// .numKeep(0) // 不保留历史对话上下文
|
||||
// .useNUMA(ollamaChatOptionsNuma) // 使用正确的方法名useNUMA而不是numa
|
||||
.build();
|
||||
}
|
||||
|
||||
// @Bean("bytedeskOllamaChatModel")
|
||||
// OllamaChatModel bytedeskOllamaChatModel() {
|
||||
// return OllamaChatModel.builder()
|
||||
// .ollamaApi(bytedeskOllamaApi())
|
||||
// .defaultOptions(bytedeskOllamaChatOptions())
|
||||
// .build();
|
||||
// }
|
||||
@Bean("bytedeskOllamaChatModel")
|
||||
OllamaChatModel bytedeskOllamaChatModel() {
|
||||
return OllamaChatModel.builder()
|
||||
.ollamaApi(bytedeskOllamaApi())
|
||||
.defaultOptions(bytedeskOllamaChatOptions())
|
||||
.build();
|
||||
}
|
||||
|
||||
// @Bean("bytedeskOllamaChatClient")
|
||||
// ChatClient bytedeskOllamaChatClient() {
|
||||
// return ChatClient.builder(bytedeskOllamaChatModel())
|
||||
// .defaultOptions(bytedeskOllamaChatOptions())
|
||||
// .defaultAdvisors(new SimpleLoggerAdvisor())
|
||||
// .build();
|
||||
// }
|
||||
@Bean("bytedeskOllamaChatClient")
|
||||
ChatClient bytedeskOllamaChatClient() {
|
||||
return ChatClient.builder(bytedeskOllamaChatModel())
|
||||
.defaultOptions(bytedeskOllamaChatOptions())
|
||||
.defaultAdvisors(new SimpleLoggerAdvisor())
|
||||
.build();
|
||||
}
|
||||
|
||||
// }
|
||||
}
|
||||
@@ -0,0 +1,324 @@
|
||||
/*
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-26 16:59:14
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-08-21 12:45:00
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
* Business Source License 1.1: https://github.com/Bytedesk/bytedesk/blob/main/LICENSE
|
||||
* contact: 270580156@qq.com
|
||||
*
|
||||
* Copyright (c) 2025 by bytedesk.com, All Rights Reserved.
|
||||
*/
|
||||
package com.bytedesk.ai.springai.providers.ollama;
|
||||
|
||||
import java.util.List;
|
||||
import org.springframework.ai.chat.messages.AssistantMessage;
|
||||
import org.springframework.ai.chat.model.Generation;
|
||||
import org.springframework.ai.chat.prompt.Prompt;
|
||||
import org.springframework.ai.ollama.OllamaChatModel;
|
||||
import org.springframework.ai.ollama.api.OllamaApi;
|
||||
import org.springframework.ai.ollama.api.OllamaOptions;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.Assert;
|
||||
import org.springframework.util.StringUtils;
|
||||
import org.springframework.web.client.HttpClientErrorException;
|
||||
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
|
||||
|
||||
import com.bytedesk.ai.robot.RobotLlm;
|
||||
import com.bytedesk.ai.robot.RobotProtobuf;
|
||||
import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@ConditionalOnProperty(prefix = "spring.ai.ollama.chat", name = "enabled", havingValue = "true", matchIfMissing = false)
|
||||
public class SpringAIOllamaChatService extends BaseSpringAIService {
|
||||
|
||||
@Autowired
|
||||
@Qualifier("bytedeskOllamaApi")
|
||||
private OllamaApi bytedeskOllamaApi;
|
||||
|
||||
@Autowired(required = false)
|
||||
@Qualifier("bytedeskOllamaChatModel")
|
||||
private OllamaChatModel bytedeskOllamaChatModel;
|
||||
|
||||
public SpringAIOllamaChatService() {
|
||||
super(); // 调用基类的无参构造函数
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据机器人配置创建动态的OllamaOptions
|
||||
*
|
||||
* @param llm 机器人LLM配置
|
||||
* @return 根据机器人配置创建的选项
|
||||
*/
|
||||
private OllamaOptions createOllamaOptions(RobotLlm llm) {
|
||||
return super.createDynamicOptions(llm, robotLlm -> OllamaOptions.builder()
|
||||
.model(robotLlm.getTextModel())
|
||||
.temperature(robotLlm.getTemperature())
|
||||
.topP(robotLlm.getTopP())
|
||||
.topK(robotLlm.getTopK())
|
||||
.build());
|
||||
}
|
||||
|
||||
public OllamaApi createOllamaApi(String apiUrl) {
|
||||
return OllamaApi.builder()
|
||||
.baseUrl(apiUrl)
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查模型是否存在
|
||||
*
|
||||
* @param modelName 模型名称
|
||||
* @return 如果模型存在返回true,否则返回false
|
||||
*/
|
||||
public Boolean isModelExists(OllamaRequest request) {
|
||||
OllamaApi ollamaApi = createOllamaApi(request.getApiUrl());
|
||||
String modelName = request.getModel();
|
||||
Assert.hasText(modelName, "Model name must not be null or empty");
|
||||
try {
|
||||
ollamaApi.showModel(new OllamaApi.ShowModelRequest(modelName));
|
||||
return true;
|
||||
} catch (HttpClientErrorException e) {
|
||||
if (e.getStatusCode() == HttpStatus.NOT_FOUND) {
|
||||
return false;
|
||||
}
|
||||
log.error("检查模型是否存在时发生错误: {}, 状态码: {}", modelName, e.getStatusCode());
|
||||
} catch (Exception e) {
|
||||
log.error("检查模型是否存在时发生未知错误: {}, 错误: {}", modelName, e.getMessage());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected void processPromptWebsocket(Prompt prompt, RobotProtobuf robot, MessageProtobuf messageProtobufQuery,
|
||||
MessageProtobuf messageProtobufReply, String fullPromptContent) {
|
||||
// 从robot中获取llm配置
|
||||
RobotLlm llm = robot.getLlm();
|
||||
log.info("Ollama API websocket fullPromptContent: {}", fullPromptContent);
|
||||
if (llm == null) {
|
||||
log.info("Ollama API not available");
|
||||
sendMessageWebsocket(MessageTypeEnum.ERROR, "Ollama service is not available", messageProtobufReply);
|
||||
return;
|
||||
}
|
||||
|
||||
// 获取适当的模型实例
|
||||
OllamaChatModel chatModel = bytedeskOllamaChatModel;
|
||||
if (chatModel == null) {
|
||||
log.info("Ollama API not available");
|
||||
sendMessageWebsocket(MessageTypeEnum.ERROR, "Ollama service is not available", messageProtobufReply);
|
||||
return;
|
||||
}
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = { false };
|
||||
final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) };
|
||||
|
||||
try {
|
||||
chatModel.stream(prompt).subscribe(
|
||||
response -> {
|
||||
if (response != null) {
|
||||
log.info("Ollama API response metadata: {}", response.getMetadata());
|
||||
List<Generation> generations = response.getResults();
|
||||
for (Generation generation : generations) {
|
||||
AssistantMessage assistantMessage = generation.getOutput();
|
||||
String textContent = assistantMessage.getText();
|
||||
log.info("Ollama API Websocket response text: {}", textContent);
|
||||
|
||||
sendMessageWebsocket(MessageTypeEnum.STREAM, textContent, messageProtobufReply);
|
||||
}
|
||||
// 提取token使用情况
|
||||
tokenUsage[0] = extractTokenUsage(response);
|
||||
success[0] = true;
|
||||
}
|
||||
},
|
||||
error -> {
|
||||
log.error("Ollama API error: ", error);
|
||||
sendMessageWebsocket(MessageTypeEnum.ERROR, "服务暂时不可用,请稍后重试", messageProtobufReply);
|
||||
success[0] = false;
|
||||
},
|
||||
() -> {
|
||||
log.info("Chat stream completed");
|
||||
// 记录token使用情况
|
||||
long responseTime = System.currentTimeMillis() - startTime;
|
||||
String modelType = (llm != null && StringUtils.hasText(llm.getTextModel())) ? llm.getTextModel()
|
||||
: "llama2";
|
||||
recordAiTokenUsage(robot, LlmConsts.OLLAMA, modelType,
|
||||
tokenUsage[0].getPromptTokens(), tokenUsage[0].getCompletionTokens(), success[0],
|
||||
responseTime);
|
||||
});
|
||||
} catch (Exception e) {
|
||||
log.error("Error processing Ollama prompt", e);
|
||||
sendMessageWebsocket(MessageTypeEnum.ERROR, "服务暂时不可用,请稍后重试", messageProtobufReply);
|
||||
success[0] = false;
|
||||
// 记录token使用情况
|
||||
long responseTime = System.currentTimeMillis() - startTime;
|
||||
String modelType = (llm != null && StringUtils.hasText(llm.getTextModel())) ? llm.getTextModel() : "llama2";
|
||||
recordAiTokenUsage(robot, LlmConsts.OLLAMA, modelType,
|
||||
tokenUsage[0].getPromptTokens(), tokenUsage[0].getCompletionTokens(), success[0], responseTime);
|
||||
}
|
||||
}
|
||||
|
||||
protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
log.info("Ollama API sync fullPromptContent: {}", fullPromptContent);
|
||||
|
||||
// 从robot中获取llm配置
|
||||
RobotLlm llm = robot.getLlm();
|
||||
log.info("Ollama API websocket fullPromptContent: {}", fullPromptContent);
|
||||
|
||||
if (llm == null) {
|
||||
log.info("Ollama API not available");
|
||||
return "Ollama service is not available";
|
||||
}
|
||||
|
||||
// 获取适当的模型实例
|
||||
OllamaChatModel chatModel = bytedeskOllamaChatModel;
|
||||
|
||||
try {
|
||||
try {
|
||||
// 如果有robot参数,尝试创建自定义选项
|
||||
if (robot != null && robot.getLlm() != null) {
|
||||
// 创建自定义选项
|
||||
OllamaOptions customOptions = createOllamaOptions(robot.getLlm());
|
||||
if (customOptions != null) {
|
||||
// 使用自定义选项创建Prompt
|
||||
Prompt prompt = new Prompt(message, customOptions);
|
||||
var response = chatModel.call(prompt);
|
||||
log.info("Ollama API Sync response metadata: {}", response.getMetadata());
|
||||
tokenUsage = extractTokenUsage(response);
|
||||
success = true;
|
||||
return extractTextFromResponse(response);
|
||||
}
|
||||
}
|
||||
var response = chatModel.call(message);
|
||||
tokenUsage = extractTokenUsage(response);
|
||||
success = true;
|
||||
return extractTextFromResponse(response);
|
||||
} catch (Exception e) {
|
||||
log.error("Ollama API sync error", e);
|
||||
success = false;
|
||||
return "服务暂时不可用,请稍后重试";
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Ollama API sync error", e);
|
||||
success = false;
|
||||
return "服务暂时不可用,请稍后重试";
|
||||
} finally {
|
||||
// 记录token使用情况
|
||||
long responseTime = System.currentTimeMillis() - startTime;
|
||||
String modelType = (robot != null && robot.getLlm() != null
|
||||
&& StringUtils.hasText(robot.getLlm().getTextModel()))
|
||||
? robot.getLlm().getTextModel()
|
||||
: "llama2";
|
||||
recordAiTokenUsage(robot, LlmConsts.OLLAMA, modelType,
|
||||
tokenUsage.getPromptTokens(), tokenUsage.getCompletionTokens(), success, responseTime);
|
||||
}
|
||||
}
|
||||
|
||||
protected void processPromptSse(Prompt prompt, RobotProtobuf robot, MessageProtobuf messageProtobufQuery,
|
||||
MessageProtobuf messageProtobufReply, SseEmitter emitter, String fullPromptContent) {
|
||||
Assert.notNull(emitter, "SseEmitter must not be null");
|
||||
// 从robot中获取llm配置
|
||||
RobotLlm llm = robot.getLlm();
|
||||
log.info("Ollama API SSE fullPromptContent: {}", fullPromptContent);
|
||||
|
||||
if (llm == null) {
|
||||
log.info("Ollama API not available");
|
||||
sendStreamEndMessage(messageProtobufQuery, messageProtobufReply, emitter, 0, 0, 0, fullPromptContent,
|
||||
LlmConsts.OLLAMA,
|
||||
(llm != null && StringUtils.hasText(llm.getTextModel())) ? llm.getTextModel() : "llama2");
|
||||
return;
|
||||
}
|
||||
|
||||
// 获取适当的模型实例
|
||||
OllamaChatModel chatModel = bytedeskOllamaChatModel;
|
||||
|
||||
if (chatModel == null) {
|
||||
log.info("Ollama API not available");
|
||||
// 使用sendStreamEndMessage方法替代重复的代码
|
||||
sendStreamEndMessage(messageProtobufQuery, messageProtobufReply, emitter, 0, 0, 0, fullPromptContent,
|
||||
LlmConsts.OLLAMA,
|
||||
(llm != null && StringUtils.hasText(llm.getTextModel())) ? llm.getTextModel() : "llama2");
|
||||
return;
|
||||
}
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = { false };
|
||||
final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) };
|
||||
|
||||
try {
|
||||
// 发送初始消息,告知用户请求已收到,正在处理
|
||||
sendStreamStartMessage(messageProtobufReply, emitter, "正在思考中...");
|
||||
|
||||
chatModel.stream(prompt).subscribe(
|
||||
response -> {
|
||||
try {
|
||||
if (response != null && !isEmitterCompleted(emitter)) {
|
||||
List<Generation> generations = response.getResults();
|
||||
for (Generation generation : generations) {
|
||||
AssistantMessage assistantMessage = generation.getOutput();
|
||||
String textContent = assistantMessage.getText();
|
||||
log.info("Ollama API SSE response text: {}", textContent);
|
||||
|
||||
sendStreamMessage(messageProtobufQuery, messageProtobufReply, emitter, textContent);
|
||||
}
|
||||
// 提取token使用情况
|
||||
tokenUsage[0] = extractTokenUsage(response);
|
||||
success[0] = true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Ollama API SSE error 1: ", e);
|
||||
handleSseError(e, messageProtobufQuery, messageProtobufReply, emitter);
|
||||
success[0] = false;
|
||||
}
|
||||
},
|
||||
error -> {
|
||||
log.error("Ollama API SSE error 2: ", error);
|
||||
handleSseError(error, messageProtobufQuery, messageProtobufReply, emitter);
|
||||
success[0] = false;
|
||||
},
|
||||
() -> {
|
||||
log.info("Ollama API SSE complete");
|
||||
// 发送流结束消息,包含token使用情况和prompt内容
|
||||
sendStreamEndMessage(messageProtobufQuery, messageProtobufReply, emitter,
|
||||
tokenUsage[0].getPromptTokens(), tokenUsage[0].getCompletionTokens(),
|
||||
tokenUsage[0].getTotalTokens(), fullPromptContent, LlmConsts.OLLAMA,
|
||||
(llm != null && StringUtils.hasText(llm.getTextModel())) ? llm.getTextModel()
|
||||
: "llama2");
|
||||
// 记录token使用情况
|
||||
long responseTime = System.currentTimeMillis() - startTime;
|
||||
String modelType = (llm != null && StringUtils.hasText(llm.getTextModel())) ? llm.getTextModel()
|
||||
: "llama2";
|
||||
recordAiTokenUsage(robot, LlmConsts.OLLAMA, modelType,
|
||||
tokenUsage[0].getPromptTokens(), tokenUsage[0].getCompletionTokens(), success[0],
|
||||
responseTime);
|
||||
});
|
||||
} catch (Exception e) {
|
||||
log.error("Error starting Ollama stream 4", e);
|
||||
handleSseError(e, messageProtobufQuery, messageProtobufReply, emitter);
|
||||
success[0] = false;
|
||||
// 记录token使用情况
|
||||
long responseTime = System.currentTimeMillis() - startTime;
|
||||
String modelType = (llm != null && StringUtils.hasText(llm.getTextModel())) ? llm.getTextModel() : "llama2";
|
||||
recordAiTokenUsage(robot, LlmConsts.OLLAMA, modelType,
|
||||
tokenUsage[0].getPromptTokens(), tokenUsage[0].getCompletionTokens(), success[0], responseTime);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -3,7 +3,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2024-05-31 09:50:56
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-08-20 21:25:31
|
||||
* @LastEditTime: 2025-08-21 12:40:17
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -23,6 +23,7 @@ import org.springframework.ai.chat.prompt.Prompt;
|
||||
import org.springframework.ai.ollama.OllamaChatModel;
|
||||
import org.springframework.ai.ollama.api.OllamaApi;
|
||||
import org.springframework.ai.ollama.api.OllamaOptions;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
@@ -43,7 +44,7 @@ import reactor.core.publisher.Flux;
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/ollama")
|
||||
@RequiredArgsConstructor
|
||||
// @ConditionalOnProperty(prefix = "spring.ai.ollama.chat", name = "enabled", havingValue = "true", matchIfMissing = false)
|
||||
@ConditionalOnProperty(prefix = "spring.ai.ollama.chat", name = "enabled", havingValue = "true", matchIfMissing = false)
|
||||
public class SpringAIOllamaController {
|
||||
|
||||
private final SpringAIOllamaService springAIOllamaService;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-26 16:59:14
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-08-20 10:57:53
|
||||
* @LastEditTime: 2025-08-21 12:45:56
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -36,6 +36,7 @@ import com.bytedesk.ai.provider.LlmProviderRestService;
|
||||
import com.bytedesk.ai.robot.RobotLlm;
|
||||
import com.bytedesk.ai.robot.RobotProtobuf;
|
||||
import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
@@ -154,7 +155,7 @@ public class SpringAIOllamaService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = { false };
|
||||
final TokenUsage[] tokenUsage = { new TokenUsage(0, 0, 0) };
|
||||
final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) };
|
||||
|
||||
try {
|
||||
chatModel.stream(prompt).subscribe(
|
||||
@@ -165,6 +166,7 @@ public class SpringAIOllamaService extends BaseSpringAIService {
|
||||
for (Generation generation : generations) {
|
||||
AssistantMessage assistantMessage = generation.getOutput();
|
||||
String textContent = assistantMessage.getText();
|
||||
log.info("Ollama API Websocket response text: {}", textContent);
|
||||
|
||||
sendMessageWebsocket(MessageTypeEnum.STREAM, textContent, messageProtobufReply);
|
||||
}
|
||||
@@ -204,7 +206,7 @@ public class SpringAIOllamaService extends BaseSpringAIService {
|
||||
protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
log.info("Ollama API sync fullPromptContent: {}", fullPromptContent);
|
||||
|
||||
@@ -230,6 +232,7 @@ public class SpringAIOllamaService extends BaseSpringAIService {
|
||||
// 使用自定义选项创建Prompt
|
||||
Prompt prompt = new Prompt(message, customOptions);
|
||||
var response = chatModel.call(prompt);
|
||||
log.info("Ollama API Sync response metadata: {}", response.getMetadata());
|
||||
tokenUsage = extractTokenUsage(response);
|
||||
success = true;
|
||||
return extractTextFromResponse(response);
|
||||
@@ -291,7 +294,7 @@ public class SpringAIOllamaService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = { false };
|
||||
final TokenUsage[] tokenUsage = { new TokenUsage(0, 0, 0) };
|
||||
final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) };
|
||||
|
||||
try {
|
||||
// 发送初始消息,告知用户请求已收到,正在处理
|
||||
@@ -305,6 +308,7 @@ public class SpringAIOllamaService extends BaseSpringAIService {
|
||||
for (Generation generation : generations) {
|
||||
AssistantMessage assistantMessage = generation.getOutput();
|
||||
String textContent = assistantMessage.getText();
|
||||
log.info("Ollama API SSE response text: {}", textContent);
|
||||
|
||||
sendStreamMessage(messageProtobufQuery, messageProtobufReply, emitter, textContent);
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-28 11:44:03
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-16 11:38:24
|
||||
* @LastEditTime: 2025-08-21 12:46:49
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -31,6 +31,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@@ -82,7 +83,7 @@ public class SpringAIOpenaiService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
// 使用同一个ChatModel实例,但传入不同的选项
|
||||
openaiChatModel.stream(requestPrompt).subscribe(
|
||||
@@ -120,7 +121,7 @@ public class SpringAIOpenaiService extends BaseSpringAIService {
|
||||
protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
log.info("OpenAI API sync fullPromptContent: {}", fullPromptContent);
|
||||
|
||||
@@ -188,7 +189,7 @@ public class SpringAIOpenaiService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
openaiChatModel.stream(requestPrompt).subscribe(
|
||||
response -> {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-28 11:44:03
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-16 11:38:40
|
||||
* @LastEditTime: 2025-08-21 12:46:55
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -31,6 +31,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@@ -83,7 +84,7 @@ public class SpringAIOpenrouterService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
// 使用同一个ChatModel实例,但传入不同的选项
|
||||
openrouterChatModel.stream(requestPrompt).subscribe(
|
||||
@@ -122,7 +123,7 @@ public class SpringAIOpenrouterService extends BaseSpringAIService {
|
||||
log.info("SpringAIOpenrouterService processPromptSync with full prompt content: {}", fullPromptContent);
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
try {
|
||||
if (openrouterChatModel == null) {
|
||||
@@ -184,7 +185,7 @@ public class SpringAIOpenrouterService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
openrouterChatModel.stream(requestPrompt).subscribe(
|
||||
response -> {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-28 11:44:03
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-16 14:17:40
|
||||
* @LastEditTime: 2025-08-21 12:47:01
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -32,6 +32,7 @@ import org.springframework.util.StringUtils;
|
||||
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
/**
|
||||
* @author: https://github.com/fzj111
|
||||
@@ -85,7 +86,7 @@ public class SpringAISiliconFlowService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
// 使用同一个ChatModel实例,但传入不同的选项
|
||||
siliconFlowChatModel.get().stream(requestPrompt).subscribe(
|
||||
@@ -126,7 +127,7 @@ public class SpringAISiliconFlowService extends BaseSpringAIService {
|
||||
protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
log.info("SiliconFlow API sync fullPromptContent: {}", fullPromptContent);
|
||||
|
||||
@@ -197,7 +198,7 @@ public class SpringAISiliconFlowService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
siliconFlowChatModel.get().stream(requestPrompt).subscribe(
|
||||
response -> {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-28 11:44:03
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-16 14:17:58
|
||||
* @LastEditTime: 2025-08-21 12:47:07
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -32,6 +32,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@@ -83,7 +84,7 @@ public class SpringAITencentService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
// 使用同一个ChatModel实例,但传入不同的选项
|
||||
tencentChatModel.stream(requestPrompt).subscribe(
|
||||
@@ -123,7 +124,7 @@ public class SpringAITencentService extends BaseSpringAIService {
|
||||
protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
log.info("Tencent API sync fullPromptContent: {}", fullPromptContent);
|
||||
|
||||
@@ -192,7 +193,7 @@ public class SpringAITencentService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
tencentChatModel.stream(requestPrompt).subscribe(
|
||||
response -> {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-28 11:44:03
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-16 14:16:43
|
||||
* @LastEditTime: 2025-08-21 12:47:12
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -32,6 +32,7 @@ import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@@ -82,7 +83,7 @@ public class SpringAIVolcengineService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
// 使用同一个ChatModel实例,但传入不同的选项
|
||||
volcengineChatModel.stream(requestPrompt).subscribe(
|
||||
@@ -121,7 +122,7 @@ public class SpringAIVolcengineService extends BaseSpringAIService {
|
||||
log.info("SpringAIVolcengineService processPromptSync with full prompt content: {}", fullPromptContent);
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
try {
|
||||
if (volcengineChatModel == null) {
|
||||
@@ -191,7 +192,7 @@ public class SpringAIVolcengineService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
volcengineChatModel.stream(requestPrompt).subscribe(
|
||||
response -> {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-26 16:58:56
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-16 13:58:57
|
||||
* @LastEditTime: 2025-08-21 12:47:17
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -35,6 +35,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService;
|
||||
import com.bytedesk.core.constant.LlmConsts;
|
||||
import com.bytedesk.core.message.MessageProtobuf;
|
||||
import com.bytedesk.core.message.MessageTypeEnum;
|
||||
import com.bytedesk.ai.springai.service.ChatTokenUsage;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import reactor.core.publisher.Flux;
|
||||
@@ -117,7 +118,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
|
||||
chatModel.stream(prompt).subscribe(
|
||||
response -> {
|
||||
@@ -160,7 +161,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
boolean success = false;
|
||||
TokenUsage tokenUsage = new TokenUsage(0, 0, 0);
|
||||
ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);
|
||||
|
||||
log.info("Zhipuai API sync fullPromptContent: {}", fullPromptContent);
|
||||
|
||||
@@ -263,7 +264,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
final boolean[] success = {false};
|
||||
final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)};
|
||||
final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)};
|
||||
final ChatResponse[] lastResponse = {null};
|
||||
|
||||
Flux<ChatResponse> responseFlux = chatModel.stream(prompt);
|
||||
@@ -341,16 +342,16 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
* @param response ChatResponse对象
|
||||
* @return ChatTokenUsage对象
|
||||
*/
|
||||
private TokenUsage extractZhipuaiTokenUsage(org.springframework.ai.chat.model.ChatResponse response) {
|
||||
private ChatTokenUsage extractZhipuaiTokenUsage(org.springframework.ai.chat.model.ChatResponse response) {
|
||||
try {
|
||||
if (response == null) {
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
var metadata = response.getMetadata();
|
||||
if (metadata == null) {
|
||||
log.warn("Zhipuai API response metadata is null");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
log.info("Zhipuai API manual token extraction - metadata: {}", metadata);
|
||||
@@ -440,7 +441,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
log.info("Zhipuai API manual token extraction result from string parsing - prompt: {}, completion: {}, total: {}",
|
||||
promptTokens, completionTokens, totalTokens);
|
||||
|
||||
return new TokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
return new ChatTokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -491,7 +492,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
log.info("Zhipuai API manual token extraction result - prompt: {}, completion: {}, total: {}",
|
||||
promptTokens, completionTokens, totalTokens);
|
||||
|
||||
return new TokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
return new ChatTokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -505,7 +506,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
|
||||
// 方法4: 如果手动提取失败,尝试使用原始的extractTokenUsage方法作为后备
|
||||
log.info("Zhipuai API manual extraction failed, trying original extractTokenUsage method");
|
||||
TokenUsage fallbackUsage = extractTokenUsage(response);
|
||||
ChatTokenUsage fallbackUsage = extractTokenUsage(response);
|
||||
if (fallbackUsage.getTotalTokens() > 0) {
|
||||
log.info("Zhipuai API fallback extraction successful: {}", fallbackUsage);
|
||||
return fallbackUsage;
|
||||
@@ -513,7 +514,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
|
||||
// 方法5: 如果所有方法都失败,尝试估算token使用量
|
||||
log.info("Zhipuai API all extraction methods failed, attempting to estimate token usage");
|
||||
TokenUsage estimatedUsage = estimateTokenUsageFromResponse(response);
|
||||
ChatTokenUsage estimatedUsage = estimateTokenUsageFromResponse(response);
|
||||
if (estimatedUsage.getTotalTokens() > 0) {
|
||||
log.info("Zhipuai API estimated token usage: {}", estimatedUsage);
|
||||
return estimatedUsage;
|
||||
@@ -525,7 +526,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
// 如果手动提取出错,尝试使用原始的extractTokenUsage方法作为后备
|
||||
try {
|
||||
log.info("Zhipuai API manual extraction error, trying original extractTokenUsage method");
|
||||
TokenUsage fallbackUsage = extractTokenUsage(response);
|
||||
ChatTokenUsage fallbackUsage = extractTokenUsage(response);
|
||||
if (fallbackUsage.getTotalTokens() > 0) {
|
||||
log.info("Zhipuai API fallback extraction successful after error: {}", fallbackUsage);
|
||||
return fallbackUsage;
|
||||
@@ -536,7 +537,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
}
|
||||
|
||||
log.warn("Zhipuai API all token extraction methods failed, returning zeros");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -545,10 +546,10 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
* @param response ChatResponse对象
|
||||
* @return 估算的ChatTokenUsage对象
|
||||
*/
|
||||
private TokenUsage estimateTokenUsageFromResponse(org.springframework.ai.chat.model.ChatResponse response) {
|
||||
private ChatTokenUsage estimateTokenUsageFromResponse(org.springframework.ai.chat.model.ChatResponse response) {
|
||||
try {
|
||||
if (response == null) {
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
// 获取输出文本
|
||||
@@ -563,11 +564,11 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
log.info("Zhipuai API estimated tokens - output: {} chars -> {} tokens, estimated prompt: {} tokens, total: {} tokens",
|
||||
outputText.length(), completionTokens, promptTokens, totalTokens);
|
||||
|
||||
return new TokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
return new ChatTokenUsage(promptTokens, completionTokens, totalTokens);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Error estimating token usage", e);
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -639,15 +640,15 @@ public class SpringAIZhipuaiService extends BaseSpringAIService {
|
||||
}
|
||||
|
||||
// 测试手动token提取
|
||||
TokenUsage manualUsage = extractZhipuaiTokenUsage(response);
|
||||
ChatTokenUsage manualUsage = extractZhipuaiTokenUsage(response);
|
||||
log.info("Zhipuai API test manual token extraction result: {}", manualUsage);
|
||||
|
||||
// 测试原始token提取
|
||||
TokenUsage originalUsage = extractTokenUsage(response);
|
||||
ChatTokenUsage originalUsage = extractTokenUsage(response);
|
||||
log.info("Zhipuai API test original token extraction result: {}", originalUsage);
|
||||
|
||||
// 测试token估算功能
|
||||
TokenUsage estimatedUsage = estimateTokenUsageFromResponse(response);
|
||||
ChatTokenUsage estimatedUsage = estimateTokenUsageFromResponse(response);
|
||||
log.info("Zhipuai API test estimated token usage result: {}", estimatedUsage);
|
||||
|
||||
// 测试token估算算法
|
||||
|
||||
@@ -721,11 +721,6 @@ public abstract class BaseSpringAIService implements SpringAIService {
|
||||
processPromptSse(aiPrompt, robot, messageProtobufQuery, messageProtobufReply, emitter, fullPromptContent);
|
||||
}
|
||||
|
||||
// private String createAndProcessPromptSync(String query, String context, RobotProtobuf robot,
|
||||
// MessageProtobuf messageProtobufQuery, MessageProtobuf messageProtobufReply) {
|
||||
// return createAndProcessPromptSyncWithPrompt(query, context, robot, messageProtobufQuery, messageProtobufReply).getResponse();
|
||||
// }
|
||||
|
||||
private PromptResult createAndProcessPromptSyncWithPrompt(String query, String context, RobotProtobuf robot,
|
||||
MessageProtobuf messageProtobufQuery, MessageProtobuf messageProtobufReply) {
|
||||
|
||||
@@ -1145,7 +1140,7 @@ public abstract class BaseSpringAIService implements SpringAIService {
|
||||
* @param response ChatResponse from AI service
|
||||
* @return TokenUsage object containing prompt and completion tokens
|
||||
*/
|
||||
protected TokenUsage extractTokenUsage(Object response) {
|
||||
protected ChatTokenUsage extractTokenUsage(Object response) {
|
||||
try {
|
||||
if (response instanceof org.springframework.ai.chat.model.ChatResponse) {
|
||||
org.springframework.ai.chat.model.ChatResponse chatResponse = (org.springframework.ai.chat.model.ChatResponse) response;
|
||||
@@ -1306,7 +1301,7 @@ public abstract class BaseSpringAIService implements SpringAIService {
|
||||
|
||||
log.info("BaseSpringAIService extractTokenUsage extracted tokens - prompt: {}, completion: {}, total: {}",
|
||||
prompt, completion, total);
|
||||
return new TokenUsage(prompt, completion, total);
|
||||
return new ChatTokenUsage(prompt, completion, total);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
@@ -1315,7 +1310,7 @@ public abstract class BaseSpringAIService implements SpringAIService {
|
||||
|
||||
// 如果无法提取,返回默认值
|
||||
log.warn("BaseSpringAIService extractTokenUsage could not extract token usage, returning zeros");
|
||||
return new TokenUsage(0, 0, 0);
|
||||
return new ChatTokenUsage(0, 0, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1421,21 +1416,21 @@ public abstract class BaseSpringAIService implements SpringAIService {
|
||||
/**
|
||||
* Token usage data class
|
||||
*/
|
||||
protected static class TokenUsage {
|
||||
private final long promptTokens;
|
||||
private final long completionTokens;
|
||||
private final long totalTokens;
|
||||
// protected static class TokenUsage {
|
||||
// private final long promptTokens;
|
||||
// private final long completionTokens;
|
||||
// private final long totalTokens;
|
||||
|
||||
public TokenUsage(long promptTokens, long completionTokens, long totalTokens) {
|
||||
this.promptTokens = promptTokens;
|
||||
this.completionTokens = completionTokens;
|
||||
this.totalTokens = totalTokens;
|
||||
}
|
||||
// public TokenUsage(long promptTokens, long completionTokens, long totalTokens) {
|
||||
// this.promptTokens = promptTokens;
|
||||
// this.completionTokens = completionTokens;
|
||||
// this.totalTokens = totalTokens;
|
||||
// }
|
||||
|
||||
public long getPromptTokens() { return promptTokens; }
|
||||
public long getCompletionTokens() { return completionTokens; }
|
||||
public long getTotalTokens() { return totalTokens; }
|
||||
}
|
||||
// public long getPromptTokens() { return promptTokens; }
|
||||
// public long getCompletionTokens() { return completionTokens; }
|
||||
// public long getTotalTokens() { return totalTokens; }
|
||||
// }
|
||||
|
||||
// 带prompt参数的抽象方法重载
|
||||
protected abstract void processPromptWebsocket(Prompt prompt, RobotProtobuf robot, MessageProtobuf messageProtobufQuery,
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-08-21 12:42:33
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-08-21 13:05:30
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
* Business Source License 1.1: https://github.com/Bytedesk/bytedesk/blob/main/LICENSE
|
||||
* contact: 270580156@qq.com
|
||||
*
|
||||
* Copyright (c) 2025 by bytedesk.com, All Rights Reserved.
|
||||
*/
|
||||
package com.bytedesk.ai.springai.service;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class ChatTokenUsage {
|
||||
private long promptTokens;
|
||||
private long completionTokens;
|
||||
private long totalTokens;
|
||||
}
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-19 09:39:15
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-16 09:33:50
|
||||
* @LastEditTime: 2025-08-21 12:22:06
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -41,7 +41,7 @@ public class ZhipuaiChatConfig {
|
||||
@Value("${spring.ai.zhipuai.api-key:}")
|
||||
private String apiKey;
|
||||
|
||||
@Value("${spring.ai.zhipuai.chat.options.model:glm-4}")
|
||||
@Value("${spring.ai.zhipuai.chat.options.model:glm-4-flash}")
|
||||
private String model;
|
||||
|
||||
@Value("${spring.ai.zhipuai.chat.options.temperature:0.7}")
|
||||
|
||||
@@ -0,0 +1,624 @@
|
||||
/*
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-08-21 12:26:02
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-08-21 12:37:21
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
* Business Source License 1.1: https://github.com/Bytedesk/bytedesk/blob/main/LICENSE
|
||||
* contact: 270580156@qq.com
|
||||
*
|
||||
* Copyright (c) 2025 by bytedesk.com, All Rights Reserved.
|
||||
*/
|
||||
package com.bytedesk.ai.zhipuai;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.zhipu.oapi.ClientV4;
|
||||
import com.zhipu.oapi.Constants;
|
||||
import com.zhipu.oapi.service.v4.model.ChatCompletionRequest;
|
||||
import com.zhipu.oapi.service.v4.model.ChatFunction;
|
||||
import com.zhipu.oapi.service.v4.model.ChatMessage;
|
||||
import com.zhipu.oapi.service.v4.model.ChatMessageAccumulator;
|
||||
import com.zhipu.oapi.service.v4.model.ChatMessageRole;
|
||||
import com.zhipu.oapi.service.v4.model.ChatMeta;
|
||||
import com.zhipu.oapi.service.v4.model.ChatTool;
|
||||
import com.zhipu.oapi.service.v4.model.ChatToolType;
|
||||
import com.zhipu.oapi.service.v4.model.ModelApiResponse;
|
||||
import com.zhipu.oapi.service.v4.model.ModelData;
|
||||
import com.zhipu.oapi.service.v4.model.QueryModelResultRequest;
|
||||
import com.zhipu.oapi.service.v4.model.QueryModelResultResponse;
|
||||
import com.zhipu.oapi.service.v4.model.WebSearch;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import reactor.core.publisher.Flux;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@ConditionalOnProperty(prefix = "spring.ai.zhipuai.chat", name = "enabled", havingValue = "true", matchIfMissing = false)
|
||||
public class ZhipuaiChatService {
|
||||
|
||||
@Autowired
|
||||
@Qualifier("zhipuaiChatClient")
|
||||
private ClientV4 client;
|
||||
|
||||
@Autowired
|
||||
private ZhipuaiChatConfig zhipuaiChatConfig;
|
||||
|
||||
/**
|
||||
* 角色扮演聊天
|
||||
*/
|
||||
public String rolePlayChat(String message, String userInfo, String botInfo, String botName, String userName) {
|
||||
// 添加请求日志
|
||||
log.info("Zhipuai API role play request - message length: {}, userInfo: {}, botInfo: {}, botName: {}, userName: {}",
|
||||
message.length(), userInfo, botInfo, botName, userName);
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
// 使用默认client进行角色扮演聊天
|
||||
ClientV4 chatClient = client;
|
||||
|
||||
try {
|
||||
if (client == null) {
|
||||
log.error("Zhipuai API client is null");
|
||||
return "Zhipuai client is not available";
|
||||
}
|
||||
|
||||
List<ChatMessage> messages = new ArrayList<>();
|
||||
ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message);
|
||||
messages.add(chatMessage);
|
||||
|
||||
ChatMeta meta = new ChatMeta();
|
||||
meta.setUser_info(userInfo);
|
||||
meta.setBot_info(botInfo);
|
||||
meta.setBot_name(botName);
|
||||
meta.setUser_name(userName);
|
||||
|
||||
String requestId = String.format("roleplay-%d", System.currentTimeMillis());
|
||||
|
||||
ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder()
|
||||
.model(Constants.ModelCharGLM3)
|
||||
.stream(Boolean.FALSE)
|
||||
.invokeMethod(Constants.invokeMethod)
|
||||
.messages(messages)
|
||||
.meta(meta)
|
||||
.requestId(requestId)
|
||||
.build();
|
||||
|
||||
log.info("Zhipuai API role play invoking model with requestId: {}", requestId);
|
||||
ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest);
|
||||
|
||||
if (response.isSuccess() && response.getData() != null) {
|
||||
log.info("Zhipuai API role play response success");
|
||||
|
||||
// 提取token使用情况
|
||||
// TokenUsage tokenUsage = extractZhipuaiTokenUsage(response);
|
||||
// log.info("Zhipuai API role play tokenUsage: {}", tokenUsage);
|
||||
|
||||
Object content = response.getData().getChoices().get(0).getMessage().getContent();
|
||||
return content != null ? content.toString() : null;
|
||||
} else {
|
||||
log.error("Zhipuai API role play error: {}", response.getError());
|
||||
return "Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Zhipuai API role play error: ", e);
|
||||
return "Error: " + e.getMessage();
|
||||
} finally {
|
||||
long responseTime = System.currentTimeMillis() - startTime;
|
||||
log.info("Zhipuai API role play completed in {}ms", responseTime);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function Calling 聊天
|
||||
*/
|
||||
public String functionCallingChat(String message, List<ChatFunction> functions) {
|
||||
return functionCallingChat(message, null, null, functions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Function Calling 聊天(带自定义参数)
|
||||
*/
|
||||
public String functionCallingChat(String message, String model, Double temperature, List<ChatFunction> functions) {
|
||||
// 添加请求日志
|
||||
log.info("Zhipuai API function calling request - message length: {}, model: {}, temperature: {}, functions count: {}",
|
||||
message.length(), model, temperature, functions != null ? functions.size() : 0);
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
// 使用默认client进行函数调用聊天
|
||||
ClientV4 chatClient = client;
|
||||
|
||||
try {
|
||||
|
||||
List<ChatMessage> messages = new ArrayList<>();
|
||||
ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message);
|
||||
messages.add(chatMessage);
|
||||
|
||||
List<ChatTool> chatToolList = new ArrayList<>();
|
||||
if (functions != null) {
|
||||
for (ChatFunction function : functions) {
|
||||
ChatTool chatTool = new ChatTool();
|
||||
chatTool.setType(ChatToolType.FUNCTION.value());
|
||||
chatTool.setFunction(function);
|
||||
chatToolList.add(chatTool);
|
||||
}
|
||||
}
|
||||
|
||||
String requestId = String.format("function-%d", System.currentTimeMillis());
|
||||
|
||||
ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder()
|
||||
.model(model != null ? model : zhipuaiChatConfig.getModel())
|
||||
.stream(Boolean.FALSE)
|
||||
.invokeMethod(Constants.invokeMethod)
|
||||
.messages(messages)
|
||||
.requestId(requestId)
|
||||
.temperature(temperature != null ? temperature.floatValue() : (float) zhipuaiChatConfig.getTemperature())
|
||||
.tools(chatToolList)
|
||||
.toolChoice("auto")
|
||||
.build();
|
||||
|
||||
log.info("Zhipuai API function calling invoking model with requestId: {}", requestId);
|
||||
ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest);
|
||||
|
||||
if (response.isSuccess() && response.getData() != null) {
|
||||
log.info("Zhipuai API function calling response success");
|
||||
|
||||
// 提取token使用情况
|
||||
// TokenUsage tokenUsage = extractZhipuaiTokenUsage(response);
|
||||
// log.info("Zhipuai API function calling tokenUsage: {}", tokenUsage);
|
||||
|
||||
Object content = response.getData().getChoices().get(0).getMessage().getContent();
|
||||
return content != null ? content.toString() : null;
|
||||
} else {
|
||||
log.error("Zhipuai API function calling error: {}", response.getError());
|
||||
return "Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Zhipuai API function calling error: ", e);
|
||||
return "Error: " + e.getMessage();
|
||||
} finally {
|
||||
long responseTime = System.currentTimeMillis() - startTime;
|
||||
log.info("Zhipuai API function calling completed in {}ms", responseTime);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 流式 Function Calling 聊天
|
||||
*/
|
||||
public Flux<String> functionCallingChatStream(String message, List<ChatFunction> functions) {
|
||||
return functionCallingChatStream(message, null, null, functions);
|
||||
}
|
||||
|
||||
/**
|
||||
* 流式 Function Calling 聊天(带自定义参数)
|
||||
*/
|
||||
public Flux<String> functionCallingChatStream(String message, String model, Double temperature, List<ChatFunction> functions) {
|
||||
// 使用默认client进行流式函数调用聊天
|
||||
ClientV4 chatClient = client;
|
||||
|
||||
try {
|
||||
|
||||
List<ChatMessage> messages = new ArrayList<>();
|
||||
ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message);
|
||||
messages.add(chatMessage);
|
||||
|
||||
List<ChatTool> chatToolList = new ArrayList<>();
|
||||
if (functions != null) {
|
||||
for (ChatFunction function : functions) {
|
||||
ChatTool chatTool = new ChatTool();
|
||||
chatTool.setType(ChatToolType.FUNCTION.value());
|
||||
chatTool.setFunction(function);
|
||||
chatToolList.add(chatTool);
|
||||
}
|
||||
}
|
||||
|
||||
String requestId = String.format("function-stream-%d", System.currentTimeMillis());
|
||||
|
||||
ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder()
|
||||
.model(model != null ? model : zhipuaiChatConfig.getModel())
|
||||
.stream(Boolean.TRUE)
|
||||
.messages(messages)
|
||||
.requestId(requestId)
|
||||
.temperature(temperature != null ? temperature.floatValue() : (float) zhipuaiChatConfig.getTemperature())
|
||||
.tools(chatToolList)
|
||||
.toolChoice("auto")
|
||||
.build();
|
||||
|
||||
ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest);
|
||||
|
||||
if (response.isSuccess()) {
|
||||
return Flux.from(mapStreamToAccumulator(response.getFlowable()).map(accumulator -> {
|
||||
log.info("Zhipuai API function calling accumulator received: {}", accumulator);
|
||||
log.info("Zhipuai API function calling accumulator class: {}", accumulator.getClass().getName());
|
||||
|
||||
Object delta = accumulator.getDelta();
|
||||
log.info("Zhipuai API function calling delta: {}", delta);
|
||||
log.info("Zhipuai API function calling delta class: {}", delta != null ? delta.getClass().getName() : "null");
|
||||
|
||||
// 处理tool_calls(如果有的话)
|
||||
if (delta != null && delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) {
|
||||
com.zhipu.oapi.service.v4.model.ChatMessage deltaMessage = (com.zhipu.oapi.service.v4.model.ChatMessage) delta;
|
||||
if (deltaMessage.getTool_calls() != null) {
|
||||
log.info("Zhipuai API function calling tool_calls: {}", deltaMessage.getTool_calls());
|
||||
}
|
||||
}
|
||||
|
||||
if (delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) {
|
||||
Object content = ((com.zhipu.oapi.service.v4.model.ChatMessage) delta).getContent();
|
||||
log.info("Zhipuai API function calling content: {}", content);
|
||||
return content != null ? content.toString() : "";
|
||||
} else if (delta != null) {
|
||||
String deltaStr = delta.toString();
|
||||
log.info("Zhipuai API function calling delta as string: {}", deltaStr);
|
||||
|
||||
// 尝试从JSON字符串中提取content字段
|
||||
// String extractedContent = extractContentFromDeltaString(deltaStr);
|
||||
// if (extractedContent != null && !extractedContent.isEmpty()) {
|
||||
// log.info("Zhipuai API function calling extracted content: {}", extractedContent);
|
||||
// return extractedContent;
|
||||
// } else if (!isEmptyAssistantMessage(deltaStr)) {
|
||||
// return deltaStr;
|
||||
// } else {
|
||||
// return "";
|
||||
// }
|
||||
}
|
||||
return "";
|
||||
}));
|
||||
} else {
|
||||
log.error("Zhipuai API error: {}", response.getError());
|
||||
return Flux.just("Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error"));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Error in functionCallingChatStream", e);
|
||||
return Flux.just("Error: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 图像生成 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public String generateImage(String prompt) {
|
||||
return "Image generation is not supported in current SDK version";
|
||||
}
|
||||
|
||||
/**
|
||||
* 图像生成(带请求ID)- 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public String generateImage(String prompt, String requestId) {
|
||||
return "Image generation is not supported in current SDK version";
|
||||
}
|
||||
|
||||
/**
|
||||
* 向量嵌入 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public List<Double> getEmbedding(String text) {
|
||||
log.warn("Embedding is not supported in current SDK version");
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* 批量向量嵌入 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public List<List<Double>> getEmbeddings(List<String> texts) {
|
||||
log.warn("Embeddings is not supported in current SDK version");
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* 语音合成 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public File generateSpeech(String text, String voice, String responseFormat) {
|
||||
log.warn("Speech synthesis is not supported in current SDK version");
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 自定义语音合成 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public File generateCustomSpeech(String text, String voiceText, File voiceData, String responseFormat) {
|
||||
log.warn("Custom voice synthesis is not supported in current SDK version");
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 文件上传 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public String uploadFile(String filePath, String purpose) {
|
||||
return "File upload is not supported in current SDK version";
|
||||
}
|
||||
|
||||
/**
|
||||
* 查询文件列表 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public List<Map<String, Object>> queryFiles() {
|
||||
log.warn("File query is not supported in current SDK version");
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* 下载文件内容 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public File downloadFile(String fileId, String outputPath) {
|
||||
log.warn("File download is not supported in current SDK version");
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建微调任务 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public String createFineTuningJob(String model, String trainingFile) {
|
||||
return "Fine-tuning is not supported in current SDK version";
|
||||
}
|
||||
|
||||
/**
|
||||
* 查询微调任务 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public Map<String, Object> queryFineTuningJob(String jobId) {
|
||||
log.warn("Fine-tuning query is not supported in current SDK version");
|
||||
return new HashMap<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* 异步聊天
|
||||
*/
|
||||
public String chatAsync(String message) {
|
||||
// 添加请求日志
|
||||
log.info("Zhipuai API async request - message length: {}", message.length());
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
// 使用默认client进行异步聊天
|
||||
ClientV4 chatClient = client;
|
||||
|
||||
try {
|
||||
|
||||
List<ChatMessage> messages = new ArrayList<>();
|
||||
ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message);
|
||||
messages.add(chatMessage);
|
||||
|
||||
ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder()
|
||||
.model(zhipuaiChatConfig.getModel())
|
||||
.stream(Boolean.FALSE)
|
||||
.invokeMethod(Constants.invokeMethodAsync)
|
||||
.messages(messages)
|
||||
.build();
|
||||
|
||||
log.info("Zhipuai API async invoking model");
|
||||
ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest);
|
||||
|
||||
if (response.isSuccess() && response.getData() != null) {
|
||||
String taskId = response.getData().getId();
|
||||
log.info("Zhipuai API async task created with taskId: {}", taskId);
|
||||
|
||||
// 轮询获取结果
|
||||
return pollAsyncResult(taskId);
|
||||
} else {
|
||||
log.error("Zhipuai API async error: {}", response.getError());
|
||||
return "Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Zhipuai API async error: ", e);
|
||||
return "Error: " + e.getMessage();
|
||||
} finally {
|
||||
long responseTime = System.currentTimeMillis() - startTime;
|
||||
log.info("Zhipuai API async completed in {}ms", responseTime);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 轮询异步结果
|
||||
*/
|
||||
private String pollAsyncResult(String taskId) {
|
||||
log.info("Zhipuai API starting async result polling for taskId: {}", taskId);
|
||||
|
||||
try {
|
||||
int maxAttempts = 30; // 最多轮询30次
|
||||
int attempt = 0;
|
||||
|
||||
while (attempt < maxAttempts) {
|
||||
log.debug("Zhipuai API polling attempt {}/{} for taskId: {}", attempt + 1, maxAttempts, taskId);
|
||||
|
||||
QueryModelResultRequest request = new QueryModelResultRequest();
|
||||
request.setTaskId(taskId);
|
||||
|
||||
QueryModelResultResponse response = client.queryModelResult(request);
|
||||
|
||||
if (response.isSuccess() && response.getData() != null) {
|
||||
Object taskStatus = response.getData().getTaskStatus();
|
||||
log.debug("Zhipuai API task status: {} for taskId: {}", taskStatus, taskId);
|
||||
|
||||
if ("SUCCESS".equals(taskStatus.toString())) {
|
||||
log.info("Zhipuai API async task completed successfully for taskId: {}", taskId);
|
||||
Object content = response.getData().getChoices().get(0).getMessage().getContent();
|
||||
return content != null ? content.toString() : null;
|
||||
} else if ("FAILED".equals(taskStatus.toString())) {
|
||||
log.error("Zhipuai API async task failed for taskId: {}", taskId);
|
||||
return "Task failed";
|
||||
}
|
||||
} else {
|
||||
log.warn("Zhipuai API async polling response not successful for taskId: {}", taskId);
|
||||
}
|
||||
|
||||
attempt++;
|
||||
Thread.sleep(2000); // 等待2秒后重试
|
||||
}
|
||||
|
||||
log.error("Zhipuai API async task timeout after {} attempts for taskId: {}", maxAttempts, taskId);
|
||||
return "Task timeout after " + maxAttempts + " attempts";
|
||||
} catch (Exception e) {
|
||||
log.error("Zhipuai API error polling async result for taskId: {}", taskId, e);
|
||||
return "Error: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 带Web搜索的聊天
|
||||
*/
|
||||
public String chatWithWebSearch(String message, String searchQuery) {
|
||||
// 添加请求日志
|
||||
log.info("Zhipuai API web search request - message length: {}, searchQuery: {}",
|
||||
message.length(), searchQuery);
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
// 使用默认client进行网络搜索聊天
|
||||
ClientV4 chatClient = client;
|
||||
|
||||
try {
|
||||
|
||||
List<ChatMessage> messages = new ArrayList<>();
|
||||
ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message);
|
||||
messages.add(chatMessage);
|
||||
|
||||
List<ChatTool> chatToolList = new ArrayList<>();
|
||||
|
||||
// 添加Web搜索工具
|
||||
ChatTool webSearchTool = new ChatTool();
|
||||
webSearchTool.setType(ChatToolType.WEB_SEARCH.value());
|
||||
WebSearch webSearch = new WebSearch();
|
||||
webSearch.setSearch_query(searchQuery);
|
||||
webSearch.setSearch_result(true);
|
||||
webSearch.setEnable(true);
|
||||
webSearchTool.setWeb_search(webSearch);
|
||||
chatToolList.add(webSearchTool);
|
||||
|
||||
String requestId = String.format("websearch-%d", System.currentTimeMillis());
|
||||
|
||||
ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder()
|
||||
.model(zhipuaiChatConfig.getModel())
|
||||
.stream(Boolean.FALSE)
|
||||
.invokeMethod(Constants.invokeMethod)
|
||||
.messages(messages)
|
||||
.requestId(requestId)
|
||||
.tools(chatToolList)
|
||||
.toolChoice("auto")
|
||||
.build();
|
||||
|
||||
log.info("Zhipuai API web search invoking model with requestId: {}", requestId);
|
||||
ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest);
|
||||
|
||||
if (response.isSuccess() && response.getData() != null) {
|
||||
log.info("Zhipuai API web search response success");
|
||||
|
||||
// 提取token使用情况
|
||||
// TokenUsage tokenUsage = extractZhipuaiTokenUsage(response);
|
||||
// log.info("Zhipuai API web search tokenUsage: {}", tokenUsage);
|
||||
|
||||
Object content = response.getData().getChoices().get(0).getMessage().getContent();
|
||||
return content != null ? content.toString() : null;
|
||||
} else {
|
||||
log.error("Zhipuai API web search error: {}", response.getError());
|
||||
return "Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Zhipuai API web search error: ", e);
|
||||
return "Error: " + e.getMessage();
|
||||
} finally {
|
||||
long responseTime = System.currentTimeMillis() - startTime;
|
||||
log.info("Zhipuai API web search completed in {}ms", responseTime);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 语音模型聊天 - 暂不支持,需要等待SDK更新
|
||||
*/
|
||||
public String chatWithVoice(String message) {
|
||||
log.warn("Voice chat is not supported in current SDK version");
|
||||
return "Voice chat is not supported in current SDK version";
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 将流式响应转换为Accumulator,参考官方示例
|
||||
*/
|
||||
private io.reactivex.Flowable<ChatMessageAccumulator> mapStreamToAccumulator(io.reactivex.Flowable<ModelData> flowable) {
|
||||
return flowable.map(chunk -> {
|
||||
return new ChatMessageAccumulator(
|
||||
chunk.getChoices().get(0).getDelta(),
|
||||
null,
|
||||
chunk.getChoices().get(0),
|
||||
chunk.getUsage(),
|
||||
chunk.getCreated(),
|
||||
chunk.getId()
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 测试流式响应功能
|
||||
* 用于调试流式响应问题
|
||||
*/
|
||||
public void testStreamResponse() {
|
||||
// 使用默认client进行测试
|
||||
// ClientV4 chatClient = client;
|
||||
|
||||
try {
|
||||
log.info("Zhipuai API testing stream response...");
|
||||
|
||||
// // 创建一个简单的测试请求
|
||||
// String testMessage = "Hello, this is a test message for stream response.";
|
||||
// ChatCompletionRequest chatCompletionRequest = createDynamicRequest(null, testMessage, true);
|
||||
|
||||
// log.info("Zhipuai API making stream test call with message: {}", testMessage);
|
||||
|
||||
// // 调用API
|
||||
// ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest);
|
||||
|
||||
// log.info("Zhipuai API stream test response success: {}", response.isSuccess());
|
||||
|
||||
// if (response.isSuccess()) {
|
||||
// log.info("Zhipuai API stream test starting flowable processing");
|
||||
|
||||
// final int[] messageCount = {0};
|
||||
|
||||
// // 使用AtomicBoolean来标记是否是第一个消息,参考官方示例
|
||||
// // java.util.concurrent.atomic.AtomicBoolean isFirst = new java.util.concurrent.atomic.AtomicBoolean(true);
|
||||
|
||||
// mapStreamToAccumulator(response.getFlowable())
|
||||
// .doOnNext(accumulator -> {
|
||||
// messageCount[0]++;
|
||||
// log.info("Zhipuai API stream test message #{}: accumulator={}", messageCount[0], accumulator);
|
||||
// log.info("Zhipuai API stream test message #{}: accumulator class={}", messageCount[0], accumulator.getClass().getName());
|
||||
|
||||
// Object delta = accumulator.getDelta();
|
||||
// log.info("Zhipuai API stream test message #{}: delta={}", messageCount[0], delta);
|
||||
// log.info("Zhipuai API stream test message #{}: delta class={}", messageCount[0], delta != null ? delta.getClass().getName() : "null");
|
||||
|
||||
// if (delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) {
|
||||
// Object content = ((com.zhipu.oapi.service.v4.model.ChatMessage) delta).getContent();
|
||||
// log.info("Zhipuai API stream test message #{}: content={}", messageCount[0], content);
|
||||
// } else {
|
||||
// log.info("Zhipuai API stream test message #{}: delta is not ChatMessage", messageCount[0]);
|
||||
// }
|
||||
// })
|
||||
// .doOnComplete(() -> {
|
||||
// log.info("Zhipuai API stream test completed, total messages: {}", messageCount[0]);
|
||||
// })
|
||||
// .doOnError(error -> {
|
||||
// log.error("Zhipuai API stream test error: ", error);
|
||||
// })
|
||||
// .subscribe();
|
||||
// } else {
|
||||
// log.error("Zhipuai API stream test failed: {}", response.getError());
|
||||
// }
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Zhipuai API test stream response error", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -2,7 +2,7 @@
|
||||
* @Author: jackning 270580156@qq.com
|
||||
* @Date: 2025-02-19 09:39:15
|
||||
* @LastEditors: jackning 270580156@qq.com
|
||||
* @LastEditTime: 2025-07-16 15:45:45
|
||||
* @LastEditTime: 2025-08-21 12:38:11
|
||||
* @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
|
||||
* Please be aware of the BSL license restrictions before installing Bytedesk IM –
|
||||
* selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
|
||||
@@ -49,6 +49,7 @@ public class ZhipuaiController {
|
||||
|
||||
private final BytedeskProperties bytedeskProperties;
|
||||
private final ZhipuaiService zhipuaiService;
|
||||
private final ZhipuaiChatService zhipuaiChatService;
|
||||
private final ExecutorService executorService = Executors.newCachedThreadPool();
|
||||
|
||||
/**
|
||||
@@ -140,7 +141,7 @@ public class ZhipuaiController {
|
||||
}
|
||||
|
||||
try {
|
||||
String result = zhipuaiService.rolePlayChat(message, userInfo, botInfo, botName, userName);
|
||||
String result = zhipuaiChatService.rolePlayChat(message, userInfo, botInfo, botName, userName);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in role play chat", e);
|
||||
@@ -191,7 +192,7 @@ public class ZhipuaiController {
|
||||
functions.add(function);
|
||||
}
|
||||
|
||||
String result = zhipuaiService.functionCallingChat(message, functions);
|
||||
String result = zhipuaiChatService.functionCallingChat(message, functions);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in function calling chat", e);
|
||||
@@ -224,7 +225,7 @@ public class ZhipuaiController {
|
||||
functions.add(function);
|
||||
}
|
||||
|
||||
return zhipuaiService.functionCallingChatStream(message, functions);
|
||||
return zhipuaiChatService.functionCallingChatStream(message, functions);
|
||||
} catch (Exception e) {
|
||||
log.error("Error in function calling chat stream", e);
|
||||
return Flux.just("Error: " + e.getMessage());
|
||||
@@ -303,7 +304,7 @@ public class ZhipuaiController {
|
||||
functions.add(weatherFunction);
|
||||
|
||||
String message = "请告诉我" + city + "的天气情况";
|
||||
String result = zhipuaiService.functionCallingChat(message, functions);
|
||||
String result = zhipuaiChatService.functionCallingChat(message, functions);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in weather function call", e);
|
||||
@@ -351,7 +352,7 @@ public class ZhipuaiController {
|
||||
functions.add(flightFunction);
|
||||
|
||||
String message = "请查询从" + from + "到" + to + "的航班价格";
|
||||
String result = zhipuaiService.functionCallingChat(message, functions);
|
||||
String result = zhipuaiChatService.functionCallingChat(message, functions);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in flight function call", e);
|
||||
@@ -377,7 +378,7 @@ public class ZhipuaiController {
|
||||
return ResponseEntity.ok(JsonResult.error("Prompt is required"));
|
||||
}
|
||||
|
||||
String result = zhipuaiService.generateImage(prompt, requestId);
|
||||
String result = zhipuaiChatService.generateImage(prompt, requestId);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in image generation", e);
|
||||
@@ -402,7 +403,7 @@ public class ZhipuaiController {
|
||||
return ResponseEntity.ok(JsonResult.error("Text is required"));
|
||||
}
|
||||
|
||||
List<Double> result = zhipuaiService.getEmbedding(text);
|
||||
List<Double> result = zhipuaiChatService.getEmbedding(text);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in embedding", e);
|
||||
@@ -428,7 +429,7 @@ public class ZhipuaiController {
|
||||
return ResponseEntity.ok(JsonResult.error("Texts are required"));
|
||||
}
|
||||
|
||||
List<List<Double>> result = zhipuaiService.getEmbeddings(texts);
|
||||
List<List<Double>> result = zhipuaiChatService.getEmbeddings(texts);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in embeddings", e);
|
||||
@@ -455,7 +456,7 @@ public class ZhipuaiController {
|
||||
return ResponseEntity.ok(JsonResult.error("Text is required"));
|
||||
}
|
||||
|
||||
File result = zhipuaiService.generateSpeech(text, voice, responseFormat);
|
||||
File result = zhipuaiChatService.generateSpeech(text, voice, responseFormat);
|
||||
if (result != null) {
|
||||
return ResponseEntity.ok(JsonResult.success("Speech generated: " + result.getAbsolutePath()));
|
||||
} else {
|
||||
@@ -496,7 +497,7 @@ public class ZhipuaiController {
|
||||
return ResponseEntity.ok(JsonResult.error("Voice data file not found"));
|
||||
}
|
||||
|
||||
File result = zhipuaiService.generateCustomSpeech(text, voiceText, voiceData, responseFormat);
|
||||
File result = zhipuaiChatService.generateCustomSpeech(text, voiceText, voiceData, responseFormat);
|
||||
if (result != null) {
|
||||
return ResponseEntity.ok(JsonResult.success("Custom voice generated: " + result.getAbsolutePath()));
|
||||
} else {
|
||||
@@ -530,7 +531,7 @@ public class ZhipuaiController {
|
||||
purpose = "fine-tune";
|
||||
}
|
||||
|
||||
String result = zhipuaiService.uploadFile(filePath, purpose);
|
||||
String result = zhipuaiChatService.uploadFile(filePath, purpose);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in file upload", e);
|
||||
@@ -549,7 +550,7 @@ public class ZhipuaiController {
|
||||
}
|
||||
|
||||
try {
|
||||
List<Map<String, Object>> result = zhipuaiService.queryFiles();
|
||||
List<Map<String, Object>> result = zhipuaiChatService.queryFiles();
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in query files", e);
|
||||
@@ -579,7 +580,7 @@ public class ZhipuaiController {
|
||||
return ResponseEntity.ok(JsonResult.error("Output path is required"));
|
||||
}
|
||||
|
||||
File result = zhipuaiService.downloadFile(fileId, outputPath);
|
||||
File result = zhipuaiChatService.downloadFile(fileId, outputPath);
|
||||
if (result != null) {
|
||||
return ResponseEntity.ok(JsonResult.success("File downloaded: " + result.getAbsolutePath()));
|
||||
} else {
|
||||
@@ -613,7 +614,7 @@ public class ZhipuaiController {
|
||||
return ResponseEntity.ok(JsonResult.error("Training file is required"));
|
||||
}
|
||||
|
||||
String result = zhipuaiService.createFineTuningJob(model, trainingFile);
|
||||
String result = zhipuaiChatService.createFineTuningJob(model, trainingFile);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in create fine-tuning job", e);
|
||||
@@ -632,7 +633,7 @@ public class ZhipuaiController {
|
||||
}
|
||||
|
||||
try {
|
||||
Map<String, Object> result = zhipuaiService.queryFineTuningJob(jobId);
|
||||
Map<String, Object> result = zhipuaiChatService.queryFineTuningJob(jobId);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in query fine-tuning job", e);
|
||||
@@ -657,7 +658,7 @@ public class ZhipuaiController {
|
||||
return ResponseEntity.ok(JsonResult.error("Message is required"));
|
||||
}
|
||||
|
||||
String result = zhipuaiService.chatAsync(message);
|
||||
String result = zhipuaiChatService.chatAsync(message);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in async chat", e);
|
||||
@@ -687,7 +688,7 @@ public class ZhipuaiController {
|
||||
searchQuery = message; // 如果没有指定搜索查询,使用消息作为搜索查询
|
||||
}
|
||||
|
||||
String result = zhipuaiService.chatWithWebSearch(message, searchQuery);
|
||||
String result = zhipuaiChatService.chatWithWebSearch(message, searchQuery);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in web search chat", e);
|
||||
@@ -712,7 +713,7 @@ public class ZhipuaiController {
|
||||
return ResponseEntity.ok(JsonResult.error("Message is required"));
|
||||
}
|
||||
|
||||
String result = zhipuaiService.chatWithVoice(message);
|
||||
String result = zhipuaiChatService.chatWithVoice(message);
|
||||
return ResponseEntity.ok(JsonResult.success(result));
|
||||
} catch (Exception e) {
|
||||
log.error("Error in voice chat", e);
|
||||
@@ -724,39 +725,39 @@ public class ZhipuaiController {
|
||||
* 测试流式响应功能
|
||||
* GET http://127.0.0.1:9003/zhipuai/test-stream
|
||||
*/
|
||||
@GetMapping("/test-stream")
|
||||
public ResponseEntity<JsonResult<?>> testStreamResponse() {
|
||||
if (!bytedeskProperties.getDebug()) {
|
||||
return ResponseEntity.ok(JsonResult.error("Zhipuai service is not available"));
|
||||
}
|
||||
// @GetMapping("/test-stream")
|
||||
// public ResponseEntity<JsonResult<?>> testStreamResponse() {
|
||||
// if (!bytedeskProperties.getDebug()) {
|
||||
// return ResponseEntity.ok(JsonResult.error("Zhipuai service is not available"));
|
||||
// }
|
||||
|
||||
try {
|
||||
zhipuaiService.testStreamResponse();
|
||||
return ResponseEntity.ok(JsonResult.success("Stream response test completed. Check logs for details."));
|
||||
} catch (Exception e) {
|
||||
log.error("Error testing stream response", e);
|
||||
return ResponseEntity.ok(JsonResult.error("Error testing stream response: " + e.getMessage()));
|
||||
}
|
||||
}
|
||||
// try {
|
||||
// zhipuaiService.testStreamResponse();
|
||||
// return ResponseEntity.ok(JsonResult.success("Stream response test completed. Check logs for details."));
|
||||
// } catch (Exception e) {
|
||||
// log.error("Error testing stream response", e);
|
||||
// return ResponseEntity.ok(JsonResult.error("Error testing stream response: " + e.getMessage()));
|
||||
// }
|
||||
// }
|
||||
|
||||
/**
|
||||
* 简单流式测试 - 完全按照官方示例代码实现
|
||||
* GET http://127.0.0.1:9003/zhipuai/test-simple-stream
|
||||
*/
|
||||
@GetMapping("/test-simple-stream")
|
||||
public ResponseEntity<JsonResult<?>> testSimpleStream() {
|
||||
if (!bytedeskProperties.getDebug()) {
|
||||
return ResponseEntity.ok(JsonResult.error("Zhipuai service is not available"));
|
||||
}
|
||||
// @GetMapping("/test-simple-stream")
|
||||
// public ResponseEntity<JsonResult<?>> testSimpleStream() {
|
||||
// if (!bytedeskProperties.getDebug()) {
|
||||
// return ResponseEntity.ok(JsonResult.error("Zhipuai service is not available"));
|
||||
// }
|
||||
|
||||
try {
|
||||
zhipuaiService.testSimpleStream();
|
||||
return ResponseEntity.ok(JsonResult.success("Simple stream test completed. Check logs for details."));
|
||||
} catch (Exception e) {
|
||||
log.error("Error testing simple stream", e);
|
||||
return ResponseEntity.ok(JsonResult.error("Error testing simple stream: " + e.getMessage()));
|
||||
}
|
||||
}
|
||||
// try {
|
||||
// zhipuaiChatService.testSimpleStream();
|
||||
// return ResponseEntity.ok(JsonResult.success("Simple stream test completed. Check logs for details."));
|
||||
// } catch (Exception e) {
|
||||
// log.error("Error testing simple stream", e);
|
||||
// return ResponseEntity.ok(JsonResult.error("Error testing simple stream: " + e.getMessage()));
|
||||
// }
|
||||
// }
|
||||
|
||||
/**
|
||||
* 在 Bean 销毁时关闭线程池
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user