From f6f6bb13be26bbbcb9a2c8fc39db22f516e8d48d Mon Sep 17 00:00:00 2001 From: jack ning Date: Thu, 21 Aug 2025 13:08:37 +0800 Subject: [PATCH] update --- .../providers/baidu/SpringAIBaiduService.java | 41 +- .../dashscope/SpringAIDashscopeService.java | 21 +- .../deepseek/SpringAIDeepseekService.java | 21 +- .../providers/gitee/SpringAIGiteeService.java | 9 +- .../minimax/SpringAIMinimaxService.java | 21 +- .../ollama/SpringAIOllamaChatConfig.java | 114 +-- .../ollama/SpringAIOllamaChatService.java | 324 +++++++ .../ollama/SpringAIOllamaController.java | 5 +- .../ollama/SpringAIOllamaService.java | 12 +- .../openai/SpringAIOpenaiService.java | 9 +- .../openrouter/SpringAIOpenrouterService.java | 9 +- .../SpringAISiliconFlowService.java | 9 +- .../tencent/SpringAITencentService.java | 9 +- .../volcengine/SpringAIVolcengineService.java | 9 +- .../zhipuai/SpringAIZhipuaiService.java | 41 +- .../springai/service/BaseSpringAIService.java | 37 +- .../ai/springai/service/ChatTokenUsage.java | 29 + .../ai/zhipuai/ZhipuaiChatConfig.java | 4 +- .../ai/zhipuai/ZhipuaiChatService.java | 624 +++++++++++++ .../ai/zhipuai/ZhipuaiController.java | 91 +- .../bytedesk/ai/zhipuai/ZhipuaiService.java | 860 ++++-------------- 21 files changed, 1381 insertions(+), 918 deletions(-) create mode 100644 modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaChatService.java create mode 100644 modules/ai/src/main/java/com/bytedesk/ai/springai/service/ChatTokenUsage.java create mode 100644 modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiChatService.java diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/baidu/SpringAIBaiduService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/baidu/SpringAIBaiduService.java index 750038c063..61f20b9134 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/baidu/SpringAIBaiduService.java +++ 
b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/baidu/SpringAIBaiduService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-28 11:44:03 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 15:10:31 + * @LastEditTime: 2025-08-21 12:46:15 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. @@ -31,6 +31,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService; import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; +import com.bytedesk.ai.springai.service.ChatTokenUsage; import lombok.extern.slf4j.Slf4j; @@ -82,7 +83,7 @@ public class SpringAIBaiduService extends BaseSpringAIService { // 记录开始时间和初始化token使用统计 long startTime = System.currentTimeMillis(); final boolean[] success = { false }; - final TokenUsage[] tokenUsage = { new TokenUsage(0, 0, 0) }; + final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) }; // 用于累积所有响应文本 final StringBuilder[] fullResponseText = { new StringBuilder() }; @@ -118,7 +119,7 @@ public class SpringAIBaiduService extends BaseSpringAIService { // 如果token提取失败,使用累积的完整响应文本来估算token if (tokenUsage[0].getTotalTokens() == 0 && fullResponseText[0].length() > 0) { log.info("Baidu API using accumulated response text for token estimation: {}", fullResponseText[0].toString()); - TokenUsage estimatedUsage = estimateBaiduTokenUsageFromText(fullResponseText[0].toString()); + ChatTokenUsage estimatedUsage = estimateBaiduTokenUsageFromText(fullResponseText[0].toString()); tokenUsage[0] = estimatedUsage; log.info("Baidu API final estimated token usage: {}", estimatedUsage); } @@ -138,7 +139,7 @@ public class SpringAIBaiduService extends BaseSpringAIService { 
log.info("SpringAIBaiduService processPromptSync with full prompt content: {}", fullPromptContent); long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); try { if (baiduChatModel == null) { @@ -212,7 +213,7 @@ public class SpringAIBaiduService extends BaseSpringAIService { // 记录开始时间和初始化token使用统计 long startTime = System.currentTimeMillis(); final boolean[] success = { false }; - final TokenUsage[] tokenUsage = { new TokenUsage(0, 0, 0) }; + final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) }; // 用于累积所有响应文本 final StringBuilder[] fullResponseText = { new StringBuilder() }; @@ -255,7 +256,7 @@ public class SpringAIBaiduService extends BaseSpringAIService { // 如果token提取失败,使用累积的完整响应文本来估算token if (tokenUsage[0].getTotalTokens() == 0 && fullResponseText[0].length() > 0) { log.info("Baidu API using accumulated response text for token estimation: {}", fullResponseText[0].toString()); - TokenUsage estimatedUsage = estimateBaiduTokenUsageFromText(fullResponseText[0].toString()); + ChatTokenUsage estimatedUsage = estimateBaiduTokenUsageFromText(fullResponseText[0].toString()); tokenUsage[0] = estimatedUsage; log.info("Baidu API final estimated token usage: {}", estimatedUsage); } @@ -283,17 +284,17 @@ public class SpringAIBaiduService extends BaseSpringAIService { * @param response ChatResponse对象 * @return TokenUsage对象 */ - private TokenUsage extractBaiduTokenUsage(org.springframework.ai.chat.model.ChatResponse response) { + private ChatTokenUsage extractBaiduTokenUsage(org.springframework.ai.chat.model.ChatResponse response) { try { if (response == null) { log.warn("Baidu API response is null"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } var metadata = response.getMetadata(); if (metadata == null) { log.warn("Baidu API response metadata is null"); - return new TokenUsage(0, 0, 0); + return new 
ChatTokenUsage(0, 0, 0); } log.info("Baidu API manual token extraction - metadata: {}", metadata); @@ -390,7 +391,7 @@ public class SpringAIBaiduService extends BaseSpringAIService { log.info("Baidu API manual token extraction result - prompt: {}, completion: {}, total: {}", promptTokens, completionTokens, totalTokens); - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } } @@ -404,7 +405,7 @@ public class SpringAIBaiduService extends BaseSpringAIService { // 方法3: 如果手动提取失败,尝试使用原始的extractTokenUsage方法作为后备 log.info("Baidu API manual extraction failed, trying original extractTokenUsage method"); - TokenUsage fallbackUsage = extractTokenUsage(response); + ChatTokenUsage fallbackUsage = extractTokenUsage(response); if (fallbackUsage.getTotalTokens() > 0) { log.info("Baidu API fallback extraction successful: {}", fallbackUsage); return fallbackUsage; @@ -412,13 +413,13 @@ public class SpringAIBaiduService extends BaseSpringAIService { // 方法4: 如果所有方法都失败,尝试估算token使用量 log.info("Baidu API all extraction methods failed, attempting to estimate token usage"); - TokenUsage estimatedUsage = estimateBaiduTokenUsageFromResponse(response); + ChatTokenUsage estimatedUsage = estimateBaiduTokenUsageFromResponse(response); log.info("Baidu API estimated token usage: {}", estimatedUsage); return estimatedUsage; } catch (Exception e) { log.error("Error in manual Baidu token extraction", e); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } } @@ -428,10 +429,10 @@ public class SpringAIBaiduService extends BaseSpringAIService { * @param response ChatResponse对象 * @return 估算的TokenUsage对象 */ - private TokenUsage estimateBaiduTokenUsageFromResponse(org.springframework.ai.chat.model.ChatResponse response) { + private ChatTokenUsage estimateBaiduTokenUsageFromResponse(org.springframework.ai.chat.model.ChatResponse response) { try { if (response == null || response.getResults() == 
null || response.getResults().isEmpty()) { - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } // 获取输出文本 @@ -446,7 +447,7 @@ public class SpringAIBaiduService extends BaseSpringAIService { } catch (Exception e) { log.error("Error estimating Baidu token usage", e); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } } @@ -456,10 +457,10 @@ public class SpringAIBaiduService extends BaseSpringAIService { * @param outputText 完整的输出文本 * @return 估算的TokenUsage对象 */ - private TokenUsage estimateBaiduTokenUsageFromText(String outputText) { + private ChatTokenUsage estimateBaiduTokenUsageFromText(String outputText) { try { if (outputText == null || outputText.isEmpty()) { - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } // 估算token使用量 @@ -474,11 +475,11 @@ public class SpringAIBaiduService extends BaseSpringAIService { "Baidu API estimated tokens - output: {} chars -> {} tokens, estimated prompt: {} tokens, total: {} tokens", outputText.length(), completionTokens, promptTokens, totalTokens); - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } catch (Exception e) { log.error("Error estimating Baidu token usage from text", e); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } } diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/dashscope/SpringAIDashscopeService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/dashscope/SpringAIDashscopeService.java index 1ae02476b6..1962328303 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/dashscope/SpringAIDashscopeService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/dashscope/SpringAIDashscopeService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-28 11:44:03 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-18 17:04:41 + * 
@LastEditTime: 2025-08-21 12:46:21 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. @@ -33,6 +33,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService; import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; +import com.bytedesk.ai.springai.service.ChatTokenUsage; import lombok.extern.slf4j.Slf4j; @@ -87,7 +88,7 @@ public class SpringAIDashscopeService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; final StringBuilder[] fullResponseText = {new StringBuilder()}; // 使用同一个ChatModel实例,但传入不同的选项 @@ -132,7 +133,7 @@ public class SpringAIDashscopeService extends BaseSpringAIService { protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) { long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); log.info("Dashscope API sync fullPromptContent: {}", fullPromptContent); @@ -202,7 +203,7 @@ public class SpringAIDashscopeService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; final StringBuilder[] fullResponseText = {new StringBuilder()}; bytedeskDashscopeChatModel.stream(requestPrompt).subscribe( @@ -259,17 +260,17 @@ public class SpringAIDashscopeService extends BaseSpringAIService { * @param 
response ChatResponse对象 * @return TokenUsage对象 */ - private TokenUsage extractDashscopeTokenUsage(ChatResponse response) { + private ChatTokenUsage extractDashscopeTokenUsage(ChatResponse response) { try { if (response == null) { log.warn("Dashscope API response is null"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } var metadata = response.getMetadata(); if (metadata == null) { log.warn("Dashscope API response metadata is null"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } log.info("Dashscope API token extraction - metadata: {}", metadata); @@ -286,18 +287,18 @@ public class SpringAIDashscopeService extends BaseSpringAIService { promptTokens, completionTokens, totalTokens); if (totalTokens > 0) { - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } } } catch (Exception e) { log.debug("Could not get usage via getUsage() method: {}", e.getMessage()); } - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } catch (Exception e) { log.error("Error in Dashscope token extraction", e); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } } diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/deepseek/SpringAIDeepseekService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/deepseek/SpringAIDeepseekService.java index 717c21b577..ad91a9fdc3 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/deepseek/SpringAIDeepseekService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/deepseek/SpringAIDeepseekService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-28 11:44:03 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-18 17:04:22 + * @LastEditTime: 2025-08-21 12:46:25 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL 
license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. @@ -34,6 +34,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService; import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; +import com.bytedesk.ai.springai.service.ChatTokenUsage; import lombok.extern.slf4j.Slf4j; @@ -87,7 +88,7 @@ public class SpringAIDeepseekService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; // 使用同一个ChatModel实例,但传入不同的选项 deepseekChatModel.stream(requestPrompt).subscribe( @@ -126,7 +127,7 @@ public class SpringAIDeepseekService extends BaseSpringAIService { log.info("SpringAIDeepseekService processPromptSync with full prompt content: {}", fullPromptContent); long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); try { if (deepseekChatModel == null) { @@ -195,7 +196,7 @@ public class SpringAIDeepseekService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; deepseekChatModel.stream(requestPrompt).subscribe( response -> { @@ -245,17 +246,17 @@ public class SpringAIDeepseekService extends BaseSpringAIService { * @param response ChatResponse对象 * @return TokenUsage对象 */ - private TokenUsage extractDeepSeekTokenUsage(ChatResponse response) { + private ChatTokenUsage extractDeepSeekTokenUsage(ChatResponse response) { try { if (response == null) { 
log.warn("Deepseek API response is null"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } var metadata = response.getMetadata(); if (metadata == null) { log.warn("Deepseek API response metadata is null"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } log.info("Deepseek API token extraction - metadata: {}", metadata); @@ -272,18 +273,18 @@ public class SpringAIDeepseekService extends BaseSpringAIService { promptTokens, completionTokens, totalTokens); if (totalTokens > 0) { - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } } } catch (Exception e) { log.debug("Could not get usage via getUsage() method: {}", e.getMessage()); } - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } catch (Exception e) { log.error("Error in Deepseek token extraction", e); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } } diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/gitee/SpringAIGiteeService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/gitee/SpringAIGiteeService.java index 36891f9984..7eec56b496 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/gitee/SpringAIGiteeService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/gitee/SpringAIGiteeService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-28 11:44:03 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 13:23:44 + * @LastEditTime: 2025-08-21 12:46:30 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. 
@@ -32,6 +32,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService; import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; +import com.bytedesk.ai.springai.service.ChatTokenUsage; import lombok.extern.slf4j.Slf4j; @@ -84,7 +85,7 @@ public class SpringAIGiteeService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; // 使用同一个ChatModel实例,但传入不同的选项 giteeChatModel.stream(requestPrompt).subscribe( @@ -123,7 +124,7 @@ public class SpringAIGiteeService extends BaseSpringAIService { log.info("SpringAIGiteeService processPromptSync with full prompt content: {}", fullPromptContent); long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); try { if (giteeChatModel == null) { @@ -193,7 +194,7 @@ public class SpringAIGiteeService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; giteeChatModel.stream(requestPrompt).subscribe( response -> { diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/minimax/SpringAIMinimaxService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/minimax/SpringAIMinimaxService.java index 5043a9beb7..9a6fecd3ea 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/minimax/SpringAIMinimaxService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/minimax/SpringAIMinimaxService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-28 11:44:03 * @LastEditors: jackning 
270580156@qq.com - * @LastEditTime: 2025-07-22 08:34:28 + * @LastEditTime: 2025-08-21 12:46:36 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. @@ -34,6 +34,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService; import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; +import com.bytedesk.ai.springai.service.ChatTokenUsage; import lombok.extern.slf4j.Slf4j; @@ -87,7 +88,7 @@ public class SpringAIMinimaxService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; // 使用同一个ChatModel实例,但传入不同的选项 minimaxChatModel.stream(requestPrompt).subscribe( @@ -126,7 +127,7 @@ public class SpringAIMinimaxService extends BaseSpringAIService { log.info("SpringAIMinimaxService processPromptSync with full prompt content: {}", fullPromptContent); long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); try { if (minimaxChatModel == null) { @@ -195,7 +196,7 @@ public class SpringAIMinimaxService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; minimaxChatModel.stream(requestPrompt).subscribe( response -> { @@ -245,17 +246,17 @@ public class SpringAIMinimaxService extends BaseSpringAIService { * @param response ChatResponse对象 * @return TokenUsage对象 */ - private 
TokenUsage extractDeepSeekTokenUsage(ChatResponse response) { + private ChatTokenUsage extractDeepSeekTokenUsage(ChatResponse response) { try { if (response == null) { log.warn("Minimax API response is null"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } var metadata = response.getMetadata(); if (metadata == null) { log.warn("Minimax API response metadata is null"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } log.info("Minimax API token extraction - metadata: {}", metadata); @@ -272,18 +273,18 @@ public class SpringAIMinimaxService extends BaseSpringAIService { promptTokens, completionTokens, totalTokens); if (totalTokens > 0) { - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } } } catch (Exception e) { log.debug("Could not get usage via getUsage() method: {}", e.getMessage()); } - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } catch (Exception e) { log.error("Error in Minimax token extraction", e); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } } diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaChatConfig.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaChatConfig.java index b992ce287f..e8738b4085 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaChatConfig.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaChatConfig.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2024-05-31 10:24:39 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-08-20 21:25:13 + * @LastEditTime: 2025-08-21 12:39:58 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or 
hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. @@ -13,67 +13,67 @@ */ package com.bytedesk.ai.springai.providers.ollama; -// import org.springframework.ai.chat.client.ChatClient; -// import org.springframework.ai.chat.client.advisor.SimpleLoggerAdvisor; -// import org.springframework.ai.ollama.OllamaChatModel; -// import org.springframework.ai.ollama.api.OllamaApi; -// import org.springframework.ai.ollama.api.OllamaOptions; -// import org.springframework.beans.factory.annotation.Value; -// import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; -// import org.springframework.context.annotation.Bean; -// import org.springframework.context.annotation.Configuration; -// import lombok.Data; -// import lombok.extern.slf4j.Slf4j; +import org.springframework.ai.chat.client.ChatClient; +import org.springframework.ai.chat.client.advisor.SimpleLoggerAdvisor; +import org.springframework.ai.ollama.OllamaChatModel; +import org.springframework.ai.ollama.api.OllamaApi; +import org.springframework.ai.ollama.api.OllamaOptions; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; -// /** -// * https://ollama.com/ -// * https://www.promptingguide.ai/ -// * Ollama Chat Configuration -// */ -// @Slf4j -// @Data -// @Configuration -// @ConditionalOnProperty(prefix = "spring.ai.ollama.chat", name = "enabled", havingValue = "true", matchIfMissing = false) -// public class SpringAIOllamaChatConfig { +/** + * https://ollama.com/ + * https://www.promptingguide.ai/ + * Ollama Chat Configuration + */ +@Slf4j +@Data +@Configuration +@ConditionalOnProperty(prefix = "spring.ai.ollama.chat", name = "enabled", havingValue = "true", matchIfMissing = 
false) +public class SpringAIOllamaChatConfig { -// @Value("${spring.ai.ollama.base-url:http://host.docker.internal:11434}") -// private String ollamaBaseUrl; + @Value("${spring.ai.ollama.base-url:http://host.docker.internal:11434}") + private String ollamaBaseUrl; -// @Value("${spring.ai.ollama.chat.options.model:qwen3:0.6b}") -// private String ollamaChatOptionsModel; + @Value("${spring.ai.ollama.chat.options.model:qwen3:0.6b}") + private String ollamaChatOptionsModel; -// @Bean("bytedeskOllamaApi") -// OllamaApi bytedeskOllamaApi() { -// return OllamaApi.builder() -// .baseUrl(ollamaBaseUrl) -// .build(); -// } + @Bean("bytedeskOllamaApi") + OllamaApi bytedeskOllamaApi() { + return OllamaApi.builder() + .baseUrl(ollamaBaseUrl) + .build(); + } -// @Bean("bytedeskOllamaChatOptions") -// OllamaOptions bytedeskOllamaChatOptions() { -// return OllamaOptions.builder() -// .model(ollamaChatOptionsModel) -// // 使用keepAlive而不是timeout来设置超时 -// // .keepAlive("30s") // 使用30秒的超时设置 -// // .numKeep(0) // 不保留历史对话上下文 -// // .useNUMA(ollamaChatOptionsNuma) // 使用正确的方法名useNUMA而不是numa -// .build(); -// } + @Bean("bytedeskOllamaChatOptions") + OllamaOptions bytedeskOllamaChatOptions() { + return OllamaOptions.builder() + .model(ollamaChatOptionsModel) + // 使用keepAlive而不是timeout来设置超时 + // .keepAlive("30s") // 使用30秒的超时设置 + // .numKeep(0) // 不保留历史对话上下文 + // .useNUMA(ollamaChatOptionsNuma) // 使用正确的方法名useNUMA而不是numa + .build(); + } -// @Bean("bytedeskOllamaChatModel") -// OllamaChatModel bytedeskOllamaChatModel() { -// return OllamaChatModel.builder() -// .ollamaApi(bytedeskOllamaApi()) -// .defaultOptions(bytedeskOllamaChatOptions()) -// .build(); -// } + @Bean("bytedeskOllamaChatModel") + OllamaChatModel bytedeskOllamaChatModel() { + return OllamaChatModel.builder() + .ollamaApi(bytedeskOllamaApi()) + .defaultOptions(bytedeskOllamaChatOptions()) + .build(); + } -// @Bean("bytedeskOllamaChatClient") -// ChatClient bytedeskOllamaChatClient() { -// return 
ChatClient.builder(bytedeskOllamaChatModel()) -// .defaultOptions(bytedeskOllamaChatOptions()) -// .defaultAdvisors(new SimpleLoggerAdvisor()) -// .build(); -// } + @Bean("bytedeskOllamaChatClient") + ChatClient bytedeskOllamaChatClient() { + return ChatClient.builder(bytedeskOllamaChatModel()) + .defaultOptions(bytedeskOllamaChatOptions()) + .defaultAdvisors(new SimpleLoggerAdvisor()) + .build(); + } -// } \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaChatService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaChatService.java new file mode 100644 index 0000000000..a9ccc9a923 --- /dev/null +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaChatService.java @@ -0,0 +1,324 @@ +/* + * @Author: jackning 270580156@qq.com + * @Date: 2025-02-26 16:59:14 + * @LastEditors: jackning 270580156@qq.com + * @LastEditTime: 2025-08-21 12:45:00 + * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk + * Please be aware of the BSL license restrictions before installing Bytedesk IM – + * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. + * Business Source License 1.1: https://github.com/Bytedesk/bytedesk/blob/main/LICENSE + * contact: 270580156@qq.com + * + * Copyright (c) 2025 by bytedesk.com, All Rights Reserved. 
/*
 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk
 * Please be aware of the BSL license restrictions before installing Bytedesk IM –
 * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license.
 * Business Source License 1.1: https://github.com/Bytedesk/bytedesk/blob/main/LICENSE
 * contact: 270580156@qq.com
 *
 * Copyright (c) 2025 by bytedesk.com, All Rights Reserved.
 */
package com.bytedesk.ai.springai.providers.ollama;

import java.util.List;

import org.springframework.ai.chat.model.Generation;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.ai.ollama.api.OllamaApi;
import org.springframework.ai.ollama.api.OllamaOptions;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.http.HttpStatus;
import org.springframework.stereotype.Service;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;
import org.springframework.web.client.HttpClientErrorException;
import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;

import com.bytedesk.ai.robot.RobotLlm;
import com.bytedesk.ai.robot.RobotProtobuf;
import com.bytedesk.ai.springai.service.BaseSpringAIService;
import com.bytedesk.ai.springai.service.ChatTokenUsage;
import com.bytedesk.core.constant.LlmConsts;
import com.bytedesk.core.message.MessageProtobuf;
import com.bytedesk.core.message.MessageTypeEnum;

import lombok.extern.slf4j.Slf4j;

/**
 * Ollama chat provider backed by Spring AI's {@link OllamaChatModel}.
 *
 * <p>Implements the three {@link BaseSpringAIService} delivery channels
 * (WebSocket streaming, synchronous call, SSE streaming) and records token
 * usage and response time for every request. The chat model bean is optional
 * ({@code @Autowired(required = false)}), so every entry point guards against
 * it being absent before use.
 */
@Slf4j
@Service
@ConditionalOnProperty(prefix = "spring.ai.ollama.chat", name = "enabled", havingValue = "true", matchIfMissing = false)
public class SpringAIOllamaChatService extends BaseSpringAIService {

    /** Model name reported in usage records when the robot config does not name one. */
    private static final String DEFAULT_MODEL = "llama2";

    /** User-facing message when the chat model bean or LLM config is missing. */
    private static final String SERVICE_UNAVAILABLE = "Ollama service is not available";

    /** User-facing message for transient failures (kept byte-identical to callers' expectations). */
    private static final String RETRY_LATER = "服务暂时不可用,请稍后重试";

    @Autowired
    @Qualifier("bytedeskOllamaApi")
    private OllamaApi bytedeskOllamaApi;

    // Optional: the chat model is only registered when Ollama is configured.
    @Autowired(required = false)
    @Qualifier("bytedeskOllamaChatModel")
    private OllamaChatModel bytedeskOllamaChatModel;

    public SpringAIOllamaChatService() {
        super();
    }

    /**
     * Build per-request {@link OllamaOptions} from the robot's LLM config.
     *
     * @param llm robot LLM configuration (may be null; the base helper handles that)
     * @return options reflecting the robot's model/temperature/topP/topK, or null
     */
    private OllamaOptions createOllamaOptions(RobotLlm llm) {
        return super.createDynamicOptions(llm, robotLlm -> OllamaOptions.builder()
                .model(robotLlm.getTextModel())
                .temperature(robotLlm.getTemperature())
                .topP(robotLlm.getTopP())
                .topK(robotLlm.getTopK())
                .build());
    }

    /**
     * Create a standalone {@link OllamaApi} client for the given base URL.
     */
    public OllamaApi createOllamaApi(String apiUrl) {
        return OllamaApi.builder()
                .baseUrl(apiUrl)
                .build();
    }

    /**
     * Check whether the model named in the request exists on the target Ollama server.
     *
     * @param request carries the server URL and model name (name must be non-blank)
     * @return true if the server knows the model; false on 404 or any other failure
     */
    public Boolean isModelExists(OllamaRequest request) {
        OllamaApi ollamaApi = createOllamaApi(request.getApiUrl());
        String modelName = request.getModel();
        Assert.hasText(modelName, "Model name must not be null or empty");
        try {
            ollamaApi.showModel(new OllamaApi.ShowModelRequest(modelName));
            return true;
        } catch (HttpClientErrorException e) {
            if (e.getStatusCode() == HttpStatus.NOT_FOUND) {
                return false;
            }
            log.error("检查模型是否存在时发生错误: {}, 状态码: {}", modelName, e.getStatusCode());
        } catch (Exception e) {
            log.error("检查模型是否存在时发生未知错误: {}, 错误: {}", modelName, e.getMessage());
        }
        return false;
    }

    /**
     * Resolve the model name for usage records: the robot's configured text
     * model when present, otherwise {@link #DEFAULT_MODEL}.
     */
    private String resolveModelType(RobotLlm llm) {
        return (llm != null && StringUtils.hasText(llm.getTextModel())) ? llm.getTextModel() : DEFAULT_MODEL;
    }

    /**
     * Record token usage and elapsed time for one call (success or failure).
     */
    private void recordUsage(RobotProtobuf robot, RobotLlm llm, long startTime, ChatTokenUsage usage,
            boolean success) {
        long responseTime = System.currentTimeMillis() - startTime;
        recordAiTokenUsage(robot, LlmConsts.OLLAMA, resolveModelType(llm),
                usage.getPromptTokens(), usage.getCompletionTokens(), success, responseTime);
    }

    /**
     * Stream a chat completion to the client over WebSocket, then record usage.
     */
    @Override
    protected void processPromptWebsocket(Prompt prompt, RobotProtobuf robot, MessageProtobuf messageProtobufQuery,
            MessageProtobuf messageProtobufReply, String fullPromptContent) {
        RobotLlm llm = robot.getLlm();
        log.info("Ollama API websocket fullPromptContent: {}", fullPromptContent);

        // Guard both the config and the optional chat model bean.
        OllamaChatModel chatModel = bytedeskOllamaChatModel;
        if (llm == null || chatModel == null) {
            log.info("Ollama API not available");
            sendMessageWebsocket(MessageTypeEnum.ERROR, SERVICE_UNAVAILABLE, messageProtobufReply);
            return;
        }

        long startTime = System.currentTimeMillis();
        final boolean[] success = { false };
        final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) };

        try {
            chatModel.stream(prompt).subscribe(
                    response -> {
                        if (response != null) {
                            log.debug("Ollama API response metadata: {}", response.getMetadata());
                            List<Generation> generations = response.getResults();
                            for (Generation generation : generations) {
                                String textContent = generation.getOutput().getText();
                                log.debug("Ollama API Websocket response text: {}", textContent);
                                sendMessageWebsocket(MessageTypeEnum.STREAM, textContent, messageProtobufReply);
                            }
                            tokenUsage[0] = extractTokenUsage(response);
                            success[0] = true;
                        }
                    },
                    error -> {
                        log.error("Ollama API error: ", error);
                        sendMessageWebsocket(MessageTypeEnum.ERROR, RETRY_LATER, messageProtobufReply);
                        success[0] = false;
                    },
                    () -> {
                        log.info("Chat stream completed");
                        recordUsage(robot, llm, startTime, tokenUsage[0], success[0]);
                    });
        } catch (Exception e) {
            log.error("Error processing Ollama prompt", e);
            sendMessageWebsocket(MessageTypeEnum.ERROR, RETRY_LATER, messageProtobufReply);
            success[0] = false;
            recordUsage(robot, llm, startTime, tokenUsage[0], success[0]);
        }
    }

    /**
     * Synchronous chat completion.
     *
     * @return the model's reply text, or a user-facing error string when the
     *         service is unavailable or the call fails
     */
    @Override
    protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) {
        long startTime = System.currentTimeMillis();
        boolean success = false;
        ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0);

        log.info("Ollama API sync fullPromptContent: {}", fullPromptContent);

        // Null-safe: the original dereferenced robot before checking it.
        RobotLlm llm = (robot != null) ? robot.getLlm() : null;
        OllamaChatModel chatModel = bytedeskOllamaChatModel;
        if (llm == null || chatModel == null) {
            log.info("Ollama API not available");
            return SERVICE_UNAVAILABLE;
        }

        try {
            OllamaOptions customOptions = createOllamaOptions(llm);
            if (customOptions != null) {
                // Per-robot options: send a Prompt carrying them.
                Prompt prompt = new Prompt(message, customOptions);
                var response = chatModel.call(prompt);
                log.debug("Ollama API Sync response metadata: {}", response.getMetadata());
                tokenUsage = extractTokenUsage(response);
                success = true;
                return extractTextFromResponse(response);
            }
            // Fallback: plain string call with the model's defaults.
            var response = chatModel.call(message);
            tokenUsage = extractTokenUsage(response);
            success = true;
            return extractTextFromResponse(response);
        } catch (Exception e) {
            log.error("Ollama API sync error", e);
            return RETRY_LATER;
        } finally {
            recordUsage(robot, llm, startTime, tokenUsage, success);
        }
    }

    /**
     * Stream a chat completion to the client over SSE, then record usage.
     * Sends a start message, per-chunk stream messages, and a final end
     * message that carries token usage and the full prompt content.
     */
    @Override
    protected void processPromptSse(Prompt prompt, RobotProtobuf robot, MessageProtobuf messageProtobufQuery,
            MessageProtobuf messageProtobufReply, SseEmitter emitter, String fullPromptContent) {
        Assert.notNull(emitter, "SseEmitter must not be null");
        RobotLlm llm = robot.getLlm();
        log.info("Ollama API SSE fullPromptContent: {}", fullPromptContent);

        OllamaChatModel chatModel = bytedeskOllamaChatModel;
        if (llm == null || chatModel == null) {
            log.info("Ollama API not available");
            // Close the stream immediately with zero usage.
            sendStreamEndMessage(messageProtobufQuery, messageProtobufReply, emitter, 0, 0, 0, fullPromptContent,
                    LlmConsts.OLLAMA, resolveModelType(llm));
            return;
        }

        long startTime = System.currentTimeMillis();
        final boolean[] success = { false };
        final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) };

        try {
            // Tell the client the request was received and is being processed.
            sendStreamStartMessage(messageProtobufReply, emitter, "正在思考中...");

            chatModel.stream(prompt).subscribe(
                    response -> {
                        try {
                            if (response != null && !isEmitterCompleted(emitter)) {
                                List<Generation> generations = response.getResults();
                                for (Generation generation : generations) {
                                    String textContent = generation.getOutput().getText();
                                    log.debug("Ollama API SSE response text: {}", textContent);
                                    sendStreamMessage(messageProtobufQuery, messageProtobufReply, emitter,
                                            textContent);
                                }
                                tokenUsage[0] = extractTokenUsage(response);
                                success[0] = true;
                            }
                        } catch (Exception e) {
                            log.error("Ollama API SSE error 1: ", e);
                            handleSseError(e, messageProtobufQuery, messageProtobufReply, emitter);
                            success[0] = false;
                        }
                    },
                    error -> {
                        log.error("Ollama API SSE error 2: ", error);
                        handleSseError(error, messageProtobufQuery, messageProtobufReply, emitter);
                        success[0] = false;
                    },
                    () -> {
                        log.info("Ollama API SSE complete");
                        sendStreamEndMessage(messageProtobufQuery, messageProtobufReply, emitter,
                                tokenUsage[0].getPromptTokens(), tokenUsage[0].getCompletionTokens(),
                                tokenUsage[0].getTotalTokens(), fullPromptContent, LlmConsts.OLLAMA,
                                resolveModelType(llm));
                        recordUsage(robot, llm, startTime, tokenUsage[0], success[0]);
                    });
        } catch (Exception e) {
            log.error("Error starting Ollama stream 4", e);
            handleSseError(e, messageProtobufQuery, messageProtobufReply, emitter);
            success[0] = false;
            recordUsage(robot, llm, startTime, tokenUsage[0], success[0]);
        }
    }

}
@@ -23,6 +23,7 @@ import org.springframework.ai.chat.prompt.Prompt; import org.springframework.ai.ollama.OllamaChatModel; import org.springframework.ai.ollama.api.OllamaApi; import org.springframework.ai.ollama.api.OllamaOptions; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; @@ -43,7 +44,7 @@ import reactor.core.publisher.Flux; @RestController @RequestMapping("/api/v1/ollama") @RequiredArgsConstructor -// @ConditionalOnProperty(prefix = "spring.ai.ollama.chat", name = "enabled", havingValue = "true", matchIfMissing = false) +@ConditionalOnProperty(prefix = "spring.ai.ollama.chat", name = "enabled", havingValue = "true", matchIfMissing = false) public class SpringAIOllamaController { private final SpringAIOllamaService springAIOllamaService; diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaService.java index a6df0a5348..0deaa40364 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/ollama/SpringAIOllamaService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-26 16:59:14 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-08-20 10:57:53 + * @LastEditTime: 2025-08-21 12:45:56 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. 
@@ -36,6 +36,7 @@ import com.bytedesk.ai.provider.LlmProviderRestService; import com.bytedesk.ai.robot.RobotLlm; import com.bytedesk.ai.robot.RobotProtobuf; import com.bytedesk.ai.springai.service.BaseSpringAIService; +import com.bytedesk.ai.springai.service.ChatTokenUsage; import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; @@ -154,7 +155,7 @@ public class SpringAIOllamaService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = { false }; - final TokenUsage[] tokenUsage = { new TokenUsage(0, 0, 0) }; + final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) }; try { chatModel.stream(prompt).subscribe( @@ -165,6 +166,7 @@ public class SpringAIOllamaService extends BaseSpringAIService { for (Generation generation : generations) { AssistantMessage assistantMessage = generation.getOutput(); String textContent = assistantMessage.getText(); + log.info("Ollama API Websocket response text: {}", textContent); sendMessageWebsocket(MessageTypeEnum.STREAM, textContent, messageProtobufReply); } @@ -204,7 +206,7 @@ public class SpringAIOllamaService extends BaseSpringAIService { protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) { long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); log.info("Ollama API sync fullPromptContent: {}", fullPromptContent); @@ -230,6 +232,7 @@ public class SpringAIOllamaService extends BaseSpringAIService { // 使用自定义选项创建Prompt Prompt prompt = new Prompt(message, customOptions); var response = chatModel.call(prompt); + log.info("Ollama API Sync response metadata: {}", response.getMetadata()); tokenUsage = extractTokenUsage(response); success = true; return extractTextFromResponse(response); @@ -291,7 +294,7 @@ public class 
SpringAIOllamaService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = { false }; - final TokenUsage[] tokenUsage = { new TokenUsage(0, 0, 0) }; + final ChatTokenUsage[] tokenUsage = { new ChatTokenUsage(0, 0, 0) }; try { // 发送初始消息,告知用户请求已收到,正在处理 @@ -305,6 +308,7 @@ public class SpringAIOllamaService extends BaseSpringAIService { for (Generation generation : generations) { AssistantMessage assistantMessage = generation.getOutput(); String textContent = assistantMessage.getText(); + log.info("Ollama API SSE response text: {}", textContent); sendStreamMessage(messageProtobufQuery, messageProtobufReply, emitter, textContent); } diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/openai/SpringAIOpenaiService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/openai/SpringAIOpenaiService.java index bf68f2fba6..70669f495f 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/openai/SpringAIOpenaiService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/openai/SpringAIOpenaiService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-28 11:44:03 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 11:38:24 + * @LastEditTime: 2025-08-21 12:46:49 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. 
@@ -31,6 +31,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService; import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; +import com.bytedesk.ai.springai.service.ChatTokenUsage; import lombok.extern.slf4j.Slf4j; @@ -82,7 +83,7 @@ public class SpringAIOpenaiService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; // 使用同一个ChatModel实例,但传入不同的选项 openaiChatModel.stream(requestPrompt).subscribe( @@ -120,7 +121,7 @@ public class SpringAIOpenaiService extends BaseSpringAIService { protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) { long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); log.info("OpenAI API sync fullPromptContent: {}", fullPromptContent); @@ -188,7 +189,7 @@ public class SpringAIOpenaiService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; openaiChatModel.stream(requestPrompt).subscribe( response -> { diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/openrouter/SpringAIOpenrouterService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/openrouter/SpringAIOpenrouterService.java index aeda40b020..db607ce9b8 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/openrouter/SpringAIOpenrouterService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/openrouter/SpringAIOpenrouterService.java @@ -2,7 +2,7 @@ * @Author: jackning 
270580156@qq.com * @Date: 2025-02-28 11:44:03 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 11:38:40 + * @LastEditTime: 2025-08-21 12:46:55 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. @@ -31,6 +31,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService; import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; +import com.bytedesk.ai.springai.service.ChatTokenUsage; import lombok.extern.slf4j.Slf4j; @@ -83,7 +84,7 @@ public class SpringAIOpenrouterService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; // 使用同一个ChatModel实例,但传入不同的选项 openrouterChatModel.stream(requestPrompt).subscribe( @@ -122,7 +123,7 @@ public class SpringAIOpenrouterService extends BaseSpringAIService { log.info("SpringAIOpenrouterService processPromptSync with full prompt content: {}", fullPromptContent); long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); try { if (openrouterChatModel == null) { @@ -184,7 +185,7 @@ public class SpringAIOpenrouterService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; openrouterChatModel.stream(requestPrompt).subscribe( response -> { diff --git 
a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/siliconflow/SpringAISiliconFlowService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/siliconflow/SpringAISiliconFlowService.java index 6a55bfb6de..b903d8d32d 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/siliconflow/SpringAISiliconFlowService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/siliconflow/SpringAISiliconFlowService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-28 11:44:03 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 14:17:40 + * @LastEditTime: 2025-08-21 12:47:01 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. @@ -32,6 +32,7 @@ import org.springframework.util.StringUtils; import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; import java.util.List; import java.util.Optional; +import com.bytedesk.ai.springai.service.ChatTokenUsage; /** * @author: https://github.com/fzj111 @@ -85,7 +86,7 @@ public class SpringAISiliconFlowService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; // 使用同一个ChatModel实例,但传入不同的选项 siliconFlowChatModel.get().stream(requestPrompt).subscribe( @@ -126,7 +127,7 @@ public class SpringAISiliconFlowService extends BaseSpringAIService { protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) { long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 
0); log.info("SiliconFlow API sync fullPromptContent: {}", fullPromptContent); @@ -197,7 +198,7 @@ public class SpringAISiliconFlowService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; siliconFlowChatModel.get().stream(requestPrompt).subscribe( response -> { diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/tencent/SpringAITencentService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/tencent/SpringAITencentService.java index a3ce157132..344cacba9d 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/tencent/SpringAITencentService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/tencent/SpringAITencentService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-28 11:44:03 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 14:17:58 + * @LastEditTime: 2025-08-21 12:47:07 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. 
@@ -32,6 +32,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService; import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; +import com.bytedesk.ai.springai.service.ChatTokenUsage; import lombok.extern.slf4j.Slf4j; @@ -83,7 +84,7 @@ public class SpringAITencentService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; // 使用同一个ChatModel实例,但传入不同的选项 tencentChatModel.stream(requestPrompt).subscribe( @@ -123,7 +124,7 @@ public class SpringAITencentService extends BaseSpringAIService { protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) { long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); log.info("Tencent API sync fullPromptContent: {}", fullPromptContent); @@ -192,7 +193,7 @@ public class SpringAITencentService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; tencentChatModel.stream(requestPrompt).subscribe( response -> { diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/volcengine/SpringAIVolcengineService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/volcengine/SpringAIVolcengineService.java index 5b78c050f2..013c631427 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/volcengine/SpringAIVolcengineService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/volcengine/SpringAIVolcengineService.java @@ -2,7 +2,7 @@ * @Author: jackning 
270580156@qq.com * @Date: 2025-02-28 11:44:03 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 14:16:43 + * @LastEditTime: 2025-08-21 12:47:12 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. @@ -32,6 +32,7 @@ import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; import lombok.extern.slf4j.Slf4j; +import com.bytedesk.ai.springai.service.ChatTokenUsage; @Slf4j @Service @@ -82,7 +83,7 @@ public class SpringAIVolcengineService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; // 使用同一个ChatModel实例,但传入不同的选项 volcengineChatModel.stream(requestPrompt).subscribe( @@ -121,7 +122,7 @@ public class SpringAIVolcengineService extends BaseSpringAIService { log.info("SpringAIVolcengineService processPromptSync with full prompt content: {}", fullPromptContent); long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); try { if (volcengineChatModel == null) { @@ -191,7 +192,7 @@ public class SpringAIVolcengineService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; volcengineChatModel.stream(requestPrompt).subscribe( response -> { diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/zhipuai/SpringAIZhipuaiService.java 
b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/zhipuai/SpringAIZhipuaiService.java index 5b6fda2a60..792466d158 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/zhipuai/SpringAIZhipuaiService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/providers/zhipuai/SpringAIZhipuaiService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-26 16:58:56 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 13:58:57 + * @LastEditTime: 2025-08-21 12:47:17 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. @@ -35,6 +35,7 @@ import com.bytedesk.ai.springai.service.BaseSpringAIService; import com.bytedesk.core.constant.LlmConsts; import com.bytedesk.core.message.MessageProtobuf; import com.bytedesk.core.message.MessageTypeEnum; +import com.bytedesk.ai.springai.service.ChatTokenUsage; import lombok.extern.slf4j.Slf4j; import reactor.core.publisher.Flux; @@ -117,7 +118,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; chatModel.stream(prompt).subscribe( response -> { @@ -160,7 +161,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) { long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); log.info("Zhipuai API sync fullPromptContent: {}", fullPromptContent); @@ -263,7 +264,7 @@ public class 
SpringAIZhipuaiService extends BaseSpringAIService { long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; final ChatResponse[] lastResponse = {null}; Flux responseFlux = chatModel.stream(prompt); @@ -341,16 +342,16 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { * @param response ChatResponse对象 * @return TokenUsage对象 */ - private TokenUsage extractZhipuaiTokenUsage(org.springframework.ai.chat.model.ChatResponse response) { + private ChatTokenUsage extractZhipuaiTokenUsage(org.springframework.ai.chat.model.ChatResponse response) { try { if (response == null) { - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } var metadata = response.getMetadata(); if (metadata == null) { log.warn("Zhipuai API response metadata is null"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } log.info("Zhipuai API manual token extraction - metadata: {}", metadata); @@ -440,7 +441,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API manual token extraction result from string parsing - prompt: {}, completion: {}, total: {}", promptTokens, completionTokens, totalTokens); - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } } @@ -491,7 +492,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API manual token extraction result - prompt: {}, completion: {}, total: {}", promptTokens, completionTokens, totalTokens); - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } } @@ -505,7 +506,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { // 方法4: 如果手动提取失败,尝试使用原始的extractTokenUsage方法作为后备 
log.info("Zhipuai API manual extraction failed, trying original extractTokenUsage method"); - TokenUsage fallbackUsage = extractTokenUsage(response); + ChatTokenUsage fallbackUsage = extractTokenUsage(response); if (fallbackUsage.getTotalTokens() > 0) { log.info("Zhipuai API fallback extraction successful: {}", fallbackUsage); return fallbackUsage; @@ -513,7 +514,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { // 方法5: 如果所有方法都失败,尝试估算token使用量 log.info("Zhipuai API all extraction methods failed, attempting to estimate token usage"); - TokenUsage estimatedUsage = estimateTokenUsageFromResponse(response); + ChatTokenUsage estimatedUsage = estimateTokenUsageFromResponse(response); if (estimatedUsage.getTotalTokens() > 0) { log.info("Zhipuai API estimated token usage: {}", estimatedUsage); return estimatedUsage; @@ -525,7 +526,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { // 如果手动提取出错,尝试使用原始的extractTokenUsage方法作为后备 try { log.info("Zhipuai API manual extraction error, trying original extractTokenUsage method"); - TokenUsage fallbackUsage = extractTokenUsage(response); + ChatTokenUsage fallbackUsage = extractTokenUsage(response); if (fallbackUsage.getTotalTokens() > 0) { log.info("Zhipuai API fallback extraction successful after error: {}", fallbackUsage); return fallbackUsage; @@ -536,7 +537,7 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { } log.warn("Zhipuai API all token extraction methods failed, returning zeros"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } /** @@ -545,10 +546,10 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { * @param response ChatResponse对象 * @return 估算的TokenUsage对象 */ - private TokenUsage estimateTokenUsageFromResponse(org.springframework.ai.chat.model.ChatResponse response) { + private ChatTokenUsage estimateTokenUsageFromResponse(org.springframework.ai.chat.model.ChatResponse response) { try { if (response == null) { - return new 
TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } // 获取输出文本 @@ -563,11 +564,11 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API estimated tokens - output: {} chars -> {} tokens, estimated prompt: {} tokens, total: {} tokens", outputText.length(), completionTokens, promptTokens, totalTokens); - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } catch (Exception e) { log.error("Error estimating token usage", e); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } } @@ -639,15 +640,15 @@ public class SpringAIZhipuaiService extends BaseSpringAIService { } // 测试手动token提取 - TokenUsage manualUsage = extractZhipuaiTokenUsage(response); + ChatTokenUsage manualUsage = extractZhipuaiTokenUsage(response); log.info("Zhipuai API test manual token extraction result: {}", manualUsage); // 测试原始token提取 - TokenUsage originalUsage = extractTokenUsage(response); + ChatTokenUsage originalUsage = extractTokenUsage(response); log.info("Zhipuai API test original token extraction result: {}", originalUsage); // 测试token估算功能 - TokenUsage estimatedUsage = estimateTokenUsageFromResponse(response); + ChatTokenUsage estimatedUsage = estimateTokenUsageFromResponse(response); log.info("Zhipuai API test estimated token usage result: {}", estimatedUsage); // 测试token估算算法 diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/service/BaseSpringAIService.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/service/BaseSpringAIService.java index dc7e823095..b8a425e52f 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/springai/service/BaseSpringAIService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/service/BaseSpringAIService.java @@ -721,11 +721,6 @@ public abstract class BaseSpringAIService implements SpringAIService { processPromptSse(aiPrompt, robot, messageProtobufQuery, messageProtobufReply, emitter, 
fullPromptContent); } - // private String createAndProcessPromptSync(String query, String context, RobotProtobuf robot, - // MessageProtobuf messageProtobufQuery, MessageProtobuf messageProtobufReply) { - // return createAndProcessPromptSyncWithPrompt(query, context, robot, messageProtobufQuery, messageProtobufReply).getResponse(); - // } - private PromptResult createAndProcessPromptSyncWithPrompt(String query, String context, RobotProtobuf robot, MessageProtobuf messageProtobufQuery, MessageProtobuf messageProtobufReply) { @@ -1145,7 +1140,7 @@ public abstract class BaseSpringAIService implements SpringAIService { * @param response ChatResponse from AI service * @return TokenUsage object containing prompt and completion tokens */ - protected TokenUsage extractTokenUsage(Object response) { + protected ChatTokenUsage extractTokenUsage(Object response) { try { if (response instanceof org.springframework.ai.chat.model.ChatResponse) { org.springframework.ai.chat.model.ChatResponse chatResponse = (org.springframework.ai.chat.model.ChatResponse) response; @@ -1306,7 +1301,7 @@ public abstract class BaseSpringAIService implements SpringAIService { log.info("BaseSpringAIService extractTokenUsage extracted tokens - prompt: {}, completion: {}, total: {}", prompt, completion, total); - return new TokenUsage(prompt, completion, total); + return new ChatTokenUsage(prompt, completion, total); } } } catch (Exception e) { @@ -1315,7 +1310,7 @@ public abstract class BaseSpringAIService implements SpringAIService { // 如果无法提取,返回默认值 log.warn("BaseSpringAIService extractTokenUsage could not extract token usage, returning zeros"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } /** @@ -1421,21 +1416,21 @@ public abstract class BaseSpringAIService implements SpringAIService { /** * Token usage data class */ - protected static class TokenUsage { - private final long promptTokens; - private final long completionTokens; - private final long totalTokens; + // 
protected static class TokenUsage { + // private final long promptTokens; + // private final long completionTokens; + // private final long totalTokens; - public TokenUsage(long promptTokens, long completionTokens, long totalTokens) { - this.promptTokens = promptTokens; - this.completionTokens = completionTokens; - this.totalTokens = totalTokens; - } + // public TokenUsage(long promptTokens, long completionTokens, long totalTokens) { + // this.promptTokens = promptTokens; + // this.completionTokens = completionTokens; + // this.totalTokens = totalTokens; + // } - public long getPromptTokens() { return promptTokens; } - public long getCompletionTokens() { return completionTokens; } - public long getTotalTokens() { return totalTokens; } - } + // public long getPromptTokens() { return promptTokens; } + // public long getCompletionTokens() { return completionTokens; } + // public long getTotalTokens() { return totalTokens; } + // } // 带prompt参数的抽象方法重载 protected abstract void processPromptWebsocket(Prompt prompt, RobotProtobuf robot, MessageProtobuf messageProtobufQuery, diff --git a/modules/ai/src/main/java/com/bytedesk/ai/springai/service/ChatTokenUsage.java b/modules/ai/src/main/java/com/bytedesk/ai/springai/service/ChatTokenUsage.java new file mode 100644 index 0000000000..ba224a4625 --- /dev/null +++ b/modules/ai/src/main/java/com/bytedesk/ai/springai/service/ChatTokenUsage.java @@ -0,0 +1,29 @@ +/* + * @Author: jackning 270580156@qq.com + * @Date: 2025-08-21 12:42:33 + * @LastEditors: jackning 270580156@qq.com + * @LastEditTime: 2025-08-21 13:05:30 + * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk + * Please be aware of the BSL license restrictions before installing Bytedesk IM – + * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. 
+ * Business Source License 1.1: https://github.com/Bytedesk/bytedesk/blob/main/LICENSE + * contact: 270580156@qq.com + * + * Copyright (c) 2025 by bytedesk.com, All Rights Reserved. + */ +package com.bytedesk.ai.springai.service; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +@Getter +@Setter +@AllArgsConstructor +@NoArgsConstructor +public class ChatTokenUsage { + private long promptTokens; + private long completionTokens; + private long totalTokens; +} diff --git a/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiChatConfig.java b/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiChatConfig.java index b27816b662..632f172471 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiChatConfig.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiChatConfig.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-19 09:39:15 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 09:33:50 + * @LastEditTime: 2025-08-21 12:22:06 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. 
@@ -41,7 +41,7 @@ public class ZhipuaiChatConfig { @Value("${spring.ai.zhipuai.api-key:}") private String apiKey; - @Value("${spring.ai.zhipuai.chat.options.model:glm-4}") + @Value("${spring.ai.zhipuai.chat.options.model:glm-4-flash}") private String model; @Value("${spring.ai.zhipuai.chat.options.temperature:0.7}") diff --git a/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiChatService.java b/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiChatService.java new file mode 100644 index 0000000000..332062e279 --- /dev/null +++ b/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiChatService.java @@ -0,0 +1,624 @@ +/* + * @Author: jackning 270580156@qq.com + * @Date: 2025-08-21 12:26:02 + * @LastEditors: jackning 270580156@qq.com + * @LastEditTime: 2025-08-21 12:37:21 + * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk + * Please be aware of the BSL license restrictions before installing Bytedesk IM – + * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. + * Business Source License 1.1: https://github.com/Bytedesk/bytedesk/blob/main/LICENSE + * contact: 270580156@qq.com + * + * Copyright (c) 2025 by bytedesk.com, All Rights Reserved. 
+ */ +package com.bytedesk.ai.zhipuai; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.stereotype.Service; + +import com.zhipu.oapi.ClientV4; +import com.zhipu.oapi.Constants; +import com.zhipu.oapi.service.v4.model.ChatCompletionRequest; +import com.zhipu.oapi.service.v4.model.ChatFunction; +import com.zhipu.oapi.service.v4.model.ChatMessage; +import com.zhipu.oapi.service.v4.model.ChatMessageAccumulator; +import com.zhipu.oapi.service.v4.model.ChatMessageRole; +import com.zhipu.oapi.service.v4.model.ChatMeta; +import com.zhipu.oapi.service.v4.model.ChatTool; +import com.zhipu.oapi.service.v4.model.ChatToolType; +import com.zhipu.oapi.service.v4.model.ModelApiResponse; +import com.zhipu.oapi.service.v4.model.ModelData; +import com.zhipu.oapi.service.v4.model.QueryModelResultRequest; +import com.zhipu.oapi.service.v4.model.QueryModelResultResponse; +import com.zhipu.oapi.service.v4.model.WebSearch; + +import lombok.extern.slf4j.Slf4j; +import reactor.core.publisher.Flux; + +@Slf4j +@Service +@ConditionalOnProperty(prefix = "spring.ai.zhipuai.chat", name = "enabled", havingValue = "true", matchIfMissing = false) +public class ZhipuaiChatService { + + @Autowired + @Qualifier("zhipuaiChatClient") + private ClientV4 client; + + @Autowired + private ZhipuaiChatConfig zhipuaiChatConfig; + + /** + * 角色扮演聊天 + */ + public String rolePlayChat(String message, String userInfo, String botInfo, String botName, String userName) { + // 添加请求日志 + log.info("Zhipuai API role play request - message length: {}, userInfo: {}, botInfo: {}, botName: {}, userName: {}", + message.length(), userInfo, botInfo, botName, userName); + + long startTime = System.currentTimeMillis(); + + 
// 使用默认client进行角色扮演聊天 + ClientV4 chatClient = client; + + try { + if (client == null) { + log.error("Zhipuai API client is null"); + return "Zhipuai client is not available"; + } + + List messages = new ArrayList<>(); + ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); + messages.add(chatMessage); + + ChatMeta meta = new ChatMeta(); + meta.setUser_info(userInfo); + meta.setBot_info(botInfo); + meta.setBot_name(botName); + meta.setUser_name(userName); + + String requestId = String.format("roleplay-%d", System.currentTimeMillis()); + + ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() + .model(Constants.ModelCharGLM3) + .stream(Boolean.FALSE) + .invokeMethod(Constants.invokeMethod) + .messages(messages) + .meta(meta) + .requestId(requestId) + .build(); + + log.info("Zhipuai API role play invoking model with requestId: {}", requestId); + ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest); + + if (response.isSuccess() && response.getData() != null) { + log.info("Zhipuai API role play response success"); + + // 提取token使用情况 + // TokenUsage tokenUsage = extractZhipuaiTokenUsage(response); + // log.info("Zhipuai API role play tokenUsage: {}", tokenUsage); + + Object content = response.getData().getChoices().get(0).getMessage().getContent(); + return content != null ? content.toString() : null; + } else { + log.error("Zhipuai API role play error: {}", response.getError()); + return "Error: " + (response.getError() != null ? 
response.getError().getMessage() : "Unknown error"); + } + } catch (Exception e) { + log.error("Zhipuai API role play error: ", e); + return "Error: " + e.getMessage(); + } finally { + long responseTime = System.currentTimeMillis() - startTime; + log.info("Zhipuai API role play completed in {}ms", responseTime); + } + } + + /** + * Function Calling 聊天 + */ + public String functionCallingChat(String message, List functions) { + return functionCallingChat(message, null, null, functions); + } + + /** + * Function Calling 聊天(带自定义参数) + */ + public String functionCallingChat(String message, String model, Double temperature, List functions) { + // 添加请求日志 + log.info("Zhipuai API function calling request - message length: {}, model: {}, temperature: {}, functions count: {}", + message.length(), model, temperature, functions != null ? functions.size() : 0); + + long startTime = System.currentTimeMillis(); + + // 使用默认client进行函数调用聊天 + ClientV4 chatClient = client; + + try { + + List messages = new ArrayList<>(); + ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); + messages.add(chatMessage); + + List chatToolList = new ArrayList<>(); + if (functions != null) { + for (ChatFunction function : functions) { + ChatTool chatTool = new ChatTool(); + chatTool.setType(ChatToolType.FUNCTION.value()); + chatTool.setFunction(function); + chatToolList.add(chatTool); + } + } + + String requestId = String.format("function-%d", System.currentTimeMillis()); + + ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() + .model(model != null ? model : zhipuaiChatConfig.getModel()) + .stream(Boolean.FALSE) + .invokeMethod(Constants.invokeMethod) + .messages(messages) + .requestId(requestId) + .temperature(temperature != null ? 
temperature.floatValue() : (float) zhipuaiChatConfig.getTemperature()) + .tools(chatToolList) + .toolChoice("auto") + .build(); + + log.info("Zhipuai API function calling invoking model with requestId: {}", requestId); + ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest); + + if (response.isSuccess() && response.getData() != null) { + log.info("Zhipuai API function calling response success"); + + // 提取token使用情况 + // TokenUsage tokenUsage = extractZhipuaiTokenUsage(response); + // log.info("Zhipuai API function calling tokenUsage: {}", tokenUsage); + + Object content = response.getData().getChoices().get(0).getMessage().getContent(); + return content != null ? content.toString() : null; + } else { + log.error("Zhipuai API function calling error: {}", response.getError()); + return "Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error"); + } + } catch (Exception e) { + log.error("Zhipuai API function calling error: ", e); + return "Error: " + e.getMessage(); + } finally { + long responseTime = System.currentTimeMillis() - startTime; + log.info("Zhipuai API function calling completed in {}ms", responseTime); + } + } + + /** + * 流式 Function Calling 聊天 + */ + public Flux functionCallingChatStream(String message, List functions) { + return functionCallingChatStream(message, null, null, functions); + } + + /** + * 流式 Function Calling 聊天(带自定义参数) + */ + public Flux functionCallingChatStream(String message, String model, Double temperature, List functions) { + // 使用默认client进行流式函数调用聊天 + ClientV4 chatClient = client; + + try { + + List messages = new ArrayList<>(); + ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); + messages.add(chatMessage); + + List chatToolList = new ArrayList<>(); + if (functions != null) { + for (ChatFunction function : functions) { + ChatTool chatTool = new ChatTool(); + chatTool.setType(ChatToolType.FUNCTION.value()); + chatTool.setFunction(function); + 
chatToolList.add(chatTool); + } + } + + String requestId = String.format("function-stream-%d", System.currentTimeMillis()); + + ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() + .model(model != null ? model : zhipuaiChatConfig.getModel()) + .stream(Boolean.TRUE) + .messages(messages) + .requestId(requestId) + .temperature(temperature != null ? temperature.floatValue() : (float) zhipuaiChatConfig.getTemperature()) + .tools(chatToolList) + .toolChoice("auto") + .build(); + + ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest); + + if (response.isSuccess()) { + return Flux.from(mapStreamToAccumulator(response.getFlowable()).map(accumulator -> { + log.info("Zhipuai API function calling accumulator received: {}", accumulator); + log.info("Zhipuai API function calling accumulator class: {}", accumulator.getClass().getName()); + + Object delta = accumulator.getDelta(); + log.info("Zhipuai API function calling delta: {}", delta); + log.info("Zhipuai API function calling delta class: {}", delta != null ? delta.getClass().getName() : "null"); + + // 处理tool_calls(如果有的话) + if (delta != null && delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) { + com.zhipu.oapi.service.v4.model.ChatMessage deltaMessage = (com.zhipu.oapi.service.v4.model.ChatMessage) delta; + if (deltaMessage.getTool_calls() != null) { + log.info("Zhipuai API function calling tool_calls: {}", deltaMessage.getTool_calls()); + } + } + + if (delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) { + Object content = ((com.zhipu.oapi.service.v4.model.ChatMessage) delta).getContent(); + log.info("Zhipuai API function calling content: {}", content); + return content != null ? 
content.toString() : ""; + } else if (delta != null) { + String deltaStr = delta.toString(); + log.info("Zhipuai API function calling delta as string: {}", deltaStr); + + // 尝试从JSON字符串中提取content字段 + // String extractedContent = extractContentFromDeltaString(deltaStr); + // if (extractedContent != null && !extractedContent.isEmpty()) { + // log.info("Zhipuai API function calling extracted content: {}", extractedContent); + // return extractedContent; + // } else if (!isEmptyAssistantMessage(deltaStr)) { + // return deltaStr; + // } else { + // return ""; + // } + } + return ""; + })); + } else { + log.error("Zhipuai API error: {}", response.getError()); + return Flux.just("Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error")); + } + } catch (Exception e) { + log.error("Error in functionCallingChatStream", e); + return Flux.just("Error: " + e.getMessage()); + } + } + + /** + * 图像生成 - 暂不支持,需要等待SDK更新 + */ + public String generateImage(String prompt) { + return "Image generation is not supported in current SDK version"; + } + + /** + * 图像生成(带请求ID)- 暂不支持,需要等待SDK更新 + */ + public String generateImage(String prompt, String requestId) { + return "Image generation is not supported in current SDK version"; + } + + /** + * 向量嵌入 - 暂不支持,需要等待SDK更新 + */ + public List getEmbedding(String text) { + log.warn("Embedding is not supported in current SDK version"); + return new ArrayList<>(); + } + + /** + * 批量向量嵌入 - 暂不支持,需要等待SDK更新 + */ + public List> getEmbeddings(List texts) { + log.warn("Embeddings is not supported in current SDK version"); + return new ArrayList<>(); + } + + /** + * 语音合成 - 暂不支持,需要等待SDK更新 + */ + public File generateSpeech(String text, String voice, String responseFormat) { + log.warn("Speech synthesis is not supported in current SDK version"); + return null; + } + + /** + * 自定义语音合成 - 暂不支持,需要等待SDK更新 + */ + public File generateCustomSpeech(String text, String voiceText, File voiceData, String responseFormat) { + log.warn("Custom 
voice synthesis is not supported in current SDK version"); + return null; + } + + /** + * 文件上传 - 暂不支持,需要等待SDK更新 + */ + public String uploadFile(String filePath, String purpose) { + return "File upload is not supported in current SDK version"; + } + + /** + * 查询文件列表 - 暂不支持,需要等待SDK更新 + */ + public List> queryFiles() { + log.warn("File query is not supported in current SDK version"); + return new ArrayList<>(); + } + + /** + * 下载文件内容 - 暂不支持,需要等待SDK更新 + */ + public File downloadFile(String fileId, String outputPath) { + log.warn("File download is not supported in current SDK version"); + return null; + } + + /** + * 创建微调任务 - 暂不支持,需要等待SDK更新 + */ + public String createFineTuningJob(String model, String trainingFile) { + return "Fine-tuning is not supported in current SDK version"; + } + + /** + * 查询微调任务 - 暂不支持,需要等待SDK更新 + */ + public Map queryFineTuningJob(String jobId) { + log.warn("Fine-tuning query is not supported in current SDK version"); + return new HashMap<>(); + } + + /** + * 异步聊天 + */ + public String chatAsync(String message) { + // 添加请求日志 + log.info("Zhipuai API async request - message length: {}", message.length()); + + long startTime = System.currentTimeMillis(); + + // 使用默认client进行异步聊天 + ClientV4 chatClient = client; + + try { + + List messages = new ArrayList<>(); + ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); + messages.add(chatMessage); + + ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() + .model(zhipuaiChatConfig.getModel()) + .stream(Boolean.FALSE) + .invokeMethod(Constants.invokeMethodAsync) + .messages(messages) + .build(); + + log.info("Zhipuai API async invoking model"); + ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest); + + if (response.isSuccess() && response.getData() != null) { + String taskId = response.getData().getId(); + log.info("Zhipuai API async task created with taskId: {}", taskId); + + // 轮询获取结果 + return pollAsyncResult(taskId); + } else { 
+ log.error("Zhipuai API async error: {}", response.getError()); + return "Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error"); + } + } catch (Exception e) { + log.error("Zhipuai API async error: ", e); + return "Error: " + e.getMessage(); + } finally { + long responseTime = System.currentTimeMillis() - startTime; + log.info("Zhipuai API async completed in {}ms", responseTime); + } + } + + /** + * 轮询异步结果 + */ + private String pollAsyncResult(String taskId) { + log.info("Zhipuai API starting async result polling for taskId: {}", taskId); + + try { + int maxAttempts = 30; // 最多轮询30次 + int attempt = 0; + + while (attempt < maxAttempts) { + log.debug("Zhipuai API polling attempt {}/{} for taskId: {}", attempt + 1, maxAttempts, taskId); + + QueryModelResultRequest request = new QueryModelResultRequest(); + request.setTaskId(taskId); + + QueryModelResultResponse response = client.queryModelResult(request); + + if (response.isSuccess() && response.getData() != null) { + Object taskStatus = response.getData().getTaskStatus(); + log.debug("Zhipuai API task status: {} for taskId: {}", taskStatus, taskId); + + if ("SUCCESS".equals(taskStatus.toString())) { + log.info("Zhipuai API async task completed successfully for taskId: {}", taskId); + Object content = response.getData().getChoices().get(0).getMessage().getContent(); + return content != null ? 
content.toString() : null; + } else if ("FAILED".equals(taskStatus.toString())) { + log.error("Zhipuai API async task failed for taskId: {}", taskId); + return "Task failed"; + } + } else { + log.warn("Zhipuai API async polling response not successful for taskId: {}", taskId); + } + + attempt++; + Thread.sleep(2000); // 等待2秒后重试 + } + + log.error("Zhipuai API async task timeout after {} attempts for taskId: {}", maxAttempts, taskId); + return "Task timeout after " + maxAttempts + " attempts"; + } catch (Exception e) { + log.error("Zhipuai API error polling async result for taskId: {}", taskId, e); + return "Error: " + e.getMessage(); + } + } + + /** + * 带Web搜索的聊天 + */ + public String chatWithWebSearch(String message, String searchQuery) { + // 添加请求日志 + log.info("Zhipuai API web search request - message length: {}, searchQuery: {}", + message.length(), searchQuery); + + long startTime = System.currentTimeMillis(); + + // 使用默认client进行网络搜索聊天 + ClientV4 chatClient = client; + + try { + + List messages = new ArrayList<>(); + ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); + messages.add(chatMessage); + + List chatToolList = new ArrayList<>(); + + // 添加Web搜索工具 + ChatTool webSearchTool = new ChatTool(); + webSearchTool.setType(ChatToolType.WEB_SEARCH.value()); + WebSearch webSearch = new WebSearch(); + webSearch.setSearch_query(searchQuery); + webSearch.setSearch_result(true); + webSearch.setEnable(true); + webSearchTool.setWeb_search(webSearch); + chatToolList.add(webSearchTool); + + String requestId = String.format("websearch-%d", System.currentTimeMillis()); + + ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() + .model(zhipuaiChatConfig.getModel()) + .stream(Boolean.FALSE) + .invokeMethod(Constants.invokeMethod) + .messages(messages) + .requestId(requestId) + .tools(chatToolList) + .toolChoice("auto") + .build(); + + log.info("Zhipuai API web search invoking model with requestId: {}", requestId); + 
ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest); + + if (response.isSuccess() && response.getData() != null) { + log.info("Zhipuai API web search response success"); + + // 提取token使用情况 + // TokenUsage tokenUsage = extractZhipuaiTokenUsage(response); + // log.info("Zhipuai API web search tokenUsage: {}", tokenUsage); + + Object content = response.getData().getChoices().get(0).getMessage().getContent(); + return content != null ? content.toString() : null; + } else { + log.error("Zhipuai API web search error: {}", response.getError()); + return "Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error"); + } + } catch (Exception e) { + log.error("Zhipuai API web search error: ", e); + return "Error: " + e.getMessage(); + } finally { + long responseTime = System.currentTimeMillis() - startTime; + log.info("Zhipuai API web search completed in {}ms", responseTime); + } + } + + /** + * 语音模型聊天 - 暂不支持,需要等待SDK更新 + */ + public String chatWithVoice(String message) { + log.warn("Voice chat is not supported in current SDK version"); + return "Voice chat is not supported in current SDK version"; + } + + + /** + * 将流式响应转换为Accumulator,参考官方示例 + */ + private io.reactivex.Flowable mapStreamToAccumulator(io.reactivex.Flowable flowable) { + return flowable.map(chunk -> { + return new ChatMessageAccumulator( + chunk.getChoices().get(0).getDelta(), + null, + chunk.getChoices().get(0), + chunk.getUsage(), + chunk.getCreated(), + chunk.getId() + ); + }); + } + + /** + * 测试流式响应功能 + * 用于调试流式响应问题 + */ + public void testStreamResponse() { + // 使用默认client进行测试 + // ClientV4 chatClient = client; + + try { + log.info("Zhipuai API testing stream response..."); + + // // 创建一个简单的测试请求 + // String testMessage = "Hello, this is a test message for stream response."; + // ChatCompletionRequest chatCompletionRequest = createDynamicRequest(null, testMessage, true); + + // log.info("Zhipuai API making stream test call with message: {}", 
testMessage); + + // // 调用API + // ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest); + + // log.info("Zhipuai API stream test response success: {}", response.isSuccess()); + + // if (response.isSuccess()) { + // log.info("Zhipuai API stream test starting flowable processing"); + + // final int[] messageCount = {0}; + + // // 使用AtomicBoolean来标记是否是第一个消息,参考官方示例 + // // java.util.concurrent.atomic.AtomicBoolean isFirst = new java.util.concurrent.atomic.AtomicBoolean(true); + + // mapStreamToAccumulator(response.getFlowable()) + // .doOnNext(accumulator -> { + // messageCount[0]++; + // log.info("Zhipuai API stream test message #{}: accumulator={}", messageCount[0], accumulator); + // log.info("Zhipuai API stream test message #{}: accumulator class={}", messageCount[0], accumulator.getClass().getName()); + + // Object delta = accumulator.getDelta(); + // log.info("Zhipuai API stream test message #{}: delta={}", messageCount[0], delta); + // log.info("Zhipuai API stream test message #{}: delta class={}", messageCount[0], delta != null ? 
delta.getClass().getName() : "null"); + + // if (delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) { + // Object content = ((com.zhipu.oapi.service.v4.model.ChatMessage) delta).getContent(); + // log.info("Zhipuai API stream test message #{}: content={}", messageCount[0], content); + // } else { + // log.info("Zhipuai API stream test message #{}: delta is not ChatMessage", messageCount[0]); + // } + // }) + // .doOnComplete(() -> { + // log.info("Zhipuai API stream test completed, total messages: {}", messageCount[0]); + // }) + // .doOnError(error -> { + // log.error("Zhipuai API stream test error: ", error); + // }) + // .subscribe(); + // } else { + // log.error("Zhipuai API stream test failed: {}", response.getError()); + // } + + } catch (Exception e) { + log.error("Zhipuai API test stream response error", e); + } + } + + + +} diff --git a/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiController.java b/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiController.java index 2026580336..380d2574a4 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiController.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiController.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-19 09:39:15 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 15:45:45 + * @LastEditTime: 2025-08-21 12:38:11 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. 
@@ -49,6 +49,7 @@ public class ZhipuaiController { private final BytedeskProperties bytedeskProperties; private final ZhipuaiService zhipuaiService; + private final ZhipuaiChatService zhipuaiChatService; private final ExecutorService executorService = Executors.newCachedThreadPool(); /** @@ -140,7 +141,7 @@ public class ZhipuaiController { } try { - String result = zhipuaiService.rolePlayChat(message, userInfo, botInfo, botName, userName); + String result = zhipuaiChatService.rolePlayChat(message, userInfo, botInfo, botName, userName); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in role play chat", e); @@ -191,7 +192,7 @@ public class ZhipuaiController { functions.add(function); } - String result = zhipuaiService.functionCallingChat(message, functions); + String result = zhipuaiChatService.functionCallingChat(message, functions); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in function calling chat", e); @@ -224,7 +225,7 @@ public class ZhipuaiController { functions.add(function); } - return zhipuaiService.functionCallingChatStream(message, functions); + return zhipuaiChatService.functionCallingChatStream(message, functions); } catch (Exception e) { log.error("Error in function calling chat stream", e); return Flux.just("Error: " + e.getMessage()); @@ -303,7 +304,7 @@ public class ZhipuaiController { functions.add(weatherFunction); String message = "请告诉我" + city + "的天气情况"; - String result = zhipuaiService.functionCallingChat(message, functions); + String result = zhipuaiChatService.functionCallingChat(message, functions); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in weather function call", e); @@ -351,7 +352,7 @@ public class ZhipuaiController { functions.add(flightFunction); String message = "请查询从" + from + "到" + to + "的航班价格"; - String result = zhipuaiService.functionCallingChat(message, functions); + String 
result = zhipuaiChatService.functionCallingChat(message, functions); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in flight function call", e); @@ -377,7 +378,7 @@ public class ZhipuaiController { return ResponseEntity.ok(JsonResult.error("Prompt is required")); } - String result = zhipuaiService.generateImage(prompt, requestId); + String result = zhipuaiChatService.generateImage(prompt, requestId); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in image generation", e); @@ -402,7 +403,7 @@ public class ZhipuaiController { return ResponseEntity.ok(JsonResult.error("Text is required")); } - List result = zhipuaiService.getEmbedding(text); + List result = zhipuaiChatService.getEmbedding(text); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in embedding", e); @@ -428,7 +429,7 @@ public class ZhipuaiController { return ResponseEntity.ok(JsonResult.error("Texts are required")); } - List> result = zhipuaiService.getEmbeddings(texts); + List> result = zhipuaiChatService.getEmbeddings(texts); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in embeddings", e); @@ -455,7 +456,7 @@ public class ZhipuaiController { return ResponseEntity.ok(JsonResult.error("Text is required")); } - File result = zhipuaiService.generateSpeech(text, voice, responseFormat); + File result = zhipuaiChatService.generateSpeech(text, voice, responseFormat); if (result != null) { return ResponseEntity.ok(JsonResult.success("Speech generated: " + result.getAbsolutePath())); } else { @@ -496,7 +497,7 @@ public class ZhipuaiController { return ResponseEntity.ok(JsonResult.error("Voice data file not found")); } - File result = zhipuaiService.generateCustomSpeech(text, voiceText, voiceData, responseFormat); + File result = zhipuaiChatService.generateCustomSpeech(text, voiceText, voiceData, responseFormat); if (result 
!= null) { return ResponseEntity.ok(JsonResult.success("Custom voice generated: " + result.getAbsolutePath())); } else { @@ -530,7 +531,7 @@ public class ZhipuaiController { purpose = "fine-tune"; } - String result = zhipuaiService.uploadFile(filePath, purpose); + String result = zhipuaiChatService.uploadFile(filePath, purpose); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in file upload", e); @@ -549,7 +550,7 @@ public class ZhipuaiController { } try { - List> result = zhipuaiService.queryFiles(); + List> result = zhipuaiChatService.queryFiles(); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in query files", e); @@ -579,7 +580,7 @@ public class ZhipuaiController { return ResponseEntity.ok(JsonResult.error("Output path is required")); } - File result = zhipuaiService.downloadFile(fileId, outputPath); + File result = zhipuaiChatService.downloadFile(fileId, outputPath); if (result != null) { return ResponseEntity.ok(JsonResult.success("File downloaded: " + result.getAbsolutePath())); } else { @@ -613,7 +614,7 @@ public class ZhipuaiController { return ResponseEntity.ok(JsonResult.error("Training file is required")); } - String result = zhipuaiService.createFineTuningJob(model, trainingFile); + String result = zhipuaiChatService.createFineTuningJob(model, trainingFile); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in create fine-tuning job", e); @@ -632,7 +633,7 @@ public class ZhipuaiController { } try { - Map result = zhipuaiService.queryFineTuningJob(jobId); + Map result = zhipuaiChatService.queryFineTuningJob(jobId); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in query fine-tuning job", e); @@ -657,7 +658,7 @@ public class ZhipuaiController { return ResponseEntity.ok(JsonResult.error("Message is required")); } - String result = zhipuaiService.chatAsync(message); + 
String result = zhipuaiChatService.chatAsync(message); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in async chat", e); @@ -687,7 +688,7 @@ public class ZhipuaiController { searchQuery = message; // 如果没有指定搜索查询,使用消息作为搜索查询 } - String result = zhipuaiService.chatWithWebSearch(message, searchQuery); + String result = zhipuaiChatService.chatWithWebSearch(message, searchQuery); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in web search chat", e); @@ -712,7 +713,7 @@ public class ZhipuaiController { return ResponseEntity.ok(JsonResult.error("Message is required")); } - String result = zhipuaiService.chatWithVoice(message); + String result = zhipuaiChatService.chatWithVoice(message); return ResponseEntity.ok(JsonResult.success(result)); } catch (Exception e) { log.error("Error in voice chat", e); @@ -724,39 +725,39 @@ public class ZhipuaiController { * 测试流式响应功能 * GET http://127.0.0.1:9003/zhipuai/test-stream */ - @GetMapping("/test-stream") - public ResponseEntity> testStreamResponse() { - if (!bytedeskProperties.getDebug()) { - return ResponseEntity.ok(JsonResult.error("Zhipuai service is not available")); - } + // @GetMapping("/test-stream") + // public ResponseEntity> testStreamResponse() { + // if (!bytedeskProperties.getDebug()) { + // return ResponseEntity.ok(JsonResult.error("Zhipuai service is not available")); + // } - try { - zhipuaiService.testStreamResponse(); - return ResponseEntity.ok(JsonResult.success("Stream response test completed. Check logs for details.")); - } catch (Exception e) { - log.error("Error testing stream response", e); - return ResponseEntity.ok(JsonResult.error("Error testing stream response: " + e.getMessage())); - } - } + // try { + // zhipuaiService.testStreamResponse(); + // return ResponseEntity.ok(JsonResult.success("Stream response test completed. 
Check logs for details.")); + // } catch (Exception e) { + // log.error("Error testing stream response", e); + // return ResponseEntity.ok(JsonResult.error("Error testing stream response: " + e.getMessage())); + // } + // } /** * 简单流式测试 - 完全按照官方示例代码实现 * GET http://127.0.0.1:9003/zhipuai/test-simple-stream */ - @GetMapping("/test-simple-stream") - public ResponseEntity> testSimpleStream() { - if (!bytedeskProperties.getDebug()) { - return ResponseEntity.ok(JsonResult.error("Zhipuai service is not available")); - } + // @GetMapping("/test-simple-stream") + // public ResponseEntity> testSimpleStream() { + // if (!bytedeskProperties.getDebug()) { + // return ResponseEntity.ok(JsonResult.error("Zhipuai service is not available")); + // } - try { - zhipuaiService.testSimpleStream(); - return ResponseEntity.ok(JsonResult.success("Simple stream test completed. Check logs for details.")); - } catch (Exception e) { - log.error("Error testing simple stream", e); - return ResponseEntity.ok(JsonResult.error("Error testing simple stream: " + e.getMessage())); - } - } + // try { + // zhipuaiChatService.testSimpleStream(); + // return ResponseEntity.ok(JsonResult.success("Simple stream test completed. 
Check logs for details.")); + // } catch (Exception e) { + // log.error("Error testing simple stream", e); + // return ResponseEntity.ok(JsonResult.error("Error testing simple stream: " + e.getMessage())); + // } + // } /** * 在 Bean 销毁时关闭线程池 diff --git a/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiService.java b/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiService.java index 05f1b39fd9..a9ef9dfa2f 100644 --- a/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiService.java +++ b/modules/ai/src/main/java/com/bytedesk/ai/zhipuai/ZhipuaiService.java @@ -2,7 +2,7 @@ * @Author: jackning 270580156@qq.com * @Date: 2025-02-19 09:39:15 * @LastEditors: jackning 270580156@qq.com - * @LastEditTime: 2025-07-16 14:44:47 + * @LastEditTime: 2025-08-21 12:47:50 * @Description: bytedesk.com https://github.com/Bytedesk/bytedesk * Please be aware of the BSL license restrictions before installing Bytedesk IM – * selling, reselling, or hosting Bytedesk IM as a service is a breach of the terms and automatically terminates your rights under the license. 
@@ -15,13 +15,15 @@ package com.bytedesk.ai.zhipuai; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import org.springframework.ai.chat.prompt.Prompt; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +// import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.stereotype.Service; import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; +import com.bytedesk.ai.provider.LlmProviderEntity; +import com.bytedesk.ai.provider.LlmProviderRestService; import com.bytedesk.ai.robot.RobotLlm; import com.bytedesk.ai.robot.RobotProtobuf; import com.bytedesk.ai.springai.service.BaseSpringAIService; @@ -33,21 +35,11 @@ import com.zhipu.oapi.Constants; import com.zhipu.oapi.service.v4.model.ChatCompletionRequest; import com.zhipu.oapi.service.v4.model.ChatMessage; import com.zhipu.oapi.service.v4.model.ChatMessageRole; -import com.zhipu.oapi.service.v4.model.ChatMeta; import com.zhipu.oapi.service.v4.model.ModelApiResponse; import com.zhipu.oapi.service.v4.model.ChatMessageAccumulator; -import com.zhipu.oapi.service.v4.model.ChatTool; -import com.zhipu.oapi.service.v4.model.ChatToolType; -import com.zhipu.oapi.service.v4.model.ChatFunction; import com.zhipu.oapi.service.v4.model.ModelData; -import com.zhipu.oapi.service.v4.model.QueryModelResultRequest; -import com.zhipu.oapi.service.v4.model.QueryModelResultResponse; -import com.zhipu.oapi.service.v4.model.WebSearch; -import java.io.File; -import java.util.HashMap; -import java.util.Map; import lombok.extern.slf4j.Slf4j; -import reactor.core.publisher.Flux; +import com.bytedesk.ai.springai.service.ChatTokenUsage; /** * 智谱AI服务类 @@ -56,15 +48,10 @@ import reactor.core.publisher.Flux; */ @Slf4j @Service -@ConditionalOnProperty(prefix = "spring.ai.zhipuai.chat", name = "enabled", 
havingValue = "true", matchIfMissing = false) public class ZhipuaiService extends BaseSpringAIService { @Autowired - @Qualifier("zhipuaiChatClient") - private ClientV4 client; - - @Autowired - private ZhipuaiChatConfig zhipuaiChatConfig; + private LlmProviderRestService llmProviderRestService; /** * 构造函数 @@ -73,10 +60,68 @@ public class ZhipuaiService extends BaseSpringAIService { super(); } + /** + * 根据机器人配置创建动态的ClientV4实例 + * + * @param llm 机器人LLM配置 + * @return 配置了特定参数的ClientV4 + */ + private ClientV4 createDynamicClient(RobotLlm llm) { + if (llm == null || llm.getTextProviderUid() == null) { + log.warn("RobotLlm or textProviderUid is null, using default client"); + // return client; // 使用默认的注入client + return null; + } + + Optional llmProviderOptional = llmProviderRestService.findByUid(llm.getTextProviderUid()); + if (llmProviderOptional.isEmpty()) { + log.warn("LlmProvider with uid {} not found, using default client", llm.getTextProviderUid()); + // return client; // 使用默认的注入client + return null; + } + + LlmProviderEntity provider = llmProviderOptional.get(); + String apiKey = provider.getApiKey(); + + if (apiKey == null || apiKey.trim().isEmpty()) { + log.warn("API key is not configured for provider {}, using default client", provider.getUid()); + // return client; // 使用默认的注入client + return null; + } + + try { + log.info("Creating dynamic Zhipuai client with provider: {} ({})", provider.getName(), provider.getUid()); + + return new ClientV4.Builder(apiKey) + .enableTokenCache() + // .networkConfig( + // zhipuaiChatConfig.getConnectionTimeout(), + // zhipuaiChatConfig.getReadTimeout(), + // zhipuaiChatConfig.getWriteTimeout(), + // zhipuaiChatConfig.getPingInterval(), + // TimeUnit.SECONDS + // ) + // .connectionPool(new okhttp3.ConnectionPool( + // zhipuaiChatConfig.getMaxIdleConnections(), + // zhipuaiChatConfig.getKeepAliveDuration(), + // TimeUnit.SECONDS + // )) + .build(); + } catch (Exception e) { + log.error("Failed to create dynamic Zhipuai client for 
provider {}, using default client", provider.getUid(), e); + // return client; // 使用默认的注入client + return null; + } + } + /** * 根据机器人配置创建动态的聊天选项 */ private ChatCompletionRequest createDynamicRequest(RobotLlm llm, String message, boolean stream) { + if (llm == null || llm.getTextModel() == null) { + log.warn("RobotLlm or textModel is null, using default model"); + throw new IllegalArgumentException("RobotLlm or textModel cannot be null"); + } List messages = new ArrayList<>(); ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); messages.add(chatMessage); @@ -84,12 +129,12 @@ public class ZhipuaiService extends BaseSpringAIService { String requestId = String.format("zhipuai-%d", System.currentTimeMillis()); ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() - .model(llm != null && llm.getTextModel() != null ? llm.getTextModel() : zhipuaiChatConfig.getModel()) + .model(llm.getTextModel()) .stream(stream) .invokeMethod(Constants.invokeMethod) .messages(messages) .requestId(requestId) - .temperature(llm != null ? llm.getTemperature().floatValue() : (float) zhipuaiChatConfig.getTemperature()) + .temperature(llm.getTemperature().floatValue()) .build(); // 添加请求日志 @@ -130,12 +175,12 @@ public class ZhipuaiService extends BaseSpringAIService { String requestId = String.format("zhipuai-%d", System.currentTimeMillis()); ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() - .model(llm != null && llm.getTextModel() != null ? llm.getTextModel() : zhipuaiChatConfig.getModel()) + .model(llm.getTextModel()) .stream(stream) .invokeMethod(Constants.invokeMethod) .messages(messages) .requestId(requestId) - .temperature(llm != null ? 
llm.getTemperature().floatValue() : (float) zhipuaiChatConfig.getTemperature()) + .temperature(llm.getTemperature().floatValue()) .build(); // 添加请求日志 @@ -152,11 +197,11 @@ public class ZhipuaiService extends BaseSpringAIService { * @param response ModelApiResponse对象 * @return TokenUsage对象 */ - private TokenUsage extractZhipuaiTokenUsage(ModelApiResponse response) { + private ChatTokenUsage extractZhipuaiTokenUsage(ModelApiResponse response) { try { if (response == null) { log.warn("Zhipuai API response is null"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } log.info("Zhipuai API manual token extraction - response success: {}, has data: {}", @@ -164,7 +209,7 @@ public class ZhipuaiService extends BaseSpringAIService { if (!response.isSuccess() || response.getData() == null) { log.warn("Zhipuai API response is not successful or has no data"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } // 尝试从response.getData()中获取usage信息 @@ -213,7 +258,7 @@ public class ZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API manual token extraction result - prompt: {}, completion: {}, total: {}", promptTokens, completionTokens, totalTokens); - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } } } catch (Exception e) { @@ -291,7 +336,7 @@ public class ZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API manual token extraction result from string parsing - prompt: {}, completion: {}, total: {}", promptTokens, completionTokens, totalTokens); - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } // 如果所有方法都失败,尝试估算token使用量 @@ -300,7 +345,7 @@ public class ZhipuaiService extends BaseSpringAIService { } catch (Exception e) { log.error("Error in manual Zhipuai token extraction", e); - return new TokenUsage(0, 0, 0); + return new 
ChatTokenUsage(0, 0, 0); } } @@ -310,10 +355,10 @@ public class ZhipuaiService extends BaseSpringAIService { * @param response ModelApiResponse对象 * @return 估算的TokenUsage对象 */ - private TokenUsage estimateTokenUsageFromResponse(ModelApiResponse response) { + private ChatTokenUsage estimateTokenUsageFromResponse(ModelApiResponse response) { try { if (response == null || response.getData() == null) { - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } // 获取输出文本 @@ -334,11 +379,11 @@ public class ZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API estimated tokens - output: {} chars -> {} tokens, estimated prompt: {} tokens, total: {} tokens", outputText.length(), completionTokens, promptTokens, totalTokens); - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } catch (Exception e) { log.error("Error estimating token usage", e); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } } @@ -379,10 +424,10 @@ public class ZhipuaiService extends BaseSpringAIService { * @param message 输入消息 * @return 估算的TokenUsage对象 */ - private TokenUsage estimateTokenUsageFromMessage(String message) { + private ChatTokenUsage estimateTokenUsageFromMessage(String message) { try { if (message == null || message.isEmpty()) { - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } // 估算输入token数量 @@ -395,11 +440,11 @@ public class ZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API estimated token usage from message - input: {} chars -> {} tokens, estimated output: {} tokens, total: {} tokens", message.length(), promptTokens, completionTokens, totalTokens); - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } catch (Exception e) { log.error("Error estimating token usage from message", e); - return new TokenUsage(0, 0, 0); + 
return new ChatTokenUsage(0, 0, 0); } } @@ -414,28 +459,19 @@ public class ZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API websocket prompt: {}", prompt); log.info("Zhipuai API websocket fullPromptContent: {}", fullPromptContent); - // 添加请求日志 - log.info("Zhipuai API websocket request - model: {}, prompt instructions count: {}, robot: {}", - (llm != null && llm.getTextModel() != null) ? llm.getTextModel() : zhipuaiChatConfig.getModel(), - prompt != null && prompt.getInstructions() != null ? prompt.getInstructions().size() : 0, - robot != null ? robot.getUid() : "null"); - long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; final ChatMessageAccumulator[] finalAccumulator = {null}; - try { - if (client == null) { - log.error("Zhipuai API client is null"); - sendMessageWebsocket(MessageTypeEnum.ERROR, "Zhipuai client is not available", messageProtobufReply); - return; - } + // 获取适当的client实例 + ClientV4 chatClient = createDynamicClient(llm); + try { ChatCompletionRequest chatCompletionRequest = createDynamicRequestFromPrompt(llm, prompt, true); log.info("Zhipuai API invoking model with requestId: {}", chatCompletionRequest.getRequestId()); - ModelApiResponse response = client.invokeModelApi(chatCompletionRequest); + ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest); if (response.isSuccess()) { log.info("Zhipuai API websocket response success, starting stream processing"); @@ -520,7 +556,7 @@ public class ZhipuaiService extends BaseSpringAIService { // 记录token使用情况 long responseTime = System.currentTimeMillis() - startTime; - String modelType = (llm != null && llm.getTextModel() != null) ? 
llm.getTextModel() : zhipuaiChatConfig.getModel(); + String modelType = llm.getTextModel(); log.info("Zhipuai API websocket recording token usage - prompt: {}, completion: {}, total: {}, model: {}, responseTime: {}ms", tokenUsage[0].getPromptTokens(), tokenUsage[0].getCompletionTokens(), tokenUsage[0].getTotalTokens(), modelType, responseTime); recordAiTokenUsage(robot, LlmConsts.ZHIPUAI, modelType, @@ -550,27 +586,27 @@ public class ZhipuaiService extends BaseSpringAIService { */ @Override protected String processPromptSync(String message, RobotProtobuf robot, String fullPromptContent) { + if (robot == null || robot.getLlm() == null || robot.getLlm().getTextModel() == null) { + log.error("Robot or RobotLlm is null, cannot process prompt sync"); + return "Error: Robot or RobotLlm is not configured"; + } + // 从robot中获取llm配置 + RobotLlm llm = robot.getLlm(); // 添加请求日志 - RobotLlm llm = robot != null ? robot.getLlm() : null; - log.info("Zhipuai API sync request - model: {}, message length: {}, robot: {}", - (llm != null && llm.getTextModel() != null) ? llm.getTextModel() : zhipuaiChatConfig.getModel(), - message.length(), robot != null ? 
robot.getUid() : "null"); log.info("Zhipuai API sync fullPromptContent: {}", fullPromptContent); long startTime = System.currentTimeMillis(); boolean success = false; - TokenUsage tokenUsage = new TokenUsage(0, 0, 0); + ChatTokenUsage tokenUsage = new ChatTokenUsage(0, 0, 0); + + // 获取适当的client实例 + ClientV4 chatClient = createDynamicClient(llm); try { - if (client == null) { - log.error("Zhipuai API client is null"); - return "Zhipuai client is not available"; - } - ChatCompletionRequest chatCompletionRequest = createDynamicRequest(llm, message, false); log.info("Zhipuai API invoking sync model with requestId: {}", chatCompletionRequest.getRequestId()); - ModelApiResponse response = client.invokeModelApi(chatCompletionRequest); + ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest); if (response.isSuccess() && response.getData() != null) { log.info("Zhipuai API sync response success"); @@ -592,7 +628,7 @@ public class ZhipuaiService extends BaseSpringAIService { } finally { // 记录token使用情况 long responseTime = System.currentTimeMillis() - startTime; - String modelType = (llm != null && llm.getTextModel() != null) ? 
llm.getTextModel() : zhipuaiChatConfig.getModel(); + String modelType = llm.getTextModel(); log.info("Zhipuai API sync recording token usage - prompt: {}, completion: {}, total: {}, model: {}, responseTime: {}ms", tokenUsage.getPromptTokens(), tokenUsage.getCompletionTokens(), tokenUsage.getTotalTokens(), modelType, responseTime); recordAiTokenUsage(robot, LlmConsts.ZHIPUAI, modelType, @@ -606,38 +642,34 @@ public class ZhipuaiService extends BaseSpringAIService { @Override protected void processPromptSse(Prompt prompt, RobotProtobuf robot, MessageProtobuf messageProtobufQuery, MessageProtobuf messageProtobufReply, SseEmitter emitter, String fullPromptContent) { + if (robot == null || robot.getLlm() == null || robot.getLlm().getTextModel() == null) { + log.error("Robot or RobotLlm is null, cannot process prompt SSE"); + sendSseMessage("服务暂时不可用,请稍后重试", robot, messageProtobufQuery, messageProtobufReply, emitter); + return; + } // if (robot) // 从robot中获取llm配置 RobotLlm llm = robot.getLlm(); log.info("Zhipuai API SSE prompt: {}", prompt); log.info("Zhipuai API SSE fullPromptContent: {}", fullPromptContent); - - // 添加请求日志 - log.info("Zhipuai API SSE request - model: {}, prompt instructions count: {}, robot: {}", - (llm != null && llm.getTextModel() != null) ? llm.getTextModel() : zhipuaiChatConfig.getModel(), - prompt != null && prompt.getInstructions() != null ? prompt.getInstructions().size() : 0, - robot != null ? robot.getUid() : "null"); - + // 发送起始消息 sendStreamStartMessage(messageProtobufReply, emitter, "正在思考中..."); long startTime = System.currentTimeMillis(); final boolean[] success = {false}; - final TokenUsage[] tokenUsage = {new TokenUsage(0, 0, 0)}; + final ChatTokenUsage[] tokenUsage = {new ChatTokenUsage(0, 0, 0)}; final ChatMessageAccumulator[] finalAccumulator = {null}; - final String modelType = (llm != null && llm.getTextModel() != null) ? 
llm.getTextModel() : zhipuaiChatConfig.getModel(); + final String modelType = llm.getTextModel(); + + // 获取适当的client实例 + ClientV4 chatClient = createDynamicClient(llm); try { - if (client == null) { - log.error("Zhipuai API client is null"); - handleSseError(new Exception("Zhipuai client is not available"), messageProtobufQuery, messageProtobufReply, emitter); - return; - } - ChatCompletionRequest chatCompletionRequest = createDynamicRequestFromPrompt(llm, prompt, true); log.info("Zhipuai API invoking SSE model with requestId: {}", chatCompletionRequest.getRequestId()); - ModelApiResponse response = client.invokeModelApi(chatCompletionRequest); + ModelApiResponse response = chatClient.invokeModelApi(chatCompletionRequest); if (response.isSuccess()) { log.info("Zhipuai API SSE response success, starting stream processing"); @@ -653,10 +685,6 @@ public class ZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API SSE Response: "); } - log.info("Zhipuai API SSE accumulator received: {}", accumulator); - log.info("Zhipuai API SSE accumulator class: {}", accumulator.getClass().getName()); - log.info("Zhipuai API SSE accumulator tokenUsage: {}", accumulator.getUsage()); - // 保存最新的accumulator用于token统计 finalAccumulator[0] = accumulator; @@ -782,490 +810,7 @@ public class ZhipuaiService extends BaseSpringAIService { return fullPrompt.toString().trim(); } - /** - * 角色扮演聊天 - */ - public String rolePlayChat(String message, String userInfo, String botInfo, String botName, String userName) { - // 添加请求日志 - log.info("Zhipuai API roleplay request - message length: {}, userInfo: {}, botInfo: {}, botName: {}, userName: {}", - message.length(), userInfo, botInfo, botName, userName); - - long startTime = System.currentTimeMillis(); - - try { - if (client == null) { - log.error("Zhipuai API client is null"); - return "Zhipuai client is not available"; - } - - List messages = new ArrayList<>(); - ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); 
- messages.add(chatMessage); - - ChatMeta meta = new ChatMeta(); - meta.setUser_info(userInfo); - meta.setBot_info(botInfo); - meta.setBot_name(botName); - meta.setUser_name(userName); - - String requestId = String.format("roleplay-%d", System.currentTimeMillis()); - - ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() - .model(Constants.ModelCharGLM3) - .stream(Boolean.FALSE) - .invokeMethod(Constants.invokeMethod) - .messages(messages) - .meta(meta) - .requestId(requestId) - .build(); - - log.info("Zhipuai API roleplay invoking model with requestId: {}", requestId); - ModelApiResponse response = client.invokeModelApi(chatCompletionRequest); - - if (response.isSuccess() && response.getData() != null) { - log.info("Zhipuai API roleplay response success"); - - // 提取token使用情况 - TokenUsage tokenUsage = extractZhipuaiTokenUsage(response); - log.info("Zhipuai API roleplay tokenUsage: {}", tokenUsage); - - Object content = response.getData().getChoices().get(0).getMessage().getContent(); - return content != null ? content.toString() : null; - } else { - log.error("Zhipuai API roleplay error: {}", response.getError()); - return "Error: " + (response.getError() != null ? 
response.getError().getMessage() : "Unknown error"); - } - } catch (Exception e) { - log.error("Zhipuai API roleplay error: ", e); - return "Error: " + e.getMessage(); - } finally { - long responseTime = System.currentTimeMillis() - startTime; - log.info("Zhipuai API roleplay completed in {}ms", responseTime); - } - } - - /** - * Function Calling 聊天 - */ - public String functionCallingChat(String message, List functions) { - return functionCallingChat(message, null, null, functions); - } - - /** - * Function Calling 聊天(带自定义参数) - */ - public String functionCallingChat(String message, String model, Double temperature, List functions) { - // 添加请求日志 - log.info("Zhipuai API function calling request - message length: {}, model: {}, temperature: {}, functions count: {}", - message.length(), model, temperature, functions != null ? functions.size() : 0); - - long startTime = System.currentTimeMillis(); - - try { - if (client == null) { - log.error("Zhipuai API client is null"); - return "Zhipuai client is not available"; - } - - List messages = new ArrayList<>(); - ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); - messages.add(chatMessage); - - List chatToolList = new ArrayList<>(); - if (functions != null) { - for (ChatFunction function : functions) { - ChatTool chatTool = new ChatTool(); - chatTool.setType(ChatToolType.FUNCTION.value()); - chatTool.setFunction(function); - chatToolList.add(chatTool); - } - } - - String requestId = String.format("function-%d", System.currentTimeMillis()); - - ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() - .model(model != null ? model : zhipuaiChatConfig.getModel()) - .stream(Boolean.FALSE) - .invokeMethod(Constants.invokeMethod) - .messages(messages) - .requestId(requestId) - .temperature(temperature != null ? 
temperature.floatValue() : (float) zhipuaiChatConfig.getTemperature()) - .tools(chatToolList) - .toolChoice("auto") - .build(); - - log.info("Zhipuai API function calling invoking model with requestId: {}", requestId); - ModelApiResponse response = client.invokeModelApi(chatCompletionRequest); - - if (response.isSuccess() && response.getData() != null) { - log.info("Zhipuai API function calling response success"); - - // 提取token使用情况 - TokenUsage tokenUsage = extractZhipuaiTokenUsage(response); - log.info("Zhipuai API function calling tokenUsage: {}", tokenUsage); - - Object content = response.getData().getChoices().get(0).getMessage().getContent(); - return content != null ? content.toString() : null; - } else { - log.error("Zhipuai API function calling error: {}", response.getError()); - return "Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error"); - } - } catch (Exception e) { - log.error("Zhipuai API function calling error: ", e); - return "Error: " + e.getMessage(); - } finally { - long responseTime = System.currentTimeMillis() - startTime; - log.info("Zhipuai API function calling completed in {}ms", responseTime); - } - } - - /** - * 流式 Function Calling 聊天 - */ - public Flux functionCallingChatStream(String message, List functions) { - return functionCallingChatStream(message, null, null, functions); - } - - /** - * 流式 Function Calling 聊天(带自定义参数) - */ - public Flux functionCallingChatStream(String message, String model, Double temperature, List functions) { - try { - if (client == null) { - return Flux.just("Zhipuai client is not available"); - } - - List messages = new ArrayList<>(); - ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); - messages.add(chatMessage); - - List chatToolList = new ArrayList<>(); - if (functions != null) { - for (ChatFunction function : functions) { - ChatTool chatTool = new ChatTool(); - chatTool.setType(ChatToolType.FUNCTION.value()); - 
chatTool.setFunction(function); - chatToolList.add(chatTool); - } - } - - String requestId = String.format("function-stream-%d", System.currentTimeMillis()); - - ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() - .model(model != null ? model : zhipuaiChatConfig.getModel()) - .stream(Boolean.TRUE) - .messages(messages) - .requestId(requestId) - .temperature(temperature != null ? temperature.floatValue() : (float) zhipuaiChatConfig.getTemperature()) - .tools(chatToolList) - .toolChoice("auto") - .build(); - - ModelApiResponse response = client.invokeModelApi(chatCompletionRequest); - - if (response.isSuccess()) { - return Flux.from(mapStreamToAccumulator(response.getFlowable()).map(accumulator -> { - log.info("Zhipuai API function calling accumulator received: {}", accumulator); - log.info("Zhipuai API function calling accumulator class: {}", accumulator.getClass().getName()); - - Object delta = accumulator.getDelta(); - log.info("Zhipuai API function calling delta: {}", delta); - log.info("Zhipuai API function calling delta class: {}", delta != null ? delta.getClass().getName() : "null"); - - // 处理tool_calls(如果有的话) - if (delta != null && delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) { - com.zhipu.oapi.service.v4.model.ChatMessage deltaMessage = (com.zhipu.oapi.service.v4.model.ChatMessage) delta; - if (deltaMessage.getTool_calls() != null) { - log.info("Zhipuai API function calling tool_calls: {}", deltaMessage.getTool_calls()); - } - } - - if (delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) { - Object content = ((com.zhipu.oapi.service.v4.model.ChatMessage) delta).getContent(); - log.info("Zhipuai API function calling content: {}", content); - return content != null ? 
content.toString() : ""; - } else if (delta != null) { - String deltaStr = delta.toString(); - log.info("Zhipuai API function calling delta as string: {}", deltaStr); - - // 尝试从JSON字符串中提取content字段 - String extractedContent = extractContentFromDeltaString(deltaStr); - if (extractedContent != null && !extractedContent.isEmpty()) { - log.info("Zhipuai API function calling extracted content: {}", extractedContent); - return extractedContent; - } else if (!isEmptyAssistantMessage(deltaStr)) { - return deltaStr; - } else { - return ""; - } - } - return ""; - })); - } else { - log.error("Zhipuai API error: {}", response.getError()); - return Flux.just("Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error")); - } - } catch (Exception e) { - log.error("Error in functionCallingChatStream", e); - return Flux.just("Error: " + e.getMessage()); - } - } - - /** - * 图像生成 - 暂不支持,需要等待SDK更新 - */ - public String generateImage(String prompt) { - return "Image generation is not supported in current SDK version"; - } - - /** - * 图像生成(带请求ID)- 暂不支持,需要等待SDK更新 - */ - public String generateImage(String prompt, String requestId) { - return "Image generation is not supported in current SDK version"; - } - - /** - * 向量嵌入 - 暂不支持,需要等待SDK更新 - */ - public List getEmbedding(String text) { - log.warn("Embedding is not supported in current SDK version"); - return new ArrayList<>(); - } - - /** - * 批量向量嵌入 - 暂不支持,需要等待SDK更新 - */ - public List> getEmbeddings(List texts) { - log.warn("Embeddings is not supported in current SDK version"); - return new ArrayList<>(); - } - - /** - * 语音合成 - 暂不支持,需要等待SDK更新 - */ - public File generateSpeech(String text, String voice, String responseFormat) { - log.warn("Speech synthesis is not supported in current SDK version"); - return null; - } - - /** - * 自定义语音合成 - 暂不支持,需要等待SDK更新 - */ - public File generateCustomSpeech(String text, String voiceText, File voiceData, String responseFormat) { - log.warn("Custom voice synthesis is not 
supported in current SDK version"); - return null; - } - - /** - * 文件上传 - 暂不支持,需要等待SDK更新 - */ - public String uploadFile(String filePath, String purpose) { - return "File upload is not supported in current SDK version"; - } - - /** - * 查询文件列表 - 暂不支持,需要等待SDK更新 - */ - public List> queryFiles() { - log.warn("File query is not supported in current SDK version"); - return new ArrayList<>(); - } - - /** - * 下载文件内容 - 暂不支持,需要等待SDK更新 - */ - public File downloadFile(String fileId, String outputPath) { - log.warn("File download is not supported in current SDK version"); - return null; - } - - /** - * 创建微调任务 - 暂不支持,需要等待SDK更新 - */ - public String createFineTuningJob(String model, String trainingFile) { - return "Fine-tuning is not supported in current SDK version"; - } - - /** - * 查询微调任务 - 暂不支持,需要等待SDK更新 - */ - public Map queryFineTuningJob(String jobId) { - log.warn("Fine-tuning query is not supported in current SDK version"); - return new HashMap<>(); - } - - /** - * 异步聊天 - */ - public String chatAsync(String message) { - // 添加请求日志 - log.info("Zhipuai API async request - message length: {}", message.length()); - - long startTime = System.currentTimeMillis(); - - try { - if (client == null) { - log.error("Zhipuai API client is null"); - return "Zhipuai client is not available"; - } - - List messages = new ArrayList<>(); - ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); - messages.add(chatMessage); - - ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() - .model(zhipuaiChatConfig.getModel()) - .stream(Boolean.FALSE) - .invokeMethod(Constants.invokeMethodAsync) - .messages(messages) - .build(); - - log.info("Zhipuai API async invoking model"); - ModelApiResponse response = client.invokeModelApi(chatCompletionRequest); - - if (response.isSuccess() && response.getData() != null) { - String taskId = response.getData().getId(); - log.info("Zhipuai API async task created with taskId: {}", taskId); - - // 轮询获取结果 - return 
pollAsyncResult(taskId); - } else { - log.error("Zhipuai API async error: {}", response.getError()); - return "Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error"); - } - } catch (Exception e) { - log.error("Zhipuai API async error: ", e); - return "Error: " + e.getMessage(); - } finally { - long responseTime = System.currentTimeMillis() - startTime; - log.info("Zhipuai API async completed in {}ms", responseTime); - } - } - - /** - * 轮询异步结果 - */ - private String pollAsyncResult(String taskId) { - log.info("Zhipuai API starting async result polling for taskId: {}", taskId); - - try { - int maxAttempts = 30; // 最多轮询30次 - int attempt = 0; - - while (attempt < maxAttempts) { - log.debug("Zhipuai API polling attempt {}/{} for taskId: {}", attempt + 1, maxAttempts, taskId); - - QueryModelResultRequest request = new QueryModelResultRequest(); - request.setTaskId(taskId); - - QueryModelResultResponse response = client.queryModelResult(request); - - if (response.isSuccess() && response.getData() != null) { - Object taskStatus = response.getData().getTaskStatus(); - log.debug("Zhipuai API task status: {} for taskId: {}", taskStatus, taskId); - - if ("SUCCESS".equals(taskStatus.toString())) { - log.info("Zhipuai API async task completed successfully for taskId: {}", taskId); - Object content = response.getData().getChoices().get(0).getMessage().getContent(); - return content != null ? 
content.toString() : null; - } else if ("FAILED".equals(taskStatus.toString())) { - log.error("Zhipuai API async task failed for taskId: {}", taskId); - return "Task failed"; - } - } else { - log.warn("Zhipuai API async polling response not successful for taskId: {}", taskId); - } - - attempt++; - Thread.sleep(2000); // 等待2秒后重试 - } - - log.error("Zhipuai API async task timeout after {} attempts for taskId: {}", maxAttempts, taskId); - return "Task timeout after " + maxAttempts + " attempts"; - } catch (Exception e) { - log.error("Zhipuai API error polling async result for taskId: {}", taskId, e); - return "Error: " + e.getMessage(); - } - } - - /** - * 带Web搜索的聊天 - */ - public String chatWithWebSearch(String message, String searchQuery) { - // 添加请求日志 - log.info("Zhipuai API web search request - message length: {}, searchQuery: {}", - message.length(), searchQuery); - - long startTime = System.currentTimeMillis(); - - try { - if (client == null) { - log.error("Zhipuai API client is null"); - return "Zhipuai client is not available"; - } - - List messages = new ArrayList<>(); - ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), message); - messages.add(chatMessage); - - List chatToolList = new ArrayList<>(); - - // 添加Web搜索工具 - ChatTool webSearchTool = new ChatTool(); - webSearchTool.setType(ChatToolType.WEB_SEARCH.value()); - WebSearch webSearch = new WebSearch(); - webSearch.setSearch_query(searchQuery); - webSearch.setSearch_result(true); - webSearch.setEnable(true); - webSearchTool.setWeb_search(webSearch); - chatToolList.add(webSearchTool); - - String requestId = String.format("websearch-%d", System.currentTimeMillis()); - - ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() - .model(zhipuaiChatConfig.getModel()) - .stream(Boolean.FALSE) - .invokeMethod(Constants.invokeMethod) - .messages(messages) - .requestId(requestId) - .tools(chatToolList) - .toolChoice("auto") - .build(); - - log.info("Zhipuai API web search 
invoking model with requestId: {}", requestId); - ModelApiResponse response = client.invokeModelApi(chatCompletionRequest); - - if (response.isSuccess() && response.getData() != null) { - log.info("Zhipuai API web search response success"); - - // 提取token使用情况 - TokenUsage tokenUsage = extractZhipuaiTokenUsage(response); - log.info("Zhipuai API web search tokenUsage: {}", tokenUsage); - - Object content = response.getData().getChoices().get(0).getMessage().getContent(); - return content != null ? content.toString() : null; - } else { - log.error("Zhipuai API web search error: {}", response.getError()); - return "Error: " + (response.getError() != null ? response.getError().getMessage() : "Unknown error"); - } - } catch (Exception e) { - log.error("Zhipuai API web search error: ", e); - return "Error: " + e.getMessage(); - } finally { - long responseTime = System.currentTimeMillis() - startTime; - log.info("Zhipuai API web search completed in {}ms", responseTime); - } - } - - /** - * 语音模型聊天 - 暂不支持,需要等待SDK更新 - */ - public String chatWithVoice(String message) { - log.warn("Voice chat is not supported in current SDK version"); - return "Voice chat is not supported in current SDK version"; - } - + /** * 将流式响应转换为Accumulator,参考官方示例 */ @@ -1379,11 +924,11 @@ public class ZhipuaiService extends BaseSpringAIService { /** * 从ChatMessageAccumulator中提取token使用情况 */ - private TokenUsage extractTokenUsageFromAccumulator(ChatMessageAccumulator accumulator) { + private ChatTokenUsage extractTokenUsageFromAccumulator(ChatMessageAccumulator accumulator) { try { if (accumulator == null || accumulator.getUsage() == null) { log.warn("Zhipuai API accumulator or usage is null"); - return new TokenUsage(0, 0, 0); + return new ChatTokenUsage(0, 0, 0); } var usage = accumulator.getUsage(); @@ -1435,143 +980,77 @@ public class ZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API accumulator token extraction result - prompt: {}, completion: {}, total: {}", promptTokens, 
completionTokens, totalTokens); - return new TokenUsage(promptTokens, completionTokens, totalTokens); + return new ChatTokenUsage(promptTokens, completionTokens, totalTokens); } catch (Exception e) { log.error("Error extracting token usage from accumulator", e); - return new TokenUsage(0, 0, 0); - } - } - - /** - * 测试流式响应功能 - * 用于调试流式响应问题 - */ - public void testStreamResponse() { - try { - log.info("Zhipuai API testing stream response..."); - - if (client == null) { - log.error("Zhipuai API client is null"); - return; - } - - // 创建一个简单的测试请求 - String testMessage = "Hello, this is a test message for stream response."; - ChatCompletionRequest chatCompletionRequest = createDynamicRequest(null, testMessage, true); - - log.info("Zhipuai API making stream test call with message: {}", testMessage); - - // 调用API - ModelApiResponse response = client.invokeModelApi(chatCompletionRequest); - - log.info("Zhipuai API stream test response success: {}", response.isSuccess()); - - if (response.isSuccess()) { - log.info("Zhipuai API stream test starting flowable processing"); - - final int[] messageCount = {0}; - - // 使用AtomicBoolean来标记是否是第一个消息,参考官方示例 - // java.util.concurrent.atomic.AtomicBoolean isFirst = new java.util.concurrent.atomic.AtomicBoolean(true); - - mapStreamToAccumulator(response.getFlowable()) - .doOnNext(accumulator -> { - messageCount[0]++; - log.info("Zhipuai API stream test message #{}: accumulator={}", messageCount[0], accumulator); - log.info("Zhipuai API stream test message #{}: accumulator class={}", messageCount[0], accumulator.getClass().getName()); - - Object delta = accumulator.getDelta(); - log.info("Zhipuai API stream test message #{}: delta={}", messageCount[0], delta); - log.info("Zhipuai API stream test message #{}: delta class={}", messageCount[0], delta != null ? 
delta.getClass().getName() : "null"); - - if (delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) { - Object content = ((com.zhipu.oapi.service.v4.model.ChatMessage) delta).getContent(); - log.info("Zhipuai API stream test message #{}: content={}", messageCount[0], content); - } else { - log.info("Zhipuai API stream test message #{}: delta is not ChatMessage", messageCount[0]); - } - }) - .doOnComplete(() -> { - log.info("Zhipuai API stream test completed, total messages: {}", messageCount[0]); - }) - .doOnError(error -> { - log.error("Zhipuai API stream test error: ", error); - }) - .subscribe(); - } else { - log.error("Zhipuai API stream test failed: {}", response.getError()); - } - - } catch (Exception e) { - log.error("Zhipuai API test stream response error", e); + return new ChatTokenUsage(0, 0, 0); } } + /** * 简单流式测试 - 完全按照官方示例代码实现 * 用于调试流式响应问题 */ - public void testSimpleStream() { - try { - log.info("Zhipuai API testing simple stream response..."); + // public void testSimpleStream() { + // // 使用默认client进行简单测试 + // ClientV4 chatClient = client; + + // try { + // log.info("Zhipuai API testing simple stream response..."); - if (client == null) { - log.error("Zhipuai API client is null"); - return; - } + // // 完全按照官方示例代码实现 + // List messages = new ArrayList<>(); + // ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), "What is the relationship between ZhipuAI and ChatGLM?"); + // messages.add(chatMessage); - // 完全按照官方示例代码实现 - List messages = new ArrayList<>(); - ChatMessage chatMessage = new ChatMessage(ChatMessageRole.USER.value(), "What is the relationship between ZhipuAI and ChatGLM?"); - messages.add(chatMessage); + // String requestId = String.format("your-request-id-%d", System.currentTimeMillis()); + // ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() + // .model(Constants.ModelChatGLM4) + // .stream(Boolean.TRUE) + // .messages(messages) + // .requestId(requestId) + // .build(); - String 
requestId = String.format("your-request-id-%d", System.currentTimeMillis()); - ChatCompletionRequest chatCompletionRequest = ChatCompletionRequest.builder() - .model(Constants.ModelChatGLM4) - .stream(Boolean.TRUE) - .messages(messages) - .requestId(requestId) - .build(); + // log.info("Zhipuai API making simple stream test call with requestId: {}", requestId); - log.info("Zhipuai API making simple stream test call with requestId: {}", requestId); + // ModelApiResponse sseModelApiResp = chatClient.invokeModelApi(chatCompletionRequest); - ModelApiResponse sseModelApiResp = client.invokeModelApi(chatCompletionRequest); - - if (sseModelApiResp.isSuccess()) { - log.info("Zhipuai API simple stream test response success"); + // if (sseModelApiResp.isSuccess()) { + // log.info("Zhipuai API simple stream test response success"); - // java.util.concurrent.atomic.AtomicBoolean isFirst = new java.util.concurrent.atomic.AtomicBoolean(true); - final int[] messageCount = {0}; + // // java.util.concurrent.atomic.AtomicBoolean isFirst = new java.util.concurrent.atomic.AtomicBoolean(true); + // final int[] messageCount = {0}; - mapStreamToAccumulator(sseModelApiResp.getFlowable()) - .doOnNext(accumulator -> { - messageCount[0]++; - log.info("Zhipuai API simple stream test message #{}: accumulator: {}", messageCount[0], accumulator); + // mapStreamToAccumulator(sseModelApiResp.getFlowable()) + // .doOnNext(accumulator -> { + // messageCount[0]++; + // log.info("Zhipuai API simple stream test message #{}: accumulator: {}", messageCount[0], accumulator); - Object delta = accumulator.getDelta(); - log.info("Zhipuai API simple stream test message #{}: delta: {}", messageCount[0], delta); + // Object delta = accumulator.getDelta(); + // log.info("Zhipuai API simple stream test message #{}: delta: {}", messageCount[0], delta); - if (delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) { - Object content = ((com.zhipu.oapi.service.v4.model.ChatMessage) delta).getContent(); - 
log.info("Zhipuai API simple stream test message #{}: content: {}", messageCount[0], content); - } - }) - .doOnComplete(() -> { - log.info("Zhipuai API simple stream test completed, total messages: {}", messageCount[0]); - }) - .doOnError(error -> { - log.error("Zhipuai API simple stream test error: ", error); - }) - .subscribe(); - } else { - log.error("Zhipuai API simple stream test failed: {}", sseModelApiResp.getError()); - } + // if (delta instanceof com.zhipu.oapi.service.v4.model.ChatMessage) { + // Object content = ((com.zhipu.oapi.service.v4.model.ChatMessage) delta).getContent(); + // log.info("Zhipuai API simple stream test message #{}: content: {}", messageCount[0], content); + // } + // }) + // .doOnComplete(() -> { + // log.info("Zhipuai API simple stream test completed, total messages: {}", messageCount[0]); + // }) + // .doOnError(error -> { + // log.error("Zhipuai API simple stream test error: ", error); + // }) + // .subscribe(); + // } else { + // log.error("Zhipuai API simple stream test failed: {}", sseModelApiResp.getError()); + // } - } catch (Exception e) { - log.error("Zhipuai API test simple stream error", e); - } - } + // } catch (Exception e) { + // log.error("Zhipuai API test simple stream error", e); + // } + // } /** * 测试content提取功能 @@ -1620,11 +1099,6 @@ public class ZhipuaiService extends BaseSpringAIService { log.info("Zhipuai API health check started"); try { - if (client == null) { - log.error("Zhipuai API health check failed: client is null"); - return false; - } - // 发送一个简单的测试请求 String response = processPromptSync("Hello", null, ""); boolean isHealthy = response != null && !response.startsWith("Error");