refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup
- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add `task` builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace `let _ =` with proper `log::warn!` across SaaS handlers
- Remove unused dependency (`url`) from zclaw-hands
This commit is contained in:
@@ -181,8 +181,12 @@ impl LlmDriver for AnthropicDriver {
|
||||
}
|
||||
}
|
||||
"error" => {
|
||||
let error_msg = serde_json::from_str::<serde_json::Value>(&data)
|
||||
.ok()
|
||||
.and_then(|v| v.get("error").and_then(|e| e.get("message")).and_then(|m| m.as_str().map(String::from)))
|
||||
.unwrap_or_else(|| format!("Stream error: {}", &data[..data.len().min(200)]));
|
||||
yield Ok(StreamChunk::Error {
|
||||
message: "Stream error".to_string(),
|
||||
message: error_msg,
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
@@ -251,15 +255,42 @@ impl AnthropicDriver {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let requested_max = request.max_tokens.unwrap_or(4096);
|
||||
let (thinking, budget) = if request.thinking_enabled {
|
||||
let budget = match request.reasoning_effort.as_deref() {
|
||||
Some("low") => 2000,
|
||||
Some("medium") => 10000,
|
||||
Some("high") => 32000,
|
||||
_ => 10000, // default
|
||||
};
|
||||
(Some(AnthropicThinking {
|
||||
r#type: "enabled".to_string(),
|
||||
budget_tokens: budget,
|
||||
}), budget)
|
||||
} else {
|
||||
(None, 0)
|
||||
};
|
||||
|
||||
// When thinking is enabled, max_tokens is the TOTAL budget (thinking + text).
|
||||
// Use the maximum output limit (65536) so thinking can consume whatever it
|
||||
// needs without starving the text response. We only pay for tokens actually
|
||||
// generated, so a high limit costs nothing extra.
|
||||
let effective_max = if budget > 0 {
|
||||
65536
|
||||
} else {
|
||||
requested_max
|
||||
};
|
||||
|
||||
AnthropicRequest {
|
||||
model: request.model.clone(),
|
||||
max_tokens: request.max_tokens.unwrap_or(4096),
|
||||
max_tokens: effective_max,
|
||||
system: request.system.clone(),
|
||||
messages,
|
||||
tools: if tools.is_empty() { None } else { Some(tools) },
|
||||
temperature: request.temperature,
|
||||
stop_sequences: if request.stop.is_empty() { None } else { Some(request.stop.clone()) },
|
||||
stream: request.stream,
|
||||
thinking,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -313,6 +344,14 @@ struct AnthropicRequest {
|
||||
stop_sequences: Option<Vec<String>>,
|
||||
#[serde(default)]
|
||||
stream: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
thinking: Option<AnthropicThinking>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct AnthropicThinking {
|
||||
r#type: String,
|
||||
budget_tokens: u32,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
||||
Reference in New Issue
Block a user