refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup

- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace `let _ =` with proper `log::warn!` across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
2026-04-03 00:28:03 +08:00
parent 0a04b260a4
commit 52bdafa633
55 changed files with 4130 additions and 1959 deletions
--- a/crates/zclaw-runtime/src/middleware/dangling_tool.rs
+++ b/crates/zclaw-runtime/src/middleware/dangling_tool.rs
@@ -0,0 +1,125 @@
+//! Dangling tool-call repair middleware — detects and patches missing tool-result
+//! messages that would cause LLM API errors.
+//!
+//! When the LLM produces a `ToolUse` content block but the agent loop fails to
+//! produce a corresponding `ToolResult` message (e.g. due to a crash or timeout),
+//! the conversation history becomes inconsistent. The next LLM call would fail with
+//! an API error because ToolUse messages must be followed by ToolResult messages.
+//!
+//! This middleware inspects the message history before each completion and appends
+//! placeholder ToolResult messages for any dangling ToolUse entries.
+
+use std::collections::HashSet;
+
+use async_trait::async_trait;
+use zclaw_types::{Message, Result};
+use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};
+
+/// Middleware that repairs dangling tool-use blocks in conversation history.
+///
+/// A stateless unit struct. Priority 300 — runs before tool error middleware
+/// (350) and guardrail (400).
+pub struct DanglingToolMiddleware;
+
+impl DanglingToolMiddleware {
+    /// Creates the middleware; identical to `Default::default()`.
+    pub fn new() -> Self { Self::default() }
+}
+
+impl Default for DanglingToolMiddleware {
+    fn default() -> Self {
+        DanglingToolMiddleware
+    }
+}
+
+#[async_trait]
+impl AgentMiddleware for DanglingToolMiddleware {
+    fn name(&self) -> &str { "dangling_tool" }
+    fn priority(&self) -> i32 { 300 }
+
+    /// Inserts a placeholder error `ToolResult` right after every `ToolUse` in
+    /// `ctx.messages` whose id is not answered by a `ToolResult` anywhere in the list.
+    ///
+    /// Running it again is a no-op, because the placeholders count as results.
+    ///
+    /// Always returns `MiddlewareDecision::Continue`; `ctx.messages` is left
+    /// untouched when nothing is dangling.
+    async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
+        // Step 1: Collect every id answered by a ToolResult across the entire
+        // message list (not just adjacent pairs). Placeholders from an earlier run
+        // are ordinary ToolResults, which is what prevents double-patching.
+        let answered: HashSet<&str> = ctx.messages.iter()
+            .filter_map(|msg| match msg {
+                Message::ToolResult { tool_call_id, .. } => Some(tool_call_id.as_str()),
+                _ => None,
+            })
+            .collect();
+
+        // Step 2: Find dangling ToolUse entries that have no matching ToolResult.
+        // Owned ids are needed because the message list is moved out in step 3.
+        let dangling_ids: HashSet<String> = ctx.messages.iter()
+            .filter_map(|msg| match msg {
+                Message::ToolUse { id, .. } if !answered.contains(id.as_str()) => Some(id.clone()),
+                _ => None,
+            })
+            .collect();
+
+        if dangling_ids.is_empty() {
+            return Ok(MiddlewareDecision::Continue);
+        }
+
+        // Step 3: Rebuild the history. Messages are moved, not cloned, and each
+        // dangling ToolUse is immediately followed by its placeholder ToolResult.
+        let original = std::mem::take(&mut ctx.messages);
+        let mut patched_messages: Vec<Message> =
+            Vec::with_capacity(original.len() + dangling_ids.len());
+        let mut patched_count = 0usize;
+
+        for msg in original {
+            let placeholder = match &msg {
+                Message::ToolUse { id, tool, .. } if dangling_ids.contains(id) => {
+                    tracing::warn!(
+                        "[DanglingToolMiddleware] Patching dangling ToolUse: tool={}, id={}",
+                        tool.as_str(), id
+                    );
+                    Some(Message::tool_result(
+                        id.clone(),
+                        tool.clone(),
+                        serde_json::json!({
+                            "error": "Tool execution was interrupted. Please retry or use an alternative approach.",
+                            "tool_patch": true,
+                        }),
+                        true, // is_error
+                    ))
+                }
+                _ => None,
+            };
+            patched_messages.push(msg);
+            if let Some(result) = placeholder {
+                patched_messages.push(result);
+                patched_count += 1;
+            }
+        }
+
+        // Step 4: Detect streaming interrupt — if the last message is an Assistant
+        // response while there were dangling tools, the user likely interrupted a
+        // streaming response mid-tool-execution.  No additional action is needed
+        // beyond the patched ToolResult messages that now prevent API errors.
+        if let Some(Message::Assistant { .. }) = patched_messages.last() {
+            tracing::debug!(
+                "[DanglingToolMiddleware] Streaming interrupt detected with {} dangling tools",
+                patched_count
+            );
+        }
+
+        // `dangling_ids` was non-empty, so at least one placeholder was inserted;
+        // the list was moved out above and must always be stored back.
+        tracing::info!(
+            "[DanglingToolMiddleware] Patched {} dangling tool-use blocks",
+            patched_count
+        );
+        ctx.messages = patched_messages;
+
+        Ok(MiddlewareDecision::Continue)
+    }
+}
--- a/crates/zclaw-runtime/src/middleware/loop_guard.rs
+++ b/crates/zclaw-runtime/src/middleware/loop_guard.rs
@@ -41,7 +41,7 @@ impl AgentMiddleware for LoopGuardMiddleware {
        match result {
            LoopGuardResult::CircuitBreaker => {
                tracing::warn!("[LoopGuardMiddleware] Circuit breaker triggered by tool '{}'", tool_name);
-                Ok(ToolCallDecision::Block("检测到工具调用循环，已自动终止".to_string()))
+                Ok(ToolCallDecision::AbortLoop("检测到工具调用循环，已自动终止".to_string()))
            }
            LoopGuardResult::Blocked => {
                tracing::warn!("[LoopGuardMiddleware] Tool '{}' blocked", tool_name);
--- a/crates/zclaw-runtime/src/middleware/memory.rs
+++ b/crates/zclaw-runtime/src/middleware/memory.rs
@@ -60,34 +60,39 @@ impl AgentMiddleware for MemoryMiddleware {
    fn priority(&self) -> i32 { 150 }

    async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
-        // Skip memory injection for very short queries.
-        // Short queries (e.g., "1+6", "hi", "好") don't benefit from memory context.
-        // Worse, the retriever's scope-based fallback may return high-importance but
-        // irrelevant old memories, causing the model to think about past conversations
-        // instead of answering the current question.
-        // Use char count (not byte count) so CJK queries are handled correctly:
-        // a single Chinese char is 3 UTF-8 bytes but 1 meaningful character.
-        let query = ctx.user_input.trim();
-        if query.chars().count() < 2 {
-            tracing::debug!(
-                "[MemoryMiddleware] Skipping enhancement for short query ({:?}): no memory context needed",
-                query
-            );
-            return Ok(MiddlewareDecision::Continue);
-        }
+        tracing::debug!(
+            "[MemoryMiddleware] before_completion for query: {:?}",
+            ctx.user_input.chars().take(50).collect::<String>()
+        );

-        match self.growth.enhance_prompt(
-            &ctx.agent_id,
-            &ctx.system_prompt,
-            &ctx.user_input,
-        ).await {
+        // Retrieve relevant memories and inject into system prompt.
+        // The SqliteStorage retriever now uses FTS5-only matching — if FTS5 finds
+        // no relevant results, no memories are returned (no scope-based fallback).
+        // This prevents irrelevant high-importance memories from leaking into
+        // unrelated conversations.
+        let base = &ctx.system_prompt;
+        match self.growth.enhance_prompt(&ctx.agent_id, base, &ctx.user_input).await {
            Ok(enhanced) => {
-                ctx.system_prompt = enhanced;
+                if enhanced != *base {
+                    tracing::info!(
+                        "[MemoryMiddleware] Injected memories into system prompt for agent {}",
+                        ctx.agent_id
+                    );
+                    ctx.system_prompt = enhanced;
+                } else {
+                    tracing::debug!(
+                        "[MemoryMiddleware] No relevant memories found for query: {:?}",
+                        ctx.user_input.chars().take(50).collect::<String>()
+                    );
+                }
                Ok(MiddlewareDecision::Continue)
            }
            Err(e) => {
-                // Non-fatal: memory retrieval failure should not block the loop
-                tracing::warn!("[MemoryMiddleware] Prompt enhancement failed: {}", e);
+                // Non-fatal: retrieval failure should not block the conversation
+                tracing::warn!(
+                    "[MemoryMiddleware] Memory retrieval failed (non-fatal): {}",
+                    e
+                );
                Ok(MiddlewareDecision::Continue)
            }
        }
--- a/crates/zclaw-runtime/src/middleware/subagent_limit.rs
+++ b/crates/zclaw-runtime/src/middleware/subagent_limit.rs
@@ -0,0 +1,87 @@
+//! Sub-agent limit middleware — enforces limits on sub-agent spawning.
+//!
+//! Prevents runaway sub-agent spawning by enforcing a per-turn total cap.
+//! The `running` counter was removed because it leaked when subsequent
+//! middleware blocked the tool call (before_tool_call increments but
+//! after_tool_call never fires for blocked tools).
+
+use async_trait::async_trait;
+use serde_json::Value;
+use zclaw_types::Result;
+use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
+
+/// Cap on sub-agent spawns per conversation turn unless overridden.
+const DEFAULT_MAX_TOTAL: usize = 10;
+
+/// Middleware that caps how many sub-agents may be spawned in one turn.
+///
+/// Priority 550 — runs after loop guard (500).
+pub struct SubagentLimitMiddleware {
+    /// Upper bound on sub-agent spawns per conversation turn.
+    max_total: usize,
+    /// Number of sub-agent spawns counted so far in the current turn.
+    total_spawned: std::sync::atomic::AtomicUsize,
+}
+
+impl SubagentLimitMiddleware {
+    /// Builds the middleware with the default cap (`DEFAULT_MAX_TOTAL`) and a
+    /// zeroed spawn counter.
+    pub fn new() -> Self { Self::default() }
+
+    /// Builder-style override of the per-turn cap.
+    pub fn with_max_total(self, n: usize) -> Self {
+        Self { max_total: n, ..self }
+    }
+
+    /// Returns `true` when `tool_name` is one of the sub-agent spawning tools.
+    fn is_subagent_tool(tool_name: &str) -> bool {
+        ["task", "delegate", "spawn_agent", "subagent"].contains(&tool_name)
+    }
+}
+
+impl Default for SubagentLimitMiddleware {
+    fn default() -> Self {
+        Self {
+            max_total: DEFAULT_MAX_TOTAL,
+            total_spawned: std::sync::atomic::AtomicUsize::new(0),
+        }
+    }
+}
+
+#[async_trait]
+impl AgentMiddleware for SubagentLimitMiddleware {
+    fn name(&self) -> &str { "subagent_limit" }
+    fn priority(&self) -> i32 { 550 }
+
+    /// Admits at most `max_total` spawn-tool calls per turn; other tools pass through.
+    async fn before_tool_call(
+        &self,
+        _ctx: &MiddlewareContext,
+        tool_name: &str,
+        _tool_input: &Value,
+    ) -> Result<ToolCallDecision> {
+        use std::sync::atomic::Ordering::SeqCst;
+        if !Self::is_subagent_tool(tool_name) {
+            return Ok(ToolCallDecision::Allow);
+        }
+        // Atomic check-and-increment; add-then-undo could underflow after a concurrent reset.
+        let admit = |n: usize| (n < self.max_total).then_some(n + 1);
+        if self.total_spawned.fetch_update(SeqCst, SeqCst, admit).is_err() {
+            tracing::warn!(
+                "[SubagentLimitMiddleware] Total sub-agent limit ({}) reached — blocking spawn",
+                self.max_total
+            );
+            return Ok(ToolCallDecision::Block(format!(
+                "子Agent总数量已达上限 ({})，请优先完成现有任务后再发起新任务。",
+                self.max_total
+            )));
+        }
+        Ok(ToolCallDecision::Allow)
+    }
+
+    /// Resets the per-turn counter after the agent loop turn completes.
+    async fn after_completion(&self, _ctx: &MiddlewareContext) -> Result<()> {
+        self.total_spawned.store(0, std::sync::atomic::Ordering::SeqCst);
+        Ok(())
+    }
+}
--- a/crates/zclaw-runtime/src/middleware/title.rs
+++ b/crates/zclaw-runtime/src/middleware/title.rs
@@ -5,22 +5,29 @@
 //! "新对话" or truncating the user's first message.
 //!
 //! Priority 180 — runs after compaction (100) and memory (150), before skill index (200).
+//!
+//! NOTE: This is a structural placeholder. Full implementation requires an LLM driver
+//! reference to generate titles asynchronously, which will be wired through the
+//! middleware context in a future iteration. For now it simply passes through.

 use async_trait::async_trait;
-use zclaw_types::Result;
-use crate::middleware::{AgentMiddleware, MiddlewareContext};
+use crate::middleware::{AgentMiddleware, MiddlewareDecision};

 /// Middleware that auto-generates conversation titles after the first exchange.
+///
+/// When fully implemented, this will:
+/// 1. Detect the first user-assistant exchange (via message count)
+/// 2. Call the LLM with a short prompt to generate a descriptive title
+/// 3. Update the session title via the middleware context
+///
+/// For now, it serves as a registered placeholder in the middleware chain.
 pub struct TitleMiddleware {
-    /// Whether a title has been generated for the current session.
-    titled: std::sync::atomic::AtomicBool,
+    _reserved: (),
 }

 impl TitleMiddleware {
    pub fn new() -> Self {
-        Self {
-            titled: std::sync::atomic::AtomicBool::new(false),
-        }
+        Self { _reserved: () }
    }
 }

@@ -34,4 +41,9 @@ impl Default for TitleMiddleware {
 impl AgentMiddleware for TitleMiddleware {
    fn name(&self) -> &str { "title" }
    fn priority(&self) -> i32 { 180 }
+
+    // All hooks default to Continue — placeholder until LLM driver is wired in.
+    async fn before_completion(&self, _ctx: &mut crate::middleware::MiddlewareContext) -> zclaw_types::Result<MiddlewareDecision> {
+        Ok(MiddlewareDecision::Continue)
+    }
 }
--- a/crates/zclaw-runtime/src/middleware/tool_error.rs
+++ b/crates/zclaw-runtime/src/middleware/tool_error.rs
@@ -0,0 +1,111 @@
+//! Tool error middleware — catches tool execution errors and converts them
+//! into well-formed tool-result messages for the LLM to recover from.
+//!
+//! Inspired by DeerFlow's ToolErrorMiddleware: instead of propagating raw errors
+//! that crash the agent loop, this middleware wraps tool errors into a structured
+//! format that the LLM can use to self-correct.
+
+use async_trait::async_trait;
+use serde_json::Value;
+use zclaw_types::Result;
+use crate::driver::ContentBlock;
+use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
+
+/// Middleware that intercepts tool call errors and formats recovery messages.
+///
+/// Priority 350 — runs after dangling tool repair (300) and before guardrail (400).
+pub struct ToolErrorMiddleware {
+    /// Byte length above which an error message gets truncated.
+    max_error_length: usize,
+}
+
+impl ToolErrorMiddleware {
+    /// Creates the middleware with the default 500-byte error limit.
+    pub fn new() -> Self { Self::default() }
+
+    /// Builder-style setter for the maximum error length.
+    pub fn with_max_error_length(self, len: usize) -> Self {
+        let mut configured = self;
+        configured.max_error_length = len;
+        configured
+    }
+
+    /// Renders the guided recovery text shown to the LLM for a failed tool.
+    ///
+    /// `error` is inserted verbatim, so callers must truncate it beforehand.
+    fn format_tool_error(&self, tool_name: &str, error: &str) -> String {
+        format!(
+            "工具 '{}' 执行失败。错误信息: {}\n请分析错误原因，尝试修正参数后重试，或使用其他方法完成任务。",
+            tool_name, error
+        )
+    }
+}
+
+impl Default for ToolErrorMiddleware {
+    fn default() -> Self {
+        Self {
+            max_error_length: 500,
+        }
+    }
+}
+
+#[async_trait]
+impl AgentMiddleware for ToolErrorMiddleware {
+    fn name(&self) -> &str { "tool_error" }
+    fn priority(&self) -> i32 { 350 }
+
+    /// Replaces a `null` tool input with `{}`; every other input is allowed as is.
+    async fn before_tool_call(
+        &self,
+        _ctx: &MiddlewareContext,
+        tool_name: &str,
+        tool_input: &Value,
+    ) -> Result<ToolCallDecision> {
+        // Pre-validate tool input structure for common issues.
+        // This catches malformed JSON inputs before they reach the tool executor.
+        if tool_input.is_null() {
+            tracing::warn!(
+                "[ToolErrorMiddleware] Tool '{}' received null input — replacing with empty object",
+                tool_name
+            );
+            return Ok(ToolCallDecision::ReplaceInput(serde_json::json!({})));
+        }
+        Ok(ToolCallDecision::Allow)
+    }
+
+    /// When `result` carries a non-null `"error"` field, logs it and pushes a
+    /// guided recovery text block onto `ctx.response_content`.
+    async fn after_tool_call(
+        &self,
+        ctx: &mut MiddlewareContext,
+        tool_name: &str,
+        result: &Value,
+    ) -> Result<()> {
+        // `"error": null` means "no error"; only a present, non-null field counts.
+        let error_msg = match result.get("error") {
+            None | Some(Value::Null) => return Ok(()),
+            Some(Value::String(s)) => s.clone(),
+            Some(other) => other.to_string(),
+        };
+        let truncated = if error_msg.len() > self.max_error_length {
+            // Use char-boundary-safe truncation to avoid panic on UTF-8 strings (e.g. Chinese)
+            let end = error_msg.floor_char_boundary(self.max_error_length);
+            format!("{}...(truncated)", &error_msg[..end])
+        } else {
+            error_msg
+        };
+        tracing::warn!(
+            "[ToolErrorMiddleware] Tool '{}' failed: {}",
+            tool_name, truncated
+        );
+
+        // Build a guided recovery message so the LLM can self-correct, then inject
+        // it into response_content so the agent loop feeds it back to the LLM
+        // alongside the raw tool result.
+        let guided_message = self.format_tool_error(tool_name, &truncated);
+        ctx.response_content.push(ContentBlock::Text {
+            text: guided_message,
+        });
+        Ok(())
+    }
+}
--- a/crates/zclaw-runtime/src/middleware/tool_output_guard.rs
+++ b/crates/zclaw-runtime/src/middleware/tool_output_guard.rs
@@ -0,0 +1,132 @@
+//! Tool output sanitization middleware — inspects tool results for risky content
+//! before they flow back into the LLM context.
+//!
+//! Motivated by a gap in DeerFlow, which has no "Toxic Output Loop" defense —
+//! ZCLAW proactively implements post-execution output checking.
+//!
+//! Rules:
+//! - Output length cap: warns when tool output exceeds threshold
+//! - Sensitive pattern detection: flags API keys, tokens, passwords
+//! - Injection marker detection: flags common prompt-injection patterns
+//!
+//! This middleware does NOT modify content. It only logs warnings at appropriate levels.
+
+use async_trait::async_trait;
+use serde_json::Value;
+use zclaw_types::Result;
+
+use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
+
+/// Maximum safe output length, compared against the byte length of the JSON-serialized result.
+const MAX_OUTPUT_LENGTH: usize = 50_000;
+
+/// Substrings whose presence in tool output suggests leaked secrets (a match is only logged).
+const SENSITIVE_PATTERNS: &[&str] = &[
+    "api_key",
+    "apikey",
+    "api-key",
+    "secret_key",
+    "secretkey",
+    "access_token",
+    "auth_token",
+    "password",
+    "private_key",
+    "-----BEGIN RSA",
+    "-----BEGIN PRIVATE",
+    "sk-",           // OpenAI API key prefix
+    "sk_live_",      // Stripe live key prefix
+    "AKIA",          // AWS access key prefix
+];
+
+/// Substrings that may indicate prompt injection in tool output (a match is only logged).
+const INJECTION_PATTERNS: &[&str] = &[
+    "ignore previous instructions",
+    "ignore all previous",
+    "disregard your instructions",
+    "you are now",
+    "new instructions:",
+    "system:",
+    "[INST]",
+    "</scratchpad>",
+    "think step by step about",
+];
+
+/// Tool output sanitization middleware.
+///
+/// Priority 360 — runs after ToolErrorMiddleware (350), before GuardrailMiddleware (400).
+pub struct ToolOutputGuardMiddleware {
+    /// Serialized-output length above which an oversize warning is logged.
+    max_output_length: usize,
+}
+
+impl ToolOutputGuardMiddleware {
+    /// Creates the guard with the `MAX_OUTPUT_LENGTH` threshold.
+    pub fn new() -> Self { Self::default() }
+}
+
+impl Default for ToolOutputGuardMiddleware {
+    fn default() -> Self {
+        Self {
+            max_output_length: MAX_OUTPUT_LENGTH,
+        }
+    }
+}
+
+#[async_trait]
+impl AgentMiddleware for ToolOutputGuardMiddleware {
+    fn name(&self) -> &str { "tool_output_guard" }
+    fn priority(&self) -> i32 { 360 }
+
+    /// Always allows the call — this middleware only inspects output.
+    async fn before_tool_call(
+        &self,
+        _ctx: &MiddlewareContext,
+        _tool_name: &str,
+        _tool_input: &Value,
+    ) -> Result<ToolCallDecision> {
+        Ok(ToolCallDecision::Allow)
+    }
+
+    /// Logs a warning when the serialized `result` is oversized or contains a
+    /// sensitive or injection pattern. Never modifies the result or the context.
+    async fn after_tool_call(
+        &self,
+        _ctx: &mut MiddlewareContext,
+        tool_name: &str,
+        result: &Value,
+    ) -> Result<()> {
+        let output_str = serde_json::to_string(result).unwrap_or_default();
+        let output_len = output_str.len();
+
+        // Rule 1: Output length check (`len()` is a byte count)
+        if output_len > self.max_output_length {
+            tracing::warn!(
+                "[ToolOutputGuard] Tool '{}' returned oversized output: {} bytes (limit: {})",
+                tool_name, output_len, self.max_output_length
+            );
+        }
+
+        // A pattern hits when it occurs verbatim or in the lower-cased output.
+        // Checking only the lower-cased text would never match the upper-case
+        // patterns ("AKIA", "[INST]", "-----BEGIN RSA", ...).
+        let output_lower = output_str.to_lowercase();
+        let hit = |pattern: &str| output_str.contains(pattern) || output_lower.contains(pattern);
+        // Rule 2: Sensitive information detection (only warn once per tool call)
+        if let Some(pattern) = SENSITIVE_PATTERNS.iter().copied().find(|&p| hit(p)) {
+            tracing::warn!(
+                "[ToolOutputGuard] Tool '{}' output contains sensitive pattern: '{}'",
+                tool_name, pattern
+            );
+        }
+
+        // Rule 3: Injection marker detection (only warn once per tool call)
+        if let Some(pattern) = INJECTION_PATTERNS.iter().copied().find(|&p| hit(p)) {
+            tracing::warn!(
+                "[ToolOutputGuard] Tool '{}' output contains potential injection marker: '{}'",
+                tool_name, pattern
+            );
+        }
+
+        Ok(())
+    }
+}