refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup

- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace `let _ =` with proper `log::warn!` across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
2026-04-03 00:28:03 +08:00
parent 0a04b260a4
commit 52bdafa633
55 changed files with 4130 additions and 1959 deletions
--- a/crates/zclaw-runtime/src/middleware/dangling_tool.rs
+++ b/crates/zclaw-runtime/src/middleware/dangling_tool.rs
@@ -0,0 +1,125 @@
+//! Dangling tool-call repair middleware — detects and patches missing tool-result
+//! messages that would cause LLM API errors.
+//!
+//! When the LLM produces a `ToolUse` content block but the agent loop fails to
+//! produce a corresponding `ToolResult` message (e.g. due to a crash or timeout),
+//! the conversation history becomes inconsistent. The next LLM call would fail with
+//! an API error because ToolUse messages must be followed by ToolResult messages.
+//!
+//! This middleware inspects the message history before each completion and appends
+//! placeholder ToolResult messages for any dangling ToolUse entries.
+
+use std::collections::HashSet;
+
+use async_trait::async_trait;
+use zclaw_types::{Message, Result};
+use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};
+
+/// Stateless middleware that repairs dangling tool-use blocks in conversation history.
+/// Priority 300 — runs before tool error middleware (350) and guardrail (400).
+#[derive(Debug, Clone, Copy)]
+pub struct DanglingToolMiddleware;
+
+impl DanglingToolMiddleware {
+    /// Creates the middleware. The type is a unit struct, so there is nothing
+    /// to configure.
+    pub fn new() -> Self { DanglingToolMiddleware }
+}
+
+impl Default for DanglingToolMiddleware {
+    /// Returns the same value as `DanglingToolMiddleware::new`; provided so the
+    /// middleware can be built through `Default`.
+    fn default() -> Self { DanglingToolMiddleware::new() }
+}
+
+#[async_trait]
+impl AgentMiddleware for DanglingToolMiddleware {
+    fn name(&self) -> &str { "dangling_tool" }
+    fn priority(&self) -> i32 { 300 }
+
+    /// Repairs `ctx.messages` so that every `ToolUse` has a `ToolResult`.
+    ///
+    /// A `Message::ToolUse` is dangling when no `Message::ToolResult` anywhere in
+    /// the history carries its `id` as `tool_call_id`. Each dangling entry gets an
+    /// error placeholder result (payload flagged `"tool_patch": true`) inserted
+    /// directly after it. A repeated run should not patch twice, assuming
+    /// `Message::tool_result` stores the id it is given as `tool_call_id`.
+    ///
+    /// # Errors
+    ///
+    /// Never fails: always returns `Ok(MiddlewareDecision::Continue)`.
+    async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
+        // Step 1: ids that already have a result somewhere in the history. The
+        // lookup is global rather than limited to adjacent message pairs.
+        let answered: HashSet<&str> = ctx.messages.iter()
+            .filter_map(|msg| match msg {
+                Message::ToolResult { tool_call_id, .. } => Some(tool_call_id.as_str()),
+                _ => None,
+            })
+            .collect();
+
+        // Step 2: ids of ToolUse entries without any matching result. Only these
+        // ids are cloned; tool names are read later, when a placeholder is built.
+        let dangling: HashSet<String> = ctx.messages.iter()
+            .filter_map(|msg| match msg {
+                Message::ToolUse { id, .. } if !answered.contains(id.as_str()) => Some(id.clone()),
+                _ => None,
+            })
+            .collect();
+
+        if dangling.is_empty() {
+            return Ok(MiddlewareDecision::Continue);
+        }
+
+        // Step 3: rebuild the history, following each dangling ToolUse with an
+        // error placeholder. The list is taken out of `ctx` and moved message by
+        // message, so untouched entries are not cloned. Nothing below awaits or
+        // returns early, so `ctx.messages` is always written back.
+        let history = std::mem::take(&mut ctx.messages);
+        let mut patched: Vec<Message> = Vec::with_capacity(history.len() + dangling.len());
+        let mut patched_count = 0usize;
+
+        for msg in history {
+            let placeholder = match &msg {
+                Message::ToolUse { id, tool, .. } if dangling.contains(id) => {
+                    tracing::warn!(
+                        "[DanglingToolMiddleware] Patching dangling ToolUse: tool={}, id={}",
+                        tool.as_str(), id
+                    );
+                    Some(Message::tool_result(
+                        id.clone(),
+                        tool.clone(),
+                        serde_json::json!({
+                            "error": "Tool execution was interrupted. Please retry or use an alternative approach.",
+                            "tool_patch": true,
+                        }),
+                        true, // is_error
+                    ))
+                }
+                _ => None,
+            };
+            patched.push(msg);
+            if let Some(result) = placeholder {
+                patched.push(result);
+                patched_count += 1;
+            }
+        }
+
+        // Step 4: purely informational — an Assistant message at the tail while
+        // tools were dangling suggests an interrupted streaming response.
+        if let Some(Message::Assistant { .. }) = patched.last() {
+            tracing::debug!(
+                "[DanglingToolMiddleware] Streaming interrupt detected with {} dangling tools",
+                patched_count
+            );
+        }
+
+        tracing::info!(
+            "[DanglingToolMiddleware] Patched {} dangling tool-use blocks",
+            patched_count
+        );
+        ctx.messages = patched;
+
+        Ok(MiddlewareDecision::Continue)
+    }
+}