fix(growth,kernel,runtime,desktop): 50 轮功能链路审计 7 项断链修复

P0 修复:
- B-MEM-2: 跨会话记忆丢失 — 添加 IdentityRecall 查询意图检测，身份类查询绕过 FTS5/LIKE 文本搜索，直接按 scope 检索全部偏好+知识记忆；缓存 GrowthIntegration 到 Kernel，避免每次请求重建空 scorer
- B-HAND-1: Hands 未触发 — 创建 HandTool wrapper 实现 Tool trait，在 create_tool_registry() 中注册所有已启用 Hands 为 LLM 可调用工具

P1 修复:
- B-SCHED-4: 一次性定时未拦截 — 添加 RE_ONE_SHOT_TODAY 正则，匹配 "下午3点半提醒我..." 这类无日期前缀的同日触发模式
- B-CHAT-2: 工具调用循环 — ToolErrorMiddleware 添加连续失败计数器，连续 3 次失败后自动 AbortLoop，防止无限重试
- B-CHAT-5: Stream 竞态 — cancelStream 后添加 500ms cancelCooldown，防止后端 active-stream 检查竞态
2026-04-20 09:43:38 +08:00
parent 24b866fc28
commit f2917366a8
10 changed files with 746 additions and 49 deletions
--- a/crates/zclaw-runtime/src/middleware/tool_error.rs
+++ b/crates/zclaw-runtime/src/middleware/tool_error.rs
@@ -4,12 +4,16 @@
 //! Inspired by DeerFlow's ToolErrorMiddleware: instead of propagating raw errors
 //! that crash the agent loop, this middleware wraps tool errors into a structured
 //! format that the LLM can use to self-correct.
+//!
+//! Also counts consecutive failed tool results, regardless of which tool failed — once
+//! N results in a row report an error, the next tool call aborts the loop to stop retry cycles.

 use async_trait::async_trait;
 use serde_json::Value;
 use zclaw_types::Result;
 use crate::driver::ContentBlock;
 use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
+use std::sync::Mutex;

 /// Middleware that intercepts tool call errors and formats recovery messages.
 ///
@@ -17,12 +21,18 @@ use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
 pub struct ToolErrorMiddleware {
    /// Maximum error message length before truncation.
    max_error_length: usize,
+    /// Number of consecutive failed tool results after which the next tool call aborts the loop.
+    max_consecutive_failures: u32,
+    /// Running count of consecutive failed tool results; a result without `error` resets it to 0.
+    consecutive_failures: Mutex<u32>,
 }

 impl ToolErrorMiddleware {
    pub fn new() -> Self {
        Self {
            max_error_length: 500,
+            max_consecutive_failures: 3, // threshold checked before each tool call
+            consecutive_failures: Mutex::new(0), // no failures recorded yet
        }
    }

@@ -61,7 +71,6 @@ impl AgentMiddleware for ToolErrorMiddleware {
        tool_input: &Value,
    ) -> Result<ToolCallDecision> {
        // Pre-validate tool input structure for common issues.
-        // This catches malformed JSON inputs before they reach the tool executor.
        if tool_input.is_null() {
            tracing::warn!(
                "[ToolErrorMiddleware] Tool '{}' received null input — replacing with empty object",
@@ -69,6 +78,19 @@ impl AgentMiddleware for ToolErrorMiddleware {
            );
            return Ok(ToolCallDecision::ReplaceInput(serde_json::json!({})));
        }
+
+        // Check consecutive failure count — abort if too many failures
+        let failures = self.consecutive_failures.lock().unwrap_or_else(|e| e.into_inner());
+        if *failures >= self.max_consecutive_failures {
+            tracing::warn!(
+                "[ToolErrorMiddleware] Aborting loop: {} consecutive tool failures",
+                *failures
+            );
+            return Ok(ToolCallDecision::AbortLoop(
+                format!("连续 {} 次工具调用失败，已自动终止以避免无限重试", *failures)
+            ));
+        }
+
        Ok(ToolCallDecision::Allow)
    }

@@ -78,14 +100,16 @@ impl AgentMiddleware for ToolErrorMiddleware {
        tool_name: &str,
        result: &Value,
    ) -> Result<()> {
+        let mut failures = self.consecutive_failures.lock().unwrap_or_else(|e| e.into_inner());
+
        // Check if the tool result indicates an error.
        if let Some(error) = result.get("error") {
+            *failures += 1;
            let error_msg = match error {
                Value::String(s) => s.clone(),
                other => other.to_string(),
            };
            let truncated = if error_msg.len() > self.max_error_length {
-                // Use char-boundary-safe truncation to avoid panic on UTF-8 strings (e.g. Chinese)
                let end = error_msg.floor_char_boundary(self.max_error_length);
                format!("{}...(truncated)", &error_msg[..end])
            } else {
@@ -93,19 +117,19 @@ impl AgentMiddleware for ToolErrorMiddleware {
            };

            tracing::warn!(
-                "[ToolErrorMiddleware] Tool '{}' failed: {}",
-                tool_name, truncated
+                "[ToolErrorMiddleware] Tool '{}' failed ({}/{} consecutive): {}",
+                tool_name, *failures, self.max_consecutive_failures, truncated
            );

-            // Build a guided recovery message so the LLM can self-correct.
            let guided_message = self.format_tool_error(tool_name, &truncated);
-
-            // Inject into response_content so the agent loop feeds this back
-            // to the LLM alongside the raw tool result.
            ctx.response_content.push(ContentBlock::Text {
                text: guided_message,
            });
+        } else {
+            // Success — reset consecutive failure counter
+            *failures = 0;
        }
+
        Ok(())
    }
 }