fix(growth,kernel,runtime,desktop): 50 轮功能链路审计 7 项断链修复
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

P0 修复:
- B-MEM-2: 跨会话记忆丢失 — 添加 IdentityRecall 查询意图检测,
  身份类查询绕过 FTS5/LIKE 文本搜索,直接按 scope 检索全部偏好+知识记忆;
  缓存 GrowthIntegration 到 Kernel 避免每次请求重建空 scorer
- B-HAND-1: Hands 未触发 — 创建 HandTool wrapper 实现 Tool trait,
  在 create_tool_registry() 中注册所有已启用 Hands 为 LLM 可调用工具

P1 修复:
- B-SCHED-4: 一次性定时未拦截 — 添加 RE_ONE_SHOT_TODAY 正则匹配
  "下午3点半提醒我..."类无日期前缀的同日触发模式
- B-CHAT-2: 工具调用循环 — ToolErrorMiddleware 添加连续失败计数器,
  3 次连续失败后自动 AbortLoop 防止无限重试
- B-CHAT-5: Stream 竞态 — cancelStream 后添加 500ms cancelCooldown,
  防止后端 active-stream 检查竞态
This commit is contained in:
iven
2026-04-20 09:43:38 +08:00
parent 24b866fc28
commit f2917366a8
10 changed files with 746 additions and 49 deletions

View File

@@ -4,12 +4,16 @@
//! Inspired by DeerFlow's ToolErrorMiddleware: instead of propagating raw errors
//! that crash the agent loop, this middleware wraps tool errors into a structured
//! format that the LLM can use to self-correct.
//!
//! Also counts consecutive tool failures regardless of which tool failed — once
//! the count reaches the configured limit, the next tool call aborts the loop to
//! prevent infinite retry cycles. Any successful tool result resets the count.
use async_trait::async_trait;
use serde_json::Value;
use zclaw_types::Result;
use crate::driver::ContentBlock;
use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
use std::sync::Mutex;
/// Middleware that intercepts tool call errors and formats recovery messages.
///
@@ -17,12 +21,18 @@ use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
pub struct ToolErrorMiddleware {
    /// Upper bound, in bytes, on an error message; longer messages are cut
    /// at a UTF-8 character boundary before being logged or fed back.
    max_error_length: usize,

    /// Threshold of back-to-back failed tool calls. Once the running count
    /// reaches this value, the next tool call is answered with an abort.
    max_consecutive_failures: u32,

    /// Running count of back-to-back failed tool calls: incremented when a
    /// tool result carries an `error` field, reset to zero on any success.
    /// Guarded by a mutex; a poisoned lock is recovered instead of panicking.
    consecutive_failures: Mutex<u32>,
}
impl ToolErrorMiddleware {
/// Builds the middleware with its built-in defaults: error messages are
/// capped at 500 bytes, the abort threshold is 3 consecutive failures,
/// and the failure counter starts at zero.
pub fn new() -> Self {
    let max_error_length = 500;
    let max_consecutive_failures = 3;
    let consecutive_failures = Mutex::new(0);

    Self {
        max_error_length,
        max_consecutive_failures,
        consecutive_failures,
    }
}
@@ -61,7 +71,6 @@ impl AgentMiddleware for ToolErrorMiddleware {
tool_input: &Value,
) -> Result<ToolCallDecision> {
// Pre-validate tool input structure for common issues.
// This catches malformed JSON inputs before they reach the tool executor.
if tool_input.is_null() {
tracing::warn!(
"[ToolErrorMiddleware] Tool '{}' received null input — replacing with empty object",
@@ -69,6 +78,19 @@ impl AgentMiddleware for ToolErrorMiddleware {
);
return Ok(ToolCallDecision::ReplaceInput(serde_json::json!({})));
}
// Check consecutive failure count — abort if too many failures
let failures = self.consecutive_failures.lock().unwrap_or_else(|e| e.into_inner());
if *failures >= self.max_consecutive_failures {
tracing::warn!(
"[ToolErrorMiddleware] Aborting loop: {} consecutive tool failures",
*failures
);
return Ok(ToolCallDecision::AbortLoop(
format!("连续 {} 次工具调用失败,已自动终止以避免无限重试", *failures)
));
}
Ok(ToolCallDecision::Allow)
}
@@ -78,14 +100,16 @@ impl AgentMiddleware for ToolErrorMiddleware {
tool_name: &str,
result: &Value,
) -> Result<()> {
let mut failures = self.consecutive_failures.lock().unwrap_or_else(|e| e.into_inner());
// Check if the tool result indicates an error.
if let Some(error) = result.get("error") {
*failures += 1;
let error_msg = match error {
Value::String(s) => s.clone(),
other => other.to_string(),
};
let truncated = if error_msg.len() > self.max_error_length {
// Use char-boundary-safe truncation to avoid panic on UTF-8 strings (e.g. Chinese)
let end = error_msg.floor_char_boundary(self.max_error_length);
format!("{}...(truncated)", &error_msg[..end])
} else {
@@ -93,19 +117,19 @@ impl AgentMiddleware for ToolErrorMiddleware {
};
tracing::warn!(
"[ToolErrorMiddleware] Tool '{}' failed: {}",
tool_name, truncated
"[ToolErrorMiddleware] Tool '{}' failed ({}/{} consecutive): {}",
tool_name, *failures, self.max_consecutive_failures, truncated
);
// Build a guided recovery message so the LLM can self-correct.
let guided_message = self.format_tool_error(tool_name, &truncated);
// Inject into response_content so the agent loop feeds this back
// to the LLM alongside the raw tool result.
ctx.response_content.push(ContentBlock::Text {
text: guided_message,
});
} else {
// Success — reset consecutive failure counter
*failures = 0;
}
Ok(())
}
}