refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup

- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace let _ = with proper log::warn! across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
This commit is contained in:
iven
2026-04-03 00:28:03 +08:00
parent 0a04b260a4
commit 52bdafa633
55 changed files with 4130 additions and 1959 deletions

View File

@@ -0,0 +1,125 @@
//! Dangling tool-call repair middleware — detects and patches missing tool-result
//! messages that would cause LLM API errors.
//!
//! When the LLM produces a `ToolUse` content block but the agent loop fails to
//! produce a corresponding `ToolResult` message (e.g. due to a crash or timeout),
//! the conversation history becomes inconsistent. The next LLM call would fail with
//! an API error because ToolUse messages must be followed by ToolResult messages.
//!
//! This middleware inspects the message history before each completion and appends
//! placeholder ToolResult messages for any dangling ToolUse entries.
use std::collections::HashSet;
use async_trait::async_trait;
use zclaw_types::{Message, Result};
use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};
/// Middleware that repairs dangling tool-use blocks in conversation history.
///
/// Priority 300 — runs before tool error middleware (350) and guardrail (400).
///
/// The type carries no state, so `Default` is derived rather than hand-written
/// and `Debug` is provided for diagnostics.
#[derive(Debug, Default)]
pub struct DanglingToolMiddleware;

impl DanglingToolMiddleware {
    /// Creates the middleware. Equivalent to `DanglingToolMiddleware::default()`.
    pub fn new() -> Self {
        Self
    }
}
#[async_trait]
impl AgentMiddleware for DanglingToolMiddleware {
    fn name(&self) -> &str { "dangling_tool" }
    fn priority(&self) -> i32 { 300 }

    /// Inserts a placeholder error `ToolResult` directly after every `ToolUse`
    /// message whose ID has no `ToolResult` anywhere in `ctx.messages`.
    ///
    /// Always returns `MiddlewareDecision::Continue`. When nothing is dangling
    /// the message list is left untouched.
    async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
        // Step 1: Collect the IDs answered by a ToolResult anywhere in the
        // history (not just adjacent pairs). Placeholders inserted by an earlier
        // run are ordinary ToolResult messages, so they count as answered and
        // are never patched twice.
        let answered_ids: HashSet<&str> = ctx
            .messages
            .iter()
            .filter_map(|msg| match msg {
                Message::ToolResult { tool_call_id, .. } => Some(tool_call_id.as_str()),
                _ => None,
            })
            .collect();

        // Step 2: ToolUse IDs with no matching ToolResult. Owned strings are
        // collected so the borrow of `ctx.messages` ends before it is rebuilt.
        let dangling_ids: HashSet<String> = ctx
            .messages
            .iter()
            .filter_map(|msg| match msg {
                Message::ToolUse { id, .. } if !answered_ids.contains(id.as_str()) => {
                    Some(id.clone())
                }
                _ => None,
            })
            .collect();

        if dangling_ids.is_empty() {
            return Ok(MiddlewareDecision::Continue);
        }

        // Step 3: Rebuild the history, moving each message (no per-message clone)
        // and appending a placeholder right after each dangling ToolUse.
        let original = std::mem::take(&mut ctx.messages);
        let mut patched_messages: Vec<Message> =
            Vec::with_capacity(original.len() + dangling_ids.len());
        let mut patched_count = 0usize;

        for msg in original {
            let placeholder = match &msg {
                Message::ToolUse { id, tool, .. } if dangling_ids.contains(id) => {
                    tracing::warn!(
                        "[DanglingToolMiddleware] Patching dangling ToolUse: tool={}, id={}",
                        tool.as_str(), id
                    );
                    Some(Message::tool_result(
                        id.clone(),
                        tool.clone(),
                        serde_json::json!({
                            "error": "Tool execution was interrupted. Please retry or use an alternative approach.",
                            "tool_patch": true,
                        }),
                        true, // is_error
                    ))
                }
                _ => None,
            };

            patched_messages.push(msg);
            if let Some(placeholder) = placeholder {
                patched_messages.push(placeholder);
                patched_count += 1;
            }
        }

        // Step 4: Detect streaming interrupt — if the last message is an Assistant
        // response while there were dangling tools, the user likely interrupted a
        // streaming response mid-tool-execution. No additional action is needed
        // beyond the patched ToolResult messages that now prevent API errors.
        if let Some(Message::Assistant { .. }) = patched_messages.last() {
            tracing::debug!(
                "[DanglingToolMiddleware] Streaming interrupt detected with {} dangling tools",
                patched_count
            );
        }

        // Every dangling ID came from a ToolUse message in the list, so at least
        // one placeholder was inserted by the time we get here.
        tracing::info!(
            "[DanglingToolMiddleware] Patched {} dangling tool-use blocks",
            patched_count
        );
        ctx.messages = patched_messages;

        Ok(MiddlewareDecision::Continue)
    }
}

View File

@@ -41,7 +41,7 @@ impl AgentMiddleware for LoopGuardMiddleware {
match result {
LoopGuardResult::CircuitBreaker => {
tracing::warn!("[LoopGuardMiddleware] Circuit breaker triggered by tool '{}'", tool_name);
Ok(ToolCallDecision::Block("检测到工具调用循环,已自动终止".to_string()))
Ok(ToolCallDecision::AbortLoop("检测到工具调用循环,已自动终止".to_string()))
}
LoopGuardResult::Blocked => {
tracing::warn!("[LoopGuardMiddleware] Tool '{}' blocked", tool_name);

View File

@@ -60,34 +60,39 @@ impl AgentMiddleware for MemoryMiddleware {
fn priority(&self) -> i32 { 150 }
async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
// Skip memory injection for very short queries.
// Short queries (e.g., "1+6", "hi", "好") don't benefit from memory context.
// Worse, the retriever's scope-based fallback may return high-importance but
// irrelevant old memories, causing the model to think about past conversations
// instead of answering the current question.
// Use char count (not byte count) so CJK queries are handled correctly:
// a single Chinese char is 3 UTF-8 bytes but 1 meaningful character.
let query = ctx.user_input.trim();
if query.chars().count() < 2 {
tracing::debug!(
"[MemoryMiddleware] Skipping enhancement for short query ({:?}): no memory context needed",
query
);
return Ok(MiddlewareDecision::Continue);
}
tracing::debug!(
"[MemoryMiddleware] before_completion for query: {:?}",
ctx.user_input.chars().take(50).collect::<String>()
);
match self.growth.enhance_prompt(
&ctx.agent_id,
&ctx.system_prompt,
&ctx.user_input,
).await {
// Retrieve relevant memories and inject into system prompt.
// The SqliteStorage retriever now uses FTS5-only matching — if FTS5 finds
// no relevant results, no memories are returned (no scope-based fallback).
// This prevents irrelevant high-importance memories from leaking into
// unrelated conversations.
let base = &ctx.system_prompt;
match self.growth.enhance_prompt(&ctx.agent_id, base, &ctx.user_input).await {
Ok(enhanced) => {
ctx.system_prompt = enhanced;
if enhanced != *base {
tracing::info!(
"[MemoryMiddleware] Injected memories into system prompt for agent {}",
ctx.agent_id
);
ctx.system_prompt = enhanced;
} else {
tracing::debug!(
"[MemoryMiddleware] No relevant memories found for query: {:?}",
ctx.user_input.chars().take(50).collect::<String>()
);
}
Ok(MiddlewareDecision::Continue)
}
Err(e) => {
// Non-fatal: memory retrieval failure should not block the loop
tracing::warn!("[MemoryMiddleware] Prompt enhancement failed: {}", e);
// Non-fatal: retrieval failure should not block the conversation
tracing::warn!(
"[MemoryMiddleware] Memory retrieval failed (non-fatal): {}",
e
);
Ok(MiddlewareDecision::Continue)
}
}

View File

@@ -0,0 +1,87 @@
//! Sub-agent limit middleware — enforces limits on sub-agent spawning.
//!
//! Prevents runaway sub-agent spawning by enforcing a per-turn total cap.
//! The `running` counter was removed because it leaked when subsequent
//! middleware blocked the tool call (before_tool_call increments but
//! after_tool_call never fires for blocked tools).
use async_trait::async_trait;
use serde_json::Value;
use zclaw_types::Result;
use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
/// Cap on sub-agent spawns per conversation turn used when none is configured.
const DEFAULT_MAX_TOTAL: usize = 10;

/// Middleware that limits total sub-agent spawn count per turn.
///
/// Priority 550 — runs after loop guard (500).
pub struct SubagentLimitMiddleware {
    /// Upper bound on sub-agent spawns within one conversation turn.
    max_total: usize,
    /// Number of sub-agent spawns counted so far in the current turn.
    total_spawned: std::sync::atomic::AtomicUsize,
}

impl SubagentLimitMiddleware {
    /// Builds a limiter with the default cap and a zeroed spawn counter.
    pub fn new() -> Self {
        let total_spawned = std::sync::atomic::AtomicUsize::new(0);
        Self {
            max_total: DEFAULT_MAX_TOTAL,
            total_spawned,
        }
    }

    /// Builder-style override of the per-turn cap; the counter is carried over.
    pub fn with_max_total(self, n: usize) -> Self {
        Self { max_total: n, ..self }
    }

    /// Returns `true` when `tool_name` is one of the tool names treated as a
    /// sub-agent spawn request (exact, case-sensitive match).
    fn is_subagent_tool(tool_name: &str) -> bool {
        const SPAWN_TOOLS: [&str; 4] = ["task", "delegate", "spawn_agent", "subagent"];
        SPAWN_TOOLS.contains(&tool_name)
    }
}

impl Default for SubagentLimitMiddleware {
    /// Same as [`SubagentLimitMiddleware::new`].
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl AgentMiddleware for SubagentLimitMiddleware {
    fn name(&self) -> &str { "subagent_limit" }
    fn priority(&self) -> i32 { 550 }

    /// Allows non-spawn tools unconditionally. For sub-agent spawn tools, it
    /// claims one slot of the per-turn budget and blocks the call once
    /// `max_total` slots have been claimed.
    async fn before_tool_call(
        &self,
        _ctx: &MiddlewareContext,
        tool_name: &str,
        _tool_input: &Value,
    ) -> Result<ToolCallDecision> {
        if !Self::is_subagent_tool(tool_name) {
            return Ok(ToolCallDecision::Allow);
        }

        // Claim a slot with a single atomic read-modify-write. The counter is only
        // incremented while it is below the cap, so it never overshoots and never
        // needs a rollback. An add-then-subtract rollback could wrap the counter
        // to usize::MAX if the per-turn reset ran between the two operations.
        let max_total = self.max_total;
        let claimed = self.total_spawned.fetch_update(
            std::sync::atomic::Ordering::SeqCst,
            std::sync::atomic::Ordering::SeqCst,
            |spawned| {
                if spawned < max_total {
                    Some(spawned + 1)
                } else {
                    None
                }
            },
        );

        if claimed.is_err() {
            tracing::warn!(
                "[SubagentLimitMiddleware] Total sub-agent limit ({}) reached — blocking spawn",
                self.max_total
            );
            return Ok(ToolCallDecision::Block(format!(
                "子Agent总数量已达上限 ({}),请优先完成现有任务后再发起新任务。",
                self.max_total
            )));
        }

        Ok(ToolCallDecision::Allow)
    }

    /// Resets the spawn counter to zero so the next turn starts with a full budget.
    async fn after_completion(&self, _ctx: &MiddlewareContext) -> Result<()> {
        // Reset per-turn counter after the agent loop turn completes.
        self.total_spawned.store(0, std::sync::atomic::Ordering::SeqCst);
        Ok(())
    }
}

View File

@@ -5,22 +5,29 @@
//! "新对话" or truncating the user's first message.
//!
//! Priority 180 — runs after compaction (100) and memory (150), before skill index (200).
//!
//! NOTE: This is a structural placeholder. Full implementation requires an LLM driver
//! reference to generate titles asynchronously, which will be wired through the
//! middleware context in a future iteration. For now it simply passes through.
use async_trait::async_trait;
use zclaw_types::Result;
use crate::middleware::{AgentMiddleware, MiddlewareContext};
use crate::middleware::{AgentMiddleware, MiddlewareDecision};
/// Middleware that auto-generates conversation titles after the first exchange.
///
/// When fully implemented, this will:
/// 1. Detect the first user-assistant exchange (via message count)
/// 2. Call the LLM with a short prompt to generate a descriptive title
/// 3. Update the session title via the middleware context
///
/// For now, it serves as a registered placeholder in the middleware chain.
pub struct TitleMiddleware {
/// Whether a title has been generated for the current session.
titled: std::sync::atomic::AtomicBool,
_reserved: (),
}
impl TitleMiddleware {
pub fn new() -> Self {
Self {
titled: std::sync::atomic::AtomicBool::new(false),
}
Self { _reserved: () }
}
}
@@ -34,4 +41,9 @@ impl Default for TitleMiddleware {
impl AgentMiddleware for TitleMiddleware {
fn name(&self) -> &str { "title" }
fn priority(&self) -> i32 { 180 }
// All hooks default to Continue — placeholder until LLM driver is wired in.
async fn before_completion(&self, _ctx: &mut crate::middleware::MiddlewareContext) -> zclaw_types::Result<MiddlewareDecision> {
Ok(MiddlewareDecision::Continue)
}
}

View File

@@ -0,0 +1,111 @@
//! Tool error middleware — detects tool results that carry an `error` field and
//! appends a guided recovery text block to the response content so the LLM can
//! recover from the failure.
//!
//! Inspired by DeerFlow's ToolErrorMiddleware: instead of propagating raw errors
//! that crash the agent loop, this middleware wraps tool errors into a structured
//! format that the LLM can use to self-correct.
use async_trait::async_trait;
use serde_json::Value;
use zclaw_types::Result;
use crate::driver::ContentBlock;
use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
/// Middleware that intercepts tool call errors and formats recovery messages.
///
/// Priority 350 — runs after dangling tool repair (300) and before guardrail (400).
pub struct ToolErrorMiddleware {
    /// Longest error text (as measured by `str::len`) kept before truncation.
    max_error_length: usize,
}

impl ToolErrorMiddleware {
    /// Builds the middleware with the default error-length limit of 500.
    pub fn new() -> Self {
        let max_error_length = 500;
        Self { max_error_length }
    }

    /// Builder-style override of the maximum error length.
    pub fn with_max_error_length(self, len: usize) -> Self {
        Self { max_error_length: len }
    }

    /// Renders the recovery hint shown to the LLM for a failed tool call.
    ///
    /// `error` is embedded verbatim; any truncation must already have been
    /// applied by the caller.
    fn format_tool_error(&self, tool_name: &str, error: &str) -> String {
        format!("工具 '{tool_name}' 执行失败。错误信息: {error}\n请分析错误原因,尝试修正参数后重试,或使用其他方法完成任务。")
    }
}

impl Default for ToolErrorMiddleware {
    /// Same as [`ToolErrorMiddleware::new`].
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl AgentMiddleware for ToolErrorMiddleware {
    fn name(&self) -> &str { "tool_error" }
    fn priority(&self) -> i32 { 350 }

    /// Replaces a `null` tool input with an empty JSON object; every other
    /// input is allowed through unchanged.
    async fn before_tool_call(
        &self,
        _ctx: &MiddlewareContext,
        tool_name: &str,
        tool_input: &Value,
    ) -> Result<ToolCallDecision> {
        // Only a null input is treated as malformed at this stage.
        if !tool_input.is_null() {
            return Ok(ToolCallDecision::Allow);
        }

        tracing::warn!(
            "[ToolErrorMiddleware] Tool '{}' received null input — replacing with empty object",
            tool_name
        );
        Ok(ToolCallDecision::ReplaceInput(Value::Object(serde_json::Map::new())))
    }

    /// When `result` has an `"error"` entry, logs it and appends a guided
    /// recovery text block to `ctx.response_content`. Results without that
    /// entry are ignored.
    async fn after_tool_call(
        &self,
        ctx: &mut MiddlewareContext,
        tool_name: &str,
        result: &Value,
    ) -> Result<()> {
        let Some(error) = result.get("error") else {
            return Ok(());
        };

        // String errors are used as-is; any other JSON value is rendered as JSON.
        let mut shown = match error {
            Value::String(text) => text.clone(),
            other => other.to_string(),
        };

        if shown.len() > self.max_error_length {
            // Cut on a char boundary at or below the limit so multi-byte UTF-8
            // text (e.g. Chinese) cannot cause a panic.
            let cut = shown.floor_char_boundary(self.max_error_length);
            shown.truncate(cut);
            shown.push_str("...(truncated)");
        }

        tracing::warn!(
            "[ToolErrorMiddleware] Tool '{}' failed: {}",
            tool_name, shown
        );

        // Append the recovery hint as a text block in the response content.
        let guided_message = self.format_tool_error(tool_name, &shown);
        ctx.response_content.push(ContentBlock::Text {
            text: guided_message,
        });

        Ok(())
    }
}

View File

@@ -0,0 +1,132 @@
//! Tool output sanitization middleware — inspects tool results for risky content
//! before they flow back into the LLM context.
//!
//! Inspired by DeerFlow's missing "Toxic Output Loop" defense — ZCLAW proactively
//! implements post-execution output checking.
//!
//! Rules:
//! - Output length cap: warns when tool output exceeds threshold
//! - Sensitive pattern detection: flags API keys, tokens, passwords
//! - Injection marker detection: flags common prompt-injection patterns
//!
//! This middleware does NOT modify content. It only logs warnings at appropriate levels.
use async_trait::async_trait;
use serde_json::Value;
use zclaw_types::Result;
use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
/// Maximum safe output length. It is compared against the byte length of the
/// JSON-serialized tool output.
const MAX_OUTPUT_LENGTH: usize = 50_000;

/// Patterns that indicate sensitive information in tool output.
///
/// Every entry MUST be lowercase: patterns are searched for in a lowercased
/// copy of the output, so an entry containing uppercase characters can never
/// match.
const SENSITIVE_PATTERNS: &[&str] = &[
    "api_key",
    "apikey",
    "api-key",
    "secret_key",
    "secretkey",
    "access_token",
    "auth_token",
    "password",
    "private_key",
    "-----begin rsa",     // PEM header "-----BEGIN RSA ..."
    "-----begin private", // PEM header "-----BEGIN PRIVATE KEY-----"
    "sk-",                // OpenAI API keys
    "sk_live_",           // Stripe keys
    "akia",               // AWS access keys ("AKIA..." prefix)
];

/// Patterns that may indicate prompt injection in tool output.
///
/// Every entry MUST be lowercase for the same reason as `SENSITIVE_PATTERNS`.
const INJECTION_PATTERNS: &[&str] = &[
    "ignore previous instructions",
    "ignore all previous",
    "disregard your instructions",
    "you are now",
    "new instructions:",
    "system:",
    "[inst]", // "[INST]" instruction marker
    "</scratchpad>",
    "think step by step about",
];
/// Tool output sanitization middleware.
///
/// Priority 360 — runs after ToolErrorMiddleware (350), before GuardrailMiddleware (400).
pub struct ToolOutputGuardMiddleware {
    /// Serialized-output length above which a warning is logged.
    max_output_length: usize,
}

impl ToolOutputGuardMiddleware {
    /// Builds the guard with the module-wide `MAX_OUTPUT_LENGTH` threshold.
    pub fn new() -> Self {
        let max_output_length = MAX_OUTPUT_LENGTH;
        Self { max_output_length }
    }
}

impl Default for ToolOutputGuardMiddleware {
    /// Same as [`ToolOutputGuardMiddleware::new`].
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl AgentMiddleware for ToolOutputGuardMiddleware {
    fn name(&self) -> &str { "tool_output_guard" }
    fn priority(&self) -> i32 { 360 }

    /// Always allows the call; this middleware performs no pre-execution checks.
    async fn before_tool_call(
        &self,
        _ctx: &MiddlewareContext,
        _tool_name: &str,
        _tool_input: &Value,
    ) -> Result<ToolCallDecision> {
        Ok(ToolCallDecision::Allow)
    }

    /// Inspects the JSON-serialized tool result and logs warnings; the result
    /// and the context are never modified.
    ///
    /// Three independent checks run in order: serialized length, sensitive
    /// patterns, injection markers. Each pattern check reports only the first
    /// matching pattern.
    async fn after_tool_call(
        &self,
        _ctx: &mut MiddlewareContext,
        tool_name: &str,
        result: &Value,
    ) -> Result<()> {
        // A serialization failure yields an empty string, which trips no rule.
        let serialized = serde_json::to_string(result).unwrap_or_default();

        // Rule 1: length check (byte length of the serialized JSON).
        let serialized_len = serialized.len();
        if serialized_len > self.max_output_length {
            tracing::warn!(
                "[ToolOutputGuard] Tool '{}' returned oversized output: {} chars (limit: {})",
                tool_name, serialized_len, self.max_output_length
            );
        }

        // Rules 2 and 3 search a lowercased copy of the output.
        // NOTE(review): a pattern containing uppercase characters cannot match
        // this lowercased text — confirm that every pattern entry is lowercase.
        let lowered = serialized.to_lowercase();

        // Rule 2: sensitive information — warn once, on the first pattern found.
        let sensitive_hit = SENSITIVE_PATTERNS
            .iter()
            .find(|candidate| lowered.contains(**candidate));
        if let Some(pattern) = sensitive_hit {
            tracing::warn!(
                "[ToolOutputGuard] Tool '{}' output contains sensitive pattern: '{}'",
                tool_name, pattern
            );
        }

        // Rule 3: injection markers — warn once, on the first pattern found.
        let injection_hit = INJECTION_PATTERNS
            .iter()
            .find(|candidate| lowered.contains(**candidate));
        if let Some(pattern) = injection_hit {
            tracing::warn!(
                "[ToolOutputGuard] Tool '{}' output contains potential injection marker: '{}'",
                tool_name, pattern
            );
        }

        Ok(())
    }
}