refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup

- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace let _ = with proper log::warn! across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
This commit is contained in:
iven
2026-04-03 00:28:03 +08:00
parent 0a04b260a4
commit 52bdafa633
55 changed files with 4130 additions and 1959 deletions

View File

@@ -454,6 +454,9 @@ async fn generate_llm_summary(
temperature: Some(0.3),
stop: Vec::new(),
stream: false,
thinking_enabled: false,
reasoning_effort: None,
plan_mode: false,
};
let response = driver

View File

@@ -181,8 +181,12 @@ impl LlmDriver for AnthropicDriver {
}
}
"error" => {
let error_msg = serde_json::from_str::<serde_json::Value>(&data)
.ok()
.and_then(|v| v.get("error").and_then(|e| e.get("message")).and_then(|m| m.as_str().map(String::from)))
.unwrap_or_else(|| format!("Stream error: {}", &data[..data.len().min(200)]));
yield Ok(StreamChunk::Error {
message: "Stream error".to_string(),
message: error_msg,
});
}
_ => {}
@@ -251,15 +255,42 @@ impl AnthropicDriver {
})
.collect();
let requested_max = request.max_tokens.unwrap_or(4096);
let (thinking, budget) = if request.thinking_enabled {
let budget = match request.reasoning_effort.as_deref() {
Some("low") => 2000,
Some("medium") => 10000,
Some("high") => 32000,
_ => 10000, // default
};
(Some(AnthropicThinking {
r#type: "enabled".to_string(),
budget_tokens: budget,
}), budget)
} else {
(None, 0)
};
// When thinking is enabled, max_tokens is the TOTAL budget (thinking + text).
// Use the maximum output limit (65536) so thinking can consume whatever it
// needs without starving the text response. We only pay for tokens actually
// generated, so a high limit costs nothing extra.
let effective_max = if budget > 0 {
65536
} else {
requested_max
};
AnthropicRequest {
model: request.model.clone(),
max_tokens: request.max_tokens.unwrap_or(4096),
max_tokens: effective_max,
system: request.system.clone(),
messages,
tools: if tools.is_empty() { None } else { Some(tools) },
temperature: request.temperature,
stop_sequences: if request.stop.is_empty() { None } else { Some(request.stop.clone()) },
stream: request.stream,
thinking,
}
}
@@ -313,6 +344,14 @@ struct AnthropicRequest {
stop_sequences: Option<Vec<String>>,
#[serde(default)]
stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
thinking: Option<AnthropicThinking>,
}
/// Extended-thinking configuration serialized into the Anthropic request body.
#[derive(Serialize)]
struct AnthropicThinking {
    // Wire field is literally "type"; set to "enabled" when thinking is on.
    r#type: String,
    // Token budget reserved for the thinking phase (derived from reasoning_effort).
    budget_tokens: u32,
}
#[derive(Serialize)]

View File

@@ -265,6 +265,10 @@ impl GeminiDriver {
/// - Tool definitions use `functionDeclarations`
/// - Tool results are sent as `functionResponse` parts in `user` messages
fn build_api_request(&self, request: &CompletionRequest) -> GeminiRequest {
if request.thinking_enabled {
tracing::debug!("[GeminiDriver] thinking_enabled=true but Gemini does not support native thinking mode; ignoring");
}
let mut contents: Vec<GeminiContent> = Vec::new();
for msg in &request.messages {

View File

@@ -58,6 +58,10 @@ impl LocalDriver {
// ----------------------------------------------------------------
fn build_api_request(&self, request: &CompletionRequest) -> LocalApiRequest {
if request.thinking_enabled {
tracing::debug!("[LocalDriver] thinking_enabled=true but local driver does not support native thinking mode; ignoring");
}
let messages: Vec<LocalApiMessage> = request
.messages
.iter()
@@ -183,7 +187,7 @@ impl LocalDriver {
.unwrap_or(false);
let blocks = if has_tool_calls {
let tool_calls = c.message.tool_calls.as_ref().unwrap();
let tool_calls = c.message.tool_calls.as_deref().unwrap_or_default();
tool_calls
.iter()
.map(|tc| {
@@ -199,7 +203,7 @@ impl LocalDriver {
.collect()
} else if has_content {
vec![ContentBlock::Text {
text: c.message.content.clone().unwrap(),
text: c.message.content.clone().unwrap_or_default(),
}]
} else {
vec![ContentBlock::Text {

View File

@@ -60,6 +60,15 @@ pub struct CompletionRequest {
pub stop: Vec<String>,
/// Enable streaming
pub stream: bool,
/// Enable extended thinking/reasoning
#[serde(default)]
pub thinking_enabled: bool,
/// Reasoning effort level (for providers that support it)
#[serde(default)]
pub reasoning_effort: Option<String>,
/// Enable plan mode
#[serde(default)]
pub plan_mode: bool,
}
impl Default for CompletionRequest {
@@ -73,27 +82,16 @@ impl Default for CompletionRequest {
temperature: Some(0.7),
stop: Vec::new(),
stream: false,
thinking_enabled: false,
reasoning_effort: None,
plan_mode: false,
}
}
}
/// Tool definition for LLM
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolDefinition {
pub name: String,
pub description: String,
pub input_schema: serde_json::Value,
}
impl ToolDefinition {
pub fn new(name: impl Into<String>, description: impl Into<String>, schema: serde_json::Value) -> Self {
Self {
name: name.into(),
description: description.into(),
input_schema: schema,
}
}
}
/// Tool definition for LLM function calling.
/// Re-exported from `zclaw_types::tool::ToolDefinition` (canonical definition).
pub use zclaw_types::tool::ToolDefinition;
/// Completion response
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -110,7 +108,8 @@ pub struct CompletionResponse {
pub stop_reason: StopReason,
}
/// Content block in response
/// LLM driver response content block (subset of canonical zclaw_types::ContentBlock).
/// Used internally by Anthropic/OpenAI/Gemini/Local drivers for API response parsing.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentBlock {

View File

@@ -130,8 +130,8 @@ impl LlmDriver for OpenAiDriver {
let api_key = self.api_key.expose_secret().to_string();
Box::pin(stream! {
println!("[OpenAI:stream] POST to {}/chat/completions", base_url);
println!("[OpenAI:stream] Request model={}, stream={}", stream_request.model, stream_request.stream);
tracing::debug!("[OpenAI:stream] POST to {}/chat/completions", base_url);
tracing::debug!("[OpenAI:stream] Request model={}, stream={}", stream_request.model, stream_request.stream);
let response = match self.client
.post(format!("{}/chat/completions", base_url))
.header("Authorization", format!("Bearer {}", api_key))
@@ -142,11 +142,11 @@ impl LlmDriver for OpenAiDriver {
.await
{
Ok(r) => {
println!("[OpenAI:stream] Response status: {}, content-type: {:?}", r.status(), r.headers().get("content-type"));
tracing::debug!("[OpenAI:stream] Response status: {}, content-type: {:?}", r.status(), r.headers().get("content-type"));
r
},
Err(e) => {
println!("[OpenAI:stream] HTTP request FAILED: {:?}", e);
tracing::debug!("[OpenAI:stream] HTTP request FAILED: {:?}", e);
yield Err(ZclawError::LlmError(format!("HTTP request failed: {}", e)));
return;
}
@@ -155,7 +155,7 @@ impl LlmDriver for OpenAiDriver {
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
println!("[OpenAI:stream] API error {}: {}", status, &body[..body.len().min(500)]);
tracing::debug!("[OpenAI:stream] API error {}: {}", status, &body[..body.len().min(500)]);
yield Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
return;
}
@@ -170,7 +170,7 @@ impl LlmDriver for OpenAiDriver {
let chunk = match chunk_result {
Ok(c) => c,
Err(e) => {
println!("[OpenAI:stream] Byte stream error: {:?}", e);
tracing::debug!("[OpenAI:stream] Byte stream error: {:?}", e);
yield Err(ZclawError::LlmError(format!("Stream error: {}", e)));
continue;
}
@@ -180,7 +180,7 @@ impl LlmDriver for OpenAiDriver {
let text = String::from_utf8_lossy(&chunk);
// Log first 500 bytes of raw data for debugging SSE format
if raw_bytes_total <= 600 {
println!("[OpenAI:stream] RAW chunk ({} bytes): {:?}", text.len(), &text[..text.len().min(500)]);
tracing::debug!("[OpenAI:stream] RAW chunk ({} bytes): {:?}", text.len(), &text[..text.len().min(500)]);
}
for line in text.lines() {
let trimmed = line.trim();
@@ -198,10 +198,10 @@ impl LlmDriver for OpenAiDriver {
if let Some(data) = data {
sse_event_count += 1;
if sse_event_count <= 3 || data == "[DONE]" {
println!("[OpenAI:stream] SSE #{}: {}", sse_event_count, &data[..data.len().min(300)]);
tracing::debug!("[OpenAI:stream] SSE #{}: {}", sse_event_count, &data[..data.len().min(300)]);
}
if data == "[DONE]" {
println!("[OpenAI:stream] Received [DONE], total SSE events: {}, raw bytes: {}", sse_event_count, raw_bytes_total);
tracing::debug!("[OpenAI:stream] Received [DONE], total SSE events: {}, raw bytes: {}", sse_event_count, raw_bytes_total);
// Emit ToolUseEnd for all accumulated tool calls (skip invalid ones with empty name)
for (id, (name, args)) in &accumulated_tool_calls {
@@ -319,7 +319,7 @@ impl LlmDriver for OpenAiDriver {
}
}
}
println!("[OpenAI:stream] Byte stream ended. Total: {} SSE events, {} raw bytes", sse_event_count, raw_bytes_total);
tracing::debug!("[OpenAI:stream] Byte stream ended. Total: {} SSE events, {} raw bytes", sse_event_count, raw_bytes_total);
})
}
}
@@ -496,6 +496,7 @@ impl OpenAiDriver {
stop: if request.stop.is_empty() { None } else { Some(request.stop.clone()) },
stream: request.stream,
tools: if tools.is_empty() { None } else { Some(tools) },
reasoning_effort: request.reasoning_effort.clone(),
};
// Pre-send payload size validation
@@ -581,8 +582,8 @@ impl OpenAiDriver {
let has_reasoning = c.message.reasoning_content.as_ref().map(|t| !t.is_empty()).unwrap_or(false);
let blocks = if has_tool_calls {
// Tool calls take priority
let tool_calls = c.message.tool_calls.as_ref().unwrap();
// Tool calls take priority — safe to unwrap after has_tool_calls check
let tool_calls = c.message.tool_calls.as_ref().cloned().unwrap_or_default();
tracing::debug!("[OpenAiDriver:convert_response] Using tool_calls: {} calls", tool_calls.len());
tool_calls.iter().map(|tc| ContentBlock::ToolUse {
id: tc.id.clone(),
@@ -590,15 +591,15 @@ impl OpenAiDriver {
input: serde_json::from_str(&tc.function.arguments).unwrap_or(serde_json::Value::Null),
}).collect()
} else if has_content {
// Non-empty content
let text = c.message.content.as_ref().unwrap();
// Non-empty content — safe to unwrap after has_content check
let text = c.message.content.as_deref().unwrap_or("");
tracing::debug!("[OpenAiDriver:convert_response] Using text content: {} chars", text.len());
vec![ContentBlock::Text { text: text.clone() }]
vec![ContentBlock::Text { text: text.to_string() }]
} else if has_reasoning {
// Content empty but reasoning_content present (Kimi, Qwen, DeepSeek)
let reasoning = c.message.reasoning_content.as_ref().unwrap();
let reasoning = c.message.reasoning_content.as_deref().unwrap_or("");
tracing::debug!("[OpenAiDriver:convert_response] Using reasoning_content: {} chars", reasoning.len());
vec![ContentBlock::Text { text: reasoning.clone() }]
vec![ContentBlock::Text { text: reasoning.to_string() }]
} else {
// No content or tool_calls
tracing::debug!("[OpenAiDriver:convert_response] No content or tool_calls, using empty text");
@@ -771,6 +772,8 @@ struct OpenAiRequest {
stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
tools: Option<Vec<OpenAiTool>>,
#[serde(skip_serializing_if = "Option::is_none")]
reasoning_effort: Option<String>,
}
#[derive(Serialize)]
@@ -833,7 +836,7 @@ struct OpenAiResponse {
usage: Option<OpenAiUsage>,
}
#[derive(Deserialize, Default)]
#[derive(Deserialize, Default, Clone)]
struct OpenAiChoice {
#[serde(default)]
message: OpenAiResponseMessage,
@@ -841,7 +844,7 @@ struct OpenAiChoice {
finish_reason: Option<String>,
}
#[derive(Deserialize, Default)]
#[derive(Deserialize, Default, Clone)]
struct OpenAiResponseMessage {
#[serde(default)]
content: Option<String>,
@@ -851,7 +854,7 @@ struct OpenAiResponseMessage {
tool_calls: Option<Vec<OpenAiToolCallResponse>>,
}
#[derive(Deserialize, Default)]
#[derive(Deserialize, Default, Clone)]
struct OpenAiToolCallResponse {
#[serde(default)]
id: String,
@@ -859,7 +862,7 @@ struct OpenAiToolCallResponse {
function: FunctionCallResponse,
}
#[derive(Deserialize, Default)]
#[derive(Deserialize, Default, Clone)]
struct FunctionCallResponse {
#[serde(default)]
name: String,

View File

@@ -16,6 +16,7 @@ use zclaw_growth::{
MemoryExtractor, MemoryRetriever, PromptInjector, RetrievalResult,
VikingAdapter,
};
use zclaw_memory::{ExtractedFactBatch, Fact, FactCategory};
use zclaw_types::{AgentId, Message, Result, SessionId};
/// Growth system integration for AgentLoop
@@ -212,6 +213,80 @@ impl GrowthIntegration {
Ok(count)
}
/// Combined extraction: single LLM call that produces both stored memories
/// and structured facts, avoiding double extraction overhead.
///
/// Returns `(memory_count, Option<ExtractedFactBatch>)` on success.
pub async fn extract_combined(
&self,
agent_id: &AgentId,
messages: &[Message],
session_id: &SessionId,
) -> Result<Option<(usize, ExtractedFactBatch)>> {
if !self.config.enabled || !self.config.auto_extract {
return Ok(None);
}
// Single LLM extraction call
let extracted = self
.extractor
.extract(messages, session_id.clone())
.await
.unwrap_or_else(|e| {
tracing::warn!("[GrowthIntegration] Combined extraction failed: {}", e);
Vec::new()
});
if extracted.is_empty() {
return Ok(None);
}
let mem_count = extracted.len();
// Store raw memories
self.extractor
.store_memories(&agent_id.to_string(), &extracted)
.await?;
// Track learning event
self.tracker
.record_learning(agent_id, &session_id.to_string(), mem_count)
.await?;
// Convert same extracted memories to structured facts (no extra LLM call)
let facts: Vec<Fact> = extracted
.into_iter()
.map(|m| {
let category = match m.memory_type {
zclaw_growth::types::MemoryType::Preference => FactCategory::Preference,
zclaw_growth::types::MemoryType::Knowledge => FactCategory::Knowledge,
zclaw_growth::types::MemoryType::Experience => FactCategory::Behavior,
_ => FactCategory::General,
};
Fact::new(m.content, category, f64::from(m.confidence))
.with_source(session_id.to_string())
})
.collect();
let batch = ExtractedFactBatch {
facts,
agent_id: agent_id.to_string(),
session_id: session_id.to_string(),
}
.deduplicate()
.filter_by_confidence(0.7);
if batch.is_empty() {
return Ok(Some((mem_count, ExtractedFactBatch {
facts: vec![],
agent_id: agent_id.to_string(),
session_id: session_id.to_string(),
})));
}
Ok(Some((mem_count, batch)))
}
/// Retrieve memories for a query without injection
pub async fn retrieve_memories(
&self,

View File

@@ -16,6 +16,7 @@ pub mod stream;
pub mod growth;
pub mod compaction;
pub mod middleware;
pub mod prompt;
// Re-export main types
pub use driver::{
@@ -31,3 +32,4 @@ pub use zclaw_growth::VikingAdapter;
pub use zclaw_growth::EmbeddingClient;
pub use zclaw_growth::LlmDriverForExtraction;
pub use compaction::{CompactionConfig, CompactionOutcome};
pub use prompt::{PromptBuilder, PromptContext, PromptSection};

View File

@@ -14,6 +14,7 @@ use crate::loop_guard::{LoopGuard, LoopGuardResult};
use crate::growth::GrowthIntegration;
use crate::compaction::{self, CompactionConfig};
use crate::middleware::{self, MiddlewareChain};
use crate::prompt::{PromptBuilder, PromptContext};
use zclaw_memory::MemoryStore;
/// Agent loop runner
@@ -25,6 +26,8 @@ pub struct AgentLoop {
loop_guard: Mutex<LoopGuard>,
model: String,
system_prompt: Option<String>,
/// Custom agent personality for prompt assembly
soul: Option<String>,
max_tokens: u32,
temperature: f32,
skill_executor: Option<Arc<dyn SkillExecutor>>,
@@ -39,6 +42,12 @@ pub struct AgentLoop {
/// delegated to the chain instead of the inline code below.
/// When `None`, the legacy inline path is used (100% backward compatible).
middleware_chain: Option<MiddlewareChain>,
/// Chat mode: extended thinking enabled
thinking_enabled: bool,
/// Chat mode: reasoning effort level
reasoning_effort: Option<String>,
/// Chat mode: plan mode
plan_mode: bool,
}
impl AgentLoop {
@@ -56,7 +65,8 @@ impl AgentLoop {
loop_guard: Mutex::new(LoopGuard::default()),
model: String::new(), // Must be set via with_model()
system_prompt: None,
max_tokens: 4096,
soul: None,
max_tokens: 16384,
temperature: 0.7,
skill_executor: None,
path_validator: None,
@@ -64,6 +74,9 @@ impl AgentLoop {
compaction_threshold: 0,
compaction_config: CompactionConfig::default(),
middleware_chain: None,
thinking_enabled: false,
reasoning_effort: None,
plan_mode: false,
}
}
@@ -91,6 +104,30 @@ impl AgentLoop {
self
}
/// Set the agent personality (SOUL.md equivalent)
pub fn with_soul(mut self, soul: impl Into<String>) -> Self {
self.soul = Some(soul.into());
self
}
/// Enable extended thinking/reasoning mode
pub fn with_thinking_enabled(mut self, enabled: bool) -> Self {
self.thinking_enabled = enabled;
self
}
/// Set reasoning effort level (low/medium/high)
pub fn with_reasoning_effort(mut self, effort: impl Into<String>) -> Self {
self.reasoning_effort = Some(effort.into());
self
}
/// Enable plan mode
pub fn with_plan_mode(mut self, enabled: bool) -> Self {
self.plan_mode = enabled;
self
}
/// Set max tokens
pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
self.max_tokens = max_tokens;
@@ -214,7 +251,15 @@ impl AgentLoop {
// Enhance system prompt — skip when middleware chain handles it
let mut enhanced_prompt = if use_middleware {
self.system_prompt.clone().unwrap_or_default()
let prompt_ctx = PromptContext {
base_prompt: self.system_prompt.clone(),
soul: self.soul.clone(),
thinking_enabled: self.thinking_enabled,
plan_mode: self.plan_mode,
tool_definitions: self.tools.definitions(),
agent_name: None,
};
PromptBuilder::new().build(&prompt_ctx)
} else if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
growth.enhance_prompt(&self.agent_id, base, &input).await?
@@ -279,6 +324,9 @@ impl AgentLoop {
temperature: Some(self.temperature),
stop: Vec::new(),
stream: false,
thinking_enabled: self.thinking_enabled,
reasoning_effort: self.reasoning_effort.clone(),
plan_mode: self.plan_mode,
};
// Call LLM
@@ -352,7 +400,12 @@ impl AgentLoop {
// Create tool context and execute all tools
let tool_context = self.create_tool_context(session_id.clone());
let mut circuit_breaker_triggered = false;
let mut abort_result: Option<AgentLoopResult> = None;
for (id, name, input) in tool_calls {
// Check if loop was already aborted
if abort_result.is_some() {
break;
}
// Check tool call safety — via middleware chain or inline loop guard
if let Some(ref chain) = self.middleware_chain {
let mw_ctx_ref = middleware::MiddlewareContext {
@@ -382,6 +435,17 @@ impl AgentLoop {
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), tool_result, false));
continue;
}
middleware::ToolCallDecision::AbortLoop(reason) => {
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
let msg = format!("{}\n已自动终止", reason);
self.memory.append_message(&session_id, &Message::assistant(&msg)).await?;
abort_result = Some(AgentLoopResult {
response: msg,
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations,
});
}
}
} else {
// Legacy inline path
@@ -421,6 +485,11 @@ impl AgentLoop {
// Continue the loop - LLM will process tool results and generate final response
// If middleware aborted the loop, return immediately
if let Some(result) = abort_result {
break result;
}
// If circuit breaker was triggered, terminate immediately
if circuit_breaker_triggered {
let msg = "检测到工具调用循环,已自动终止";
@@ -502,7 +571,15 @@ impl AgentLoop {
// Enhance system prompt — skip when middleware chain handles it
let mut enhanced_prompt = if use_middleware {
self.system_prompt.clone().unwrap_or_default()
let prompt_ctx = PromptContext {
base_prompt: self.system_prompt.clone(),
soul: self.soul.clone(),
thinking_enabled: self.thinking_enabled,
plan_mode: self.plan_mode,
tool_definitions: self.tools.definitions(),
agent_name: None,
};
PromptBuilder::new().build(&prompt_ctx)
} else if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
growth.enhance_prompt(&self.agent_id, base, &input).await?
@@ -552,6 +629,9 @@ impl AgentLoop {
let model = self.model.clone();
let max_tokens = self.max_tokens;
let temperature = self.temperature;
let thinking_enabled = self.thinking_enabled;
let reasoning_effort = self.reasoning_effort.clone();
let plan_mode = self.plan_mode;
tokio::spawn(async move {
let mut messages = messages;
@@ -584,6 +664,9 @@ impl AgentLoop {
temperature: Some(temperature),
stop: Vec::new(),
stream: true,
thinking_enabled,
reasoning_effort: reasoning_effort.clone(),
plan_mode,
};
let mut stream = driver.stream(request);
@@ -596,9 +679,12 @@ impl AgentLoop {
let mut chunk_count: usize = 0;
let mut text_delta_count: usize = 0;
let mut thinking_delta_count: usize = 0;
while let Some(chunk_result) = stream.next().await {
match chunk_result {
Ok(chunk) => {
let mut stream_errored = false;
let chunk_timeout = std::time::Duration::from_secs(60);
loop {
match tokio::time::timeout(chunk_timeout, stream.next()).await {
Ok(Some(Ok(chunk))) => {
chunk_count += 1;
match &chunk {
StreamChunk::TextDelta { delta } => {
@@ -610,8 +696,8 @@ impl AgentLoop {
StreamChunk::ThinkingDelta { delta } => {
thinking_delta_count += 1;
tracing::debug!("[AgentLoop] ThinkingDelta #{}: {} chars", thinking_delta_count, delta.len());
// Accumulate reasoning separately — not mixed into iteration_text
reasoning_text.push_str(delta);
let _ = tx.send(LoopEvent::ThinkingDelta(delta.clone())).await;
}
StreamChunk::ToolUseStart { id, name } => {
tracing::debug!("[AgentLoop] ToolUseStart: id={}, name={}", id, name);
@@ -651,21 +737,43 @@ impl AgentLoop {
StreamChunk::Error { message } => {
tracing::error!("[AgentLoop] Stream error: {}", message);
let _ = tx.send(LoopEvent::Error(message.clone())).await;
stream_errored = true;
}
}
}
Err(e) => {
Ok(Some(Err(e))) => {
tracing::error!("[AgentLoop] Chunk error: {}", e);
let _ = tx.send(LoopEvent::Error(e.to_string())).await;
let _ = tx.send(LoopEvent::Error(format!("LLM 响应错误: {}", e.to_string()))).await;
stream_errored = true;
}
Ok(None) => break, // Stream ended normally
Err(_) => {
tracing::error!("[AgentLoop] Stream chunk timeout ({}s)", chunk_timeout.as_secs());
let _ = tx.send(LoopEvent::Error("LLM 响应超时,请重试".to_string())).await;
stream_errored = true;
}
}
if stream_errored {
break;
}
}
tracing::info!("[AgentLoop] Stream ended: {} total chunks (text={}, thinking={}, tools={}), iteration_text={} chars",
chunk_count, text_delta_count, thinking_delta_count, pending_tool_calls.len(),
iteration_text.len());
if iteration_text.is_empty() {
tracing::warn!("[AgentLoop] WARNING: iteration_text is EMPTY after {} chunks! text_delta={}, thinking_delta={}",
chunk_count, text_delta_count, thinking_delta_count);
// Fallback: if model generated reasoning but no text content,
// use reasoning as text response. This happens with some thinking models
// (DeepSeek R1, QWQ) that put the answer in reasoning_content instead of content.
// Safe now because: (1) context is clean (no stale user_profile/memory injection),
// (2) max_tokens=16384 prevents truncation, (3) reasoning is about the correct topic.
if iteration_text.is_empty() && !reasoning_text.is_empty() {
tracing::info!("[AgentLoop] Model generated {} chars of reasoning but no text — using reasoning as response",
reasoning_text.len());
let _ = tx.send(LoopEvent::Delta(reasoning_text.clone())).await;
iteration_text = reasoning_text.clone();
} else if iteration_text.is_empty() {
tracing::warn!("[AgentLoop] No text content after {} chunks (thinking_delta={})",
chunk_count, thinking_delta_count);
}
// If no tool calls, we have the final response
@@ -706,6 +814,12 @@ impl AgentLoop {
break 'outer;
}
// Skip tool processing if stream errored or timed out
if stream_errored {
tracing::debug!("[AgentLoop] Stream errored, skipping tool processing and breaking");
break 'outer;
}
tracing::debug!("[AgentLoop] Processing {} tool calls (reasoning: {} chars)", pending_tool_calls.len(), reasoning_text.len());
// Push assistant message with reasoning before tool calls (required by Kimi and other thinking-enabled APIs)
@@ -745,6 +859,11 @@ impl AgentLoop {
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), error_output, true));
continue;
}
Ok(middleware::ToolCallDecision::AbortLoop(reason)) => {
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
let _ = tx.send(LoopEvent::Error(reason)).await;
break 'outer;
}
Ok(middleware::ToolCallDecision::ReplaceInput(new_input)) => {
// Execute with replaced input (same path_validator logic below)
let pv = path_validator.clone().unwrap_or_else(|| {
@@ -883,6 +1002,8 @@ pub struct AgentLoopResult {
pub enum LoopEvent {
/// Text delta from LLM
Delta(String),
/// Thinking/reasoning delta from LLM (extended thinking)
ThinkingDelta(String),
/// Tool execution started
ToolStart { name: String, input: serde_json::Value },
/// Tool execution completed

View File

@@ -41,6 +41,8 @@ pub enum ToolCallDecision {
Block(String),
/// Allow the call but replace the tool input with *new_input*.
ReplaceInput(Value),
/// Terminate the entire agent loop immediately (e.g. circuit breaker).
AbortLoop(String),
}
// ---------------------------------------------------------------------------
@@ -194,6 +196,25 @@ impl MiddlewareChain {
Ok(ToolCallDecision::Allow)
}
/// Run all `before_tool_call` hooks with mutable context.
pub async fn run_before_tool_call_mut(
&self,
ctx: &mut MiddlewareContext,
tool_name: &str,
tool_input: &Value,
) -> Result<ToolCallDecision> {
for mw in &self.middlewares {
match mw.before_tool_call(ctx, tool_name, tool_input).await? {
ToolCallDecision::Allow => {}
other => {
tracing::info!("[MiddlewareChain] '{}' decided {:?} for tool '{}'", mw.name(), other, tool_name);
return Ok(other);
}
}
}
Ok(ToolCallDecision::Allow)
}
/// Run all `after_tool_call` hooks in order.
pub async fn run_after_tool_call(
&self,
@@ -245,8 +266,13 @@ impl Default for MiddlewareChain {
// ---------------------------------------------------------------------------
pub mod compaction;
pub mod dangling_tool;
pub mod guardrail;
pub mod loop_guard;
pub mod memory;
pub mod skill_index;
pub mod subagent_limit;
pub mod title;
pub mod token_calibration;
pub mod tool_error;
pub mod tool_output_guard;

View File

@@ -0,0 +1,125 @@
//! Dangling tool-call repair middleware — detects and patches missing tool-result
//! messages that would cause LLM API errors.
//!
//! When the LLM produces a `ToolUse` content block but the agent loop fails to
//! produce a corresponding `ToolResult` message (e.g. due to a crash or timeout),
//! the conversation history becomes inconsistent. The next LLM call would fail with
//! an API error because ToolUse messages must be followed by ToolResult messages.
//!
//! This middleware inspects the message history before each completion and appends
//! placeholder ToolResult messages for any dangling ToolUse entries.
use std::collections::HashSet;
use async_trait::async_trait;
use zclaw_types::{Message, Result};
use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};
/// Middleware that repairs dangling tool-use blocks in conversation history.
///
/// Priority 300 — runs before tool error middleware (350) and guardrail (400).
pub struct DanglingToolMiddleware;

impl DanglingToolMiddleware {
    /// Construct the middleware. It is a stateless unit struct, so no
    /// configuration is required.
    pub fn new() -> Self {
        DanglingToolMiddleware
    }
}

impl Default for DanglingToolMiddleware {
    fn default() -> Self {
        DanglingToolMiddleware::new()
    }
}
#[async_trait]
impl AgentMiddleware for DanglingToolMiddleware {
    fn name(&self) -> &str { "dangling_tool" }

    fn priority(&self) -> i32 { 300 }

    /// Scan the history for `ToolUse` blocks that never received a matching
    /// `ToolResult` and append an error-flagged placeholder result after each,
    /// so the next completion call does not fail provider-side validation.
    ///
    /// Always returns `MiddlewareDecision::Continue`; repair is best-effort and
    /// never blocks the completion.
    async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
        let mut patched_count = 0usize;

        // Step 1: Collect all ToolUse IDs and all ToolResult IDs across the
        // entire message list (not just adjacent pairs).
        let mut tool_use_ids: Vec<(String, String)> = Vec::new(); // (id, tool_name)
        let mut tool_result_ids: HashSet<String> = HashSet::new();
        for msg in &ctx.messages {
            match msg {
                Message::ToolUse { ref id, ref tool, .. } => {
                    tool_use_ids.push((id.clone(), tool.as_str().to_string()));
                }
                Message::ToolResult { ref tool_call_id, .. } => {
                    // Every result counts as a match — including placeholders
                    // inserted by an earlier run of this middleware. That is
                    // what makes repeated runs idempotent: a previously patched
                    // ToolUse is no longer "dangling", so it is never
                    // double-patched.
                    tool_result_ids.insert(tool_call_id.clone());
                }
                _ => {}
            }
        }

        // Step 2: Find dangling ToolUse entries that have no matching ToolResult.
        let dangling_ids: HashSet<String> = tool_use_ids.iter()
            .filter(|(id, _)| !tool_result_ids.contains(id))
            .map(|(id, _)| id.clone())
            .collect();
        if dangling_ids.is_empty() {
            return Ok(MiddlewareDecision::Continue);
        }

        // Step 3: Rebuild the message list, inserting an error-flagged
        // placeholder ToolResult immediately after each dangling ToolUse.
        let capacity = ctx.messages.len() + dangling_ids.len();
        let mut patched_messages: Vec<Message> = Vec::with_capacity(capacity);
        for msg in &ctx.messages {
            patched_messages.push(msg.clone());
            if let Message::ToolUse { ref id, ref tool, .. } = msg {
                if dangling_ids.contains(id) {
                    tracing::warn!(
                        "[DanglingToolMiddleware] Patching dangling ToolUse: tool={}, id={}",
                        tool.as_str(), id
                    );
                    let placeholder = Message::tool_result(
                        id.clone(),
                        tool.clone(),
                        serde_json::json!({
                            "error": "Tool execution was interrupted. Please retry or use an alternative approach.",
                            "tool_patch": true,
                        }),
                        true, // is_error
                    );
                    patched_messages.push(placeholder);
                    patched_count += 1;
                }
            }
        }

        // Step 4: Detect streaming interrupt — if the last message is an Assistant
        // response while there were dangling tools, the user likely interrupted a
        // streaming response mid-tool-execution. No additional action is needed
        // beyond the patched ToolResult messages that now prevent API errors.
        if let Some(Message::Assistant { .. }) = patched_messages.last() {
            tracing::debug!(
                "[DanglingToolMiddleware] Streaming interrupt detected with {} dangling tools",
                patched_count
            );
        }

        if patched_count > 0 {
            tracing::info!(
                "[DanglingToolMiddleware] Patched {} dangling tool-use blocks",
                patched_count
            );
            ctx.messages = patched_messages;
        }
        Ok(MiddlewareDecision::Continue)
    }
}

View File

@@ -41,7 +41,7 @@ impl AgentMiddleware for LoopGuardMiddleware {
match result {
LoopGuardResult::CircuitBreaker => {
tracing::warn!("[LoopGuardMiddleware] Circuit breaker triggered by tool '{}'", tool_name);
Ok(ToolCallDecision::Block("检测到工具调用循环,已自动终止".to_string()))
Ok(ToolCallDecision::AbortLoop("检测到工具调用循环,已自动终止".to_string()))
}
LoopGuardResult::Blocked => {
tracing::warn!("[LoopGuardMiddleware] Tool '{}' blocked", tool_name);

View File

@@ -60,34 +60,39 @@ impl AgentMiddleware for MemoryMiddleware {
fn priority(&self) -> i32 { 150 }
async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
// Skip memory injection for very short queries.
// Short queries (e.g., "1+6", "hi", "好") don't benefit from memory context.
// Worse, the retriever's scope-based fallback may return high-importance but
// irrelevant old memories, causing the model to think about past conversations
// instead of answering the current question.
// Use char count (not byte count) so CJK queries are handled correctly:
// a single Chinese char is 3 UTF-8 bytes but 1 meaningful character.
let query = ctx.user_input.trim();
if query.chars().count() < 2 {
tracing::debug!(
"[MemoryMiddleware] Skipping enhancement for short query ({:?}): no memory context needed",
query
);
return Ok(MiddlewareDecision::Continue);
}
tracing::debug!(
"[MemoryMiddleware] before_completion for query: {:?}",
ctx.user_input.chars().take(50).collect::<String>()
);
match self.growth.enhance_prompt(
&ctx.agent_id,
&ctx.system_prompt,
&ctx.user_input,
).await {
// Retrieve relevant memories and inject into system prompt.
// The SqliteStorage retriever now uses FTS5-only matching — if FTS5 finds
// no relevant results, no memories are returned (no scope-based fallback).
// This prevents irrelevant high-importance memories from leaking into
// unrelated conversations.
let base = &ctx.system_prompt;
match self.growth.enhance_prompt(&ctx.agent_id, base, &ctx.user_input).await {
Ok(enhanced) => {
ctx.system_prompt = enhanced;
if enhanced != *base {
tracing::info!(
"[MemoryMiddleware] Injected memories into system prompt for agent {}",
ctx.agent_id
);
ctx.system_prompt = enhanced;
} else {
tracing::debug!(
"[MemoryMiddleware] No relevant memories found for query: {:?}",
ctx.user_input.chars().take(50).collect::<String>()
);
}
Ok(MiddlewareDecision::Continue)
}
Err(e) => {
// Non-fatal: memory retrieval failure should not block the loop
tracing::warn!("[MemoryMiddleware] Prompt enhancement failed: {}", e);
// Non-fatal: retrieval failure should not block the conversation
tracing::warn!(
"[MemoryMiddleware] Memory retrieval failed (non-fatal): {}",
e
);
Ok(MiddlewareDecision::Continue)
}
}

View File

@@ -0,0 +1,87 @@
//! Sub-agent limit middleware — enforces limits on sub-agent spawning.
//!
//! Prevents runaway sub-agent spawning by enforcing a per-turn total cap.
//! The `running` counter was removed because it leaked when subsequent
//! middleware blocked the tool call (before_tool_call increments but
//! after_tool_call never fires for blocked tools).
use async_trait::async_trait;
use serde_json::Value;
use zclaw_types::Result;
use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
/// Default cap on sub-agent spawns within a single conversation turn.
const DEFAULT_MAX_TOTAL: usize = 10;
/// Middleware that caps how many sub-agents may be spawned per turn.
///
/// Priority 550 — runs after loop guard (500).
pub struct SubagentLimitMiddleware {
    /// Hard cap on sub-agent spawns for one conversation turn.
    max_total: usize,
    /// How many sub-agents have been spawned so far in this turn.
    total_spawned: std::sync::atomic::AtomicUsize,
}
impl SubagentLimitMiddleware {
    /// Create a limiter with the default per-turn cap ([`DEFAULT_MAX_TOTAL`]).
    pub fn new() -> Self {
        Self {
            max_total: DEFAULT_MAX_TOTAL,
            total_spawned: std::sync::atomic::AtomicUsize::new(0),
        }
    }
    /// Builder-style override for the per-turn cap.
    pub fn with_max_total(mut self, n: usize) -> Self {
        self.max_total = n;
        self
    }
    /// Returns `true` when `tool_name` is one of the tools that spawns a sub-agent.
    fn is_subagent_tool(tool_name: &str) -> bool {
        const SPAWN_TOOLS: [&str; 4] = ["task", "delegate", "spawn_agent", "subagent"];
        SPAWN_TOOLS.contains(&tool_name)
    }
}
impl Default for SubagentLimitMiddleware {
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl AgentMiddleware for SubagentLimitMiddleware {
    fn name(&self) -> &str { "subagent_limit" }
    fn priority(&self) -> i32 { 550 }

    /// Gate sub-agent spawn tools behind the per-turn counter; all other
    /// tools pass through untouched.
    async fn before_tool_call(
        &self,
        _ctx: &MiddlewareContext,
        tool_name: &str,
        _tool_input: &Value,
    ) -> Result<ToolCallDecision> {
        use std::sync::atomic::Ordering::SeqCst;
        if !Self::is_subagent_tool(tool_name) {
            return Ok(ToolCallDecision::Allow);
        }
        // Optimistically claim a slot; roll the claim back if the cap
        // had already been reached.
        let prev = self.total_spawned.fetch_add(1, SeqCst);
        if prev < self.max_total {
            return Ok(ToolCallDecision::Allow);
        }
        self.total_spawned.fetch_sub(1, SeqCst);
        tracing::warn!(
            "[SubagentLimitMiddleware] Total sub-agent limit ({}) reached — blocking spawn",
            self.max_total
        );
        Ok(ToolCallDecision::Block(format!(
            "子Agent总数量已达上限 ({}),请优先完成现有任务后再发起新任务。",
            self.max_total
        )))
    }

    /// Reset the per-turn counter once the agent loop turn completes.
    async fn after_completion(&self, _ctx: &MiddlewareContext) -> Result<()> {
        self.total_spawned.store(0, std::sync::atomic::Ordering::SeqCst);
        Ok(())
    }
}

View File

@@ -5,22 +5,29 @@
//! "新对话" or truncating the user's first message.
//!
//! Priority 180 — runs after compaction (100) and memory (150), before skill index (200).
//!
//! NOTE: This is a structural placeholder. Full implementation requires an LLM driver
//! reference to generate titles asynchronously, which will be wired through the
//! middleware context in a future iteration. For now it simply passes through.
use async_trait::async_trait;
use zclaw_types::Result;
use crate::middleware::{AgentMiddleware, MiddlewareContext};
use crate::middleware::{AgentMiddleware, MiddlewareDecision};
/// Middleware that auto-generates conversation titles after the first exchange.
///
/// When fully implemented, this will:
/// 1. Detect the first user-assistant exchange (via message count)
/// 2. Call the LLM with a short prompt to generate a descriptive title
/// 3. Update the session title via the middleware context
///
/// For now, it serves as a registered placeholder in the middleware chain.
pub struct TitleMiddleware {
/// Whether a title has been generated for the current session.
titled: std::sync::atomic::AtomicBool,
_reserved: (),
}
impl TitleMiddleware {
pub fn new() -> Self {
Self {
titled: std::sync::atomic::AtomicBool::new(false),
}
Self { _reserved: () }
}
}
@@ -34,4 +41,9 @@ impl Default for TitleMiddleware {
impl AgentMiddleware for TitleMiddleware {
fn name(&self) -> &str { "title" }
fn priority(&self) -> i32 { 180 }
// All hooks default to Continue — placeholder until LLM driver is wired in.
async fn before_completion(&self, _ctx: &mut crate::middleware::MiddlewareContext) -> zclaw_types::Result<MiddlewareDecision> {
Ok(MiddlewareDecision::Continue)
}
}

View File

@@ -0,0 +1,111 @@
//! Tool error middleware — catches tool execution errors and converts them
//! into well-formed tool-result messages for the LLM to recover from.
//!
//! Inspired by DeerFlow's ToolErrorMiddleware: instead of propagating raw errors
//! that crash the agent loop, this middleware wraps tool errors into a structured
//! format that the LLM can use to self-correct.
use async_trait::async_trait;
use serde_json::Value;
use zclaw_types::Result;
use crate::driver::ContentBlock;
use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
/// Middleware that intercepts tool call errors and formats recovery messages.
///
/// Priority 350 — runs after dangling tool repair (300) and before guardrail (400).
pub struct ToolErrorMiddleware {
    /// Errors longer than this (in bytes) are truncated before being logged
    /// and fed back to the LLM.
    max_error_length: usize,
}
impl ToolErrorMiddleware {
    /// Create the middleware with the default 500-byte error cap.
    pub fn new() -> Self {
        Self { max_error_length: 500 }
    }
    /// Builder-style override for the truncation threshold.
    pub fn with_max_error_length(mut self, len: usize) -> Self {
        self.max_error_length = len;
        self
    }
    /// Wrap a tool error into guidance the LLM can use to self-correct.
    ///
    /// The caller is responsible for truncating `error` beforehand.
    fn format_tool_error(&self, tool_name: &str, error: &str) -> String {
        format!(
            "工具 '{}' 执行失败。错误信息: {}\n请分析错误原因,尝试修正参数后重试,或使用其他方法完成任务。",
            tool_name, error
        )
    }
}
impl Default for ToolErrorMiddleware {
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl AgentMiddleware for ToolErrorMiddleware {
    fn name(&self) -> &str { "tool_error" }
    fn priority(&self) -> i32 { 350 }

    /// Pre-validate tool input structure for common issues.
    ///
    /// This catches malformed JSON inputs (currently: `null`) before they
    /// reach the tool executor, replacing them with an empty object.
    async fn before_tool_call(
        &self,
        _ctx: &MiddlewareContext,
        tool_name: &str,
        tool_input: &Value,
    ) -> Result<ToolCallDecision> {
        if tool_input.is_null() {
            tracing::warn!(
                "[ToolErrorMiddleware] Tool '{}' received null input — replacing with empty object",
                tool_name
            );
            return Ok(ToolCallDecision::ReplaceInput(serde_json::json!({})));
        }
        Ok(ToolCallDecision::Allow)
    }

    /// Detect an `"error"` field in the tool result and inject a guided
    /// recovery message so the LLM can self-correct instead of looping.
    async fn after_tool_call(
        &self,
        ctx: &mut MiddlewareContext,
        tool_name: &str,
        result: &Value,
    ) -> Result<()> {
        if let Some(error) = result.get("error") {
            let error_msg = match error {
                Value::String(s) => s.clone(),
                other => other.to_string(),
            };
            let truncated = if error_msg.len() > self.max_error_length {
                // Walk back to a char boundary so slicing cannot panic on
                // multi-byte UTF-8 (e.g. Chinese). `str::floor_char_boundary`
                // would do this directly, but it is still nightly-only
                // (unstable `round_char_boundary` feature), so use the
                // stable `is_char_boundary` check instead.
                let mut end = self.max_error_length;
                while end > 0 && !error_msg.is_char_boundary(end) {
                    end -= 1;
                }
                format!("{}...(truncated)", &error_msg[..end])
            } else {
                // No clone needed: error_msg is not used after this point.
                error_msg
            };
            tracing::warn!(
                "[ToolErrorMiddleware] Tool '{}' failed: {}",
                tool_name, truncated
            );
            // Build a guided recovery message so the LLM can self-correct.
            let guided_message = self.format_tool_error(tool_name, &truncated);
            // Inject into response_content so the agent loop feeds this back
            // to the LLM alongside the raw tool result.
            ctx.response_content.push(ContentBlock::Text {
                text: guided_message,
            });
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,132 @@
//! Tool output sanitization middleware — inspects tool results for risky content
//! before they flow back into the LLM context.
//!
//! Inspired by DeerFlow's missing "Toxic Output Loop" defense — ZCLAW proactively
//! implements post-execution output checking.
//!
//! Rules:
//! - Output length cap: warns when tool output exceeds threshold
//! - Sensitive pattern detection: flags API keys, tokens, passwords
//! - Injection marker detection: flags common prompt-injection patterns
//!
//! This middleware does NOT modify content. It only logs warnings at appropriate levels.
use async_trait::async_trait;
use serde_json::Value;
use zclaw_types::Result;
use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
/// Largest tool output (bytes of serialized JSON) considered safe.
const MAX_OUTPUT_LENGTH: usize = 50_000;
/// Substrings that suggest credentials or other secrets in tool output.
///
/// NOTE(review): `after_tool_call` lowercases the serialized output before
/// matching; the upper-case entries here ("AKIA", "-----BEGIN …") only match
/// if the comparison also lowercases the pattern — verify the matcher.
const SENSITIVE_PATTERNS: &[&str] = &[
    "api_key",
    "apikey",
    "api-key",
    "secret_key",
    "secretkey",
    "access_token",
    "auth_token",
    "password",
    "private_key",
    "-----BEGIN RSA",
    "-----BEGIN PRIVATE",
    "sk-", // OpenAI API keys
    "sk_live_", // Stripe keys
    "AKIA", // AWS access keys
];
/// Substrings commonly seen in prompt-injection payloads.
const INJECTION_PATTERNS: &[&str] = &[
    "ignore previous instructions",
    "ignore all previous",
    "disregard your instructions",
    "you are now",
    "new instructions:",
    "system:",
    "[INST]",
    "</scratchpad>",
    "think step by step about",
];
/// Tool output sanitization middleware.
///
/// Priority 360 — runs after ToolErrorMiddleware (350), before GuardrailMiddleware (400).
pub struct ToolOutputGuardMiddleware {
    /// Warn when serialized output exceeds this many bytes.
    max_output_length: usize,
}
impl ToolOutputGuardMiddleware {
    /// Create the guard with the default size limit ([`MAX_OUTPUT_LENGTH`]).
    pub fn new() -> Self {
        Self { max_output_length: MAX_OUTPUT_LENGTH }
    }
}
impl Default for ToolOutputGuardMiddleware {
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl AgentMiddleware for ToolOutputGuardMiddleware {
    fn name(&self) -> &str { "tool_output_guard" }
    fn priority(&self) -> i32 { 360 }

    async fn before_tool_call(
        &self,
        _ctx: &MiddlewareContext,
        _tool_name: &str,
        _tool_input: &Value,
    ) -> Result<ToolCallDecision> {
        // No pre-execution checks — this middleware only inspects output.
        Ok(ToolCallDecision::Allow)
    }

    /// Inspect the serialized tool result and log warnings for oversized,
    /// secret-bearing, or injection-flavoured output.
    ///
    /// Never modifies the result — warn-only by design.
    async fn after_tool_call(
        &self,
        _ctx: &mut MiddlewareContext,
        tool_name: &str,
        result: &Value,
    ) -> Result<()> {
        let output_str = serde_json::to_string(result).unwrap_or_default();
        let output_len = output_str.len();
        // Rule 1: output length check (bytes of serialized JSON).
        if output_len > self.max_output_length {
            tracing::warn!(
                "[ToolOutputGuard] Tool '{}' returned oversized output: {} bytes (limit: {})",
                tool_name, output_len, self.max_output_length
            );
        }
        // Case-insensitive matching requires lowercasing BOTH sides:
        // previously only the output was lowercased, so patterns containing
        // upper-case characters ("AKIA", "[INST]", "-----BEGIN RSA") could
        // never match.
        let output_lower = output_str.to_lowercase();
        // Rule 2: Sensitive information detection
        for pattern in SENSITIVE_PATTERNS {
            if output_lower.contains(&pattern.to_ascii_lowercase()) {
                tracing::warn!(
                    "[ToolOutputGuard] Tool '{}' output contains sensitive pattern: '{}'",
                    tool_name, pattern
                );
                break; // Only warn once per tool call
            }
        }
        // Rule 3: Injection marker detection
        for pattern in INJECTION_PATTERNS {
            if output_lower.contains(&pattern.to_ascii_lowercase()) {
                tracing::warn!(
                    "[ToolOutputGuard] Tool '{}' output contains potential injection marker: '{}'",
                    tool_name, pattern
                );
                break; // Only warn once per tool call
            }
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,120 @@
use std::fmt::Write;
use crate::driver::ToolDefinition;
/// Runtime context that determines which prompt sections are included.
///
/// Passed to [`PromptBuilder::build`]; each field toggles or feeds exactly
/// one part of the assembled system prompt.
pub struct PromptContext {
    /// Base system prompt from AgentConfig; a generic assistant persona is
    /// substituted when `None`.
    pub base_prompt: Option<String>,
    /// Custom agent personality (SOUL.md equivalent), appended as an
    /// "Agent Personality" section when present.
    pub soul: Option<String>,
    /// Whether thinking/extended reasoning is enabled (adds reasoning guidance).
    pub thinking_enabled: bool,
    /// Whether plan mode is active (adds plan-before-act instructions).
    pub plan_mode: bool,
    /// Tool definitions rendered into an "Available Tools" list when non-empty.
    pub tool_definitions: Vec<ToolDefinition>,
    /// Agent name for personalization; rendered as a "You are known as …" line.
    pub agent_name: Option<String>,
}
/// A single section in the assembled prompt.
pub struct PromptSection {
    /// Identifier for the section (not rendered into the prompt text by `build`).
    pub name: &'static str,
    /// Raw text appended verbatim to the prompt, separated by a blank line.
    pub template: String,
    /// Sort key: lower values are rendered first among registered sections.
    pub priority: u32,
}
/// Builds structured system prompts from conditional sections.
pub struct PromptBuilder {
sections: Vec<PromptSection>,
}
impl PromptBuilder {
pub fn new() -> Self {
Self {
sections: Vec::new(),
}
}
/// Add a section unconditionally.
pub fn add_section(
mut self,
name: &'static str,
template: impl Into<String>,
priority: u32,
) -> Self {
self.sections.push(PromptSection {
name,
template: template.into(),
priority,
});
self
}
/// Assemble the final system prompt based on runtime context.
pub fn build(&self, ctx: &PromptContext) -> String {
let mut sections: Vec<&PromptSection> = self.sections.iter().collect();
sections.sort_by_key(|s| s.priority);
let mut result = String::with_capacity(4096);
// Base prompt (always included)
if let Some(ref base) = ctx.base_prompt {
result.push_str(base);
} else {
result.push_str("You are a helpful AI assistant.");
}
// Soul/personality section
if let Some(ref soul) = ctx.soul {
result.push_str("\n\n## Agent Personality\n\n");
result.push_str(soul);
}
// Agent name personalization
if let Some(ref name) = ctx.agent_name {
let _ = write!(result, "\n\nYou are known as \"{name}\". Respond in character.");
}
// Dynamic tool descriptions
if !ctx.tool_definitions.is_empty() {
result.push_str("\n\n## Available Tools\n\n");
for tool in &ctx.tool_definitions {
let _ = writeln!(result, "- **{}**: {}", tool.name, tool.description);
}
}
// Thinking style guidance
if ctx.thinking_enabled {
result.push_str("\n\n## Reasoning Mode\n\n");
result.push_str(
"Extended reasoning is enabled. Think step-by-step before responding. \
Show your reasoning process, then provide the final answer.",
);
}
// Plan mode instructions
if ctx.plan_mode {
result.push_str("\n\n## Plan Mode\n\n");
result.push_str(
"You are in plan mode. Before executing any actions, create a detailed plan. \
Present the plan to the user for approval before proceeding.",
);
}
// Additional registered sections
for section in sections {
result.push_str("\n\n");
result.push_str(&section.template);
}
result
}
}
impl Default for PromptBuilder {
fn default() -> Self {
Self::new()
}
}

View File

@@ -0,0 +1,9 @@
//! Dynamic prompt assembly module.
//!
//! Inspired by DeerFlow's conditional section-based prompt composition.
//! The `PromptBuilder` assembles a structured system prompt from multiple
//! conditional sections before the middleware chain further modifies it.
mod builder;
pub use builder::{PromptBuilder, PromptContext, PromptSection};

View File

@@ -7,6 +7,7 @@ mod web_fetch;
mod execute_skill;
mod skill_load;
mod path_validator;
mod task;
pub use file_read::FileReadTool;
pub use file_write::FileWriteTool;
@@ -15,6 +16,7 @@ pub use web_fetch::WebFetchTool;
pub use execute_skill::ExecuteSkillTool;
pub use skill_load::SkillLoadTool;
pub use path_validator::{PathValidator, PathValidatorConfig};
pub use task::TaskTool;
use crate::tool::ToolRegistry;

View File

@@ -0,0 +1,179 @@
//! Task tool — delegates sub-tasks to a nested AgentLoop.
//!
//! Inspired by DeerFlow's `task_tool`: the lead agent can spawn sub-agent tasks
//! to parallelise complex work. Each sub-task runs its own AgentLoop with a
//! fresh session, isolated context, and a configurable maximum iteration count.
use async_trait::async_trait;
use serde_json::{json, Value};
use zclaw_types::{AgentId, Result, ZclawError};
use zclaw_memory::MemoryStore;
use crate::driver::LlmDriver;
use crate::loop_runner::AgentLoop;
use crate::tool::{Tool, ToolContext, ToolRegistry};
use crate::tool::builtin::register_builtin_tools;
use std::sync::Arc;
/// Default max iterations for a sub-agent task.
const DEFAULT_MAX_ITERATIONS: usize = 5;
/// Tool that delegates sub-tasks to a nested AgentLoop.
pub struct TaskTool {
    /// LLM driver shared with the parent loop.
    driver: Arc<dyn LlmDriver>,
    /// Memory store used to create isolated sub-agent sessions.
    memory: Arc<MemoryStore>,
    /// Model identifier passed to the sub-agent loop.
    model: String,
    /// Max completion tokens for sub-agent requests.
    max_tokens: u32,
    /// Sampling temperature for sub-agent requests.
    temperature: f32,
}
impl TaskTool {
    /// Create a task tool with default generation settings
    /// (4096 max tokens, temperature 0.7).
    pub fn new(
        driver: Arc<dyn LlmDriver>,
        memory: Arc<MemoryStore>,
        model: impl Into<String>,
    ) -> Self {
        Self {
            driver,
            memory,
            model: model.into(),
            max_tokens: 4096,
            temperature: 0.7,
        }
    }
    /// Builder-style override for the sub-agent token budget.
    pub fn with_max_tokens(self, max_tokens: u32) -> Self {
        Self { max_tokens, ..self }
    }
    /// Builder-style override for the sub-agent sampling temperature.
    pub fn with_temperature(self, temperature: f32) -> Self {
        Self { temperature, ..self }
    }
}
#[async_trait]
impl Tool for TaskTool {
    fn name(&self) -> &str {
        "task"
    }

    fn description(&self) -> &str {
        "Delegate a sub-task to a sub-agent. The sub-agent will work independently \
         with its own context and tools. Use this to break complex tasks into \
         parallel or sequential sub-tasks. Each sub-task runs in its own session \
         with a focused system prompt."
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "description": {
                    "type": "string",
                    "description": "Short description of the sub-task (shown in progress UI)"
                },
                "prompt": {
                    "type": "string",
                    "description": "Detailed instructions for the sub-agent"
                },
                "max_iterations": {
                    "type": "integer",
                    "description": "Maximum tool-call iterations for the sub-agent (default: 5)",
                    "minimum": 1,
                    "maximum": 10
                }
            },
            "required": ["description", "prompt"]
        })
    }

    /// Spawn a nested AgentLoop with a fresh session and isolated context,
    /// run it to completion, and return a JSON summary of the outcome.
    ///
    /// Returns `{"status": "completed", ...}` with the sub-agent's response
    /// and token/iteration counts, or `{"status": "failed", ...}` with the
    /// error string. Only missing required parameters produce an `Err`.
    async fn execute(&self, input: Value, context: &ToolContext) -> Result<Value> {
        let description = input["description"].as_str()
            .ok_or_else(|| ZclawError::InvalidInput("Missing 'description' parameter".into()))?;
        let prompt = input["prompt"].as_str()
            .ok_or_else(|| ZclawError::InvalidInput("Missing 'prompt' parameter".into()))?;
        // Clamp to the schema bounds (1..=10): the value comes from the model
        // and must not be trusted to respect the advertised minimum/maximum.
        let max_iterations = input["max_iterations"].as_u64()
            .unwrap_or(DEFAULT_MAX_ITERATIONS as u64)
            .clamp(1, 10) as usize;
        tracing::info!(
            "[TaskTool] Starting sub-agent task: {:?} (max_iterations={})",
            description, max_iterations
        );
        // Create a sub-agent with its own ID
        let sub_agent_id = AgentId::new();
        // Create a fresh session for the sub-agent
        let session_id = self.memory.create_session(&sub_agent_id).await?;
        // Build system prompt focused on the sub-task
        let system_prompt = format!(
            "你是一个专注的子Agent负责完成以下任务{}\n\n\
             要求:\n\
             - 专注完成分配给你的任务\n\
             - 使用可用的工具来完成任务\n\
             - 完成后提供简洁的结果摘要\n\
             - 如果遇到无法解决的问题,请说明原因",
            description
        );
        // Create a tool registry with builtin tools
        // (TaskTool itself is NOT included to prevent infinite nesting)
        let mut tools = ToolRegistry::new();
        register_builtin_tools(&mut tools);
        // Build a lightweight AgentLoop for the sub-agent.
        // BUG FIX: max_iterations was parsed and logged but never applied,
        // so sub-agents always ran with the loop's own default.
        let mut sub_loop = AgentLoop::new(
            sub_agent_id,
            self.driver.clone(),
            tools,
            self.memory.clone(),
        )
        .with_model(&self.model)
        .with_system_prompt(&system_prompt)
        .with_max_tokens(self.max_tokens)
        .with_temperature(self.temperature)
        .with_max_iterations(max_iterations);
        // Optionally inject skill executor and path validator from parent context
        if let Some(ref executor) = context.skill_executor {
            sub_loop = sub_loop.with_skill_executor(executor.clone());
        }
        if let Some(ref validator) = context.path_validator {
            sub_loop = sub_loop.with_path_validator(validator.clone());
        }
        // Execute the sub-agent loop (non-streaming — collect full result)
        let result = match sub_loop.run(session_id.clone(), prompt.to_string()).await {
            Ok(loop_result) => {
                tracing::info!(
                    "[TaskTool] Sub-agent completed: {} iterations, {} input tokens, {} output tokens",
                    loop_result.iterations, loop_result.input_tokens, loop_result.output_tokens
                );
                json!({
                    "status": "completed",
                    "description": description,
                    "result": loop_result.response,
                    "iterations": loop_result.iterations,
                    "input_tokens": loop_result.input_tokens,
                    "output_tokens": loop_result.output_tokens,
                })
            }
            Err(e) => {
                // Sub-agent failure is reported as data, not propagated:
                // the parent LLM can react to the failed status.
                tracing::warn!("[TaskTool] Sub-agent failed: {}", e);
                json!({
                    "status": "failed",
                    "description": description,
                    "error": e.to_string(),
                })
            }
        };
        Ok(result)
    }
}