refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup

- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace let _ = with proper log::warn! across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
This commit is contained in:
iven
2026-04-03 00:28:03 +08:00
parent 0a04b260a4
commit 52bdafa633
55 changed files with 4130 additions and 1959 deletions

View File

@@ -454,6 +454,9 @@ async fn generate_llm_summary(
temperature: Some(0.3),
stop: Vec::new(),
stream: false,
thinking_enabled: false,
reasoning_effort: None,
plan_mode: false,
};
let response = driver

View File

@@ -181,8 +181,12 @@ impl LlmDriver for AnthropicDriver {
}
}
"error" => {
let error_msg = serde_json::from_str::<serde_json::Value>(&data)
.ok()
.and_then(|v| v.get("error").and_then(|e| e.get("message")).and_then(|m| m.as_str().map(String::from)))
.unwrap_or_else(|| format!("Stream error: {}", &data[..data.len().min(200)]));
yield Ok(StreamChunk::Error {
message: "Stream error".to_string(),
message: error_msg,
});
}
_ => {}
@@ -251,15 +255,42 @@ impl AnthropicDriver {
})
.collect();
let requested_max = request.max_tokens.unwrap_or(4096);
let (thinking, budget) = if request.thinking_enabled {
let budget = match request.reasoning_effort.as_deref() {
Some("low") => 2000,
Some("medium") => 10000,
Some("high") => 32000,
_ => 10000, // default
};
(Some(AnthropicThinking {
r#type: "enabled".to_string(),
budget_tokens: budget,
}), budget)
} else {
(None, 0)
};
// When thinking is enabled, max_tokens is the TOTAL budget (thinking + text).
// Use the maximum output limit (65536) so thinking can consume whatever it
// needs without starving the text response. We only pay for tokens actually
// generated, so a high limit costs nothing extra.
let effective_max = if budget > 0 {
65536
} else {
requested_max
};
AnthropicRequest {
model: request.model.clone(),
max_tokens: request.max_tokens.unwrap_or(4096),
max_tokens: effective_max,
system: request.system.clone(),
messages,
tools: if tools.is_empty() { None } else { Some(tools) },
temperature: request.temperature,
stop_sequences: if request.stop.is_empty() { None } else { Some(request.stop.clone()) },
stream: request.stream,
thinking,
}
}
@@ -313,6 +344,14 @@ struct AnthropicRequest {
stop_sequences: Option<Vec<String>>,
#[serde(default)]
stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
thinking: Option<AnthropicThinking>,
}
/// Extended-thinking configuration serialized into the Anthropic request body.
#[derive(Serialize)]
struct AnthropicThinking {
    // Wire field is literally "type"; set to "enabled" when thinking is on.
    r#type: String,
    // Token budget reserved for the thinking phase (derived from reasoning_effort).
    budget_tokens: u32,
}
#[derive(Serialize)]

View File

@@ -265,6 +265,10 @@ impl GeminiDriver {
/// - Tool definitions use `functionDeclarations`
/// - Tool results are sent as `functionResponse` parts in `user` messages
fn build_api_request(&self, request: &CompletionRequest) -> GeminiRequest {
if request.thinking_enabled {
tracing::debug!("[GeminiDriver] thinking_enabled=true but Gemini does not support native thinking mode; ignoring");
}
let mut contents: Vec<GeminiContent> = Vec::new();
for msg in &request.messages {

View File

@@ -58,6 +58,10 @@ impl LocalDriver {
// ----------------------------------------------------------------
fn build_api_request(&self, request: &CompletionRequest) -> LocalApiRequest {
if request.thinking_enabled {
tracing::debug!("[LocalDriver] thinking_enabled=true but local driver does not support native thinking mode; ignoring");
}
let messages: Vec<LocalApiMessage> = request
.messages
.iter()
@@ -183,7 +187,7 @@ impl LocalDriver {
.unwrap_or(false);
let blocks = if has_tool_calls {
let tool_calls = c.message.tool_calls.as_ref().unwrap();
let tool_calls = c.message.tool_calls.as_deref().unwrap_or_default();
tool_calls
.iter()
.map(|tc| {
@@ -199,7 +203,7 @@ impl LocalDriver {
.collect()
} else if has_content {
vec![ContentBlock::Text {
text: c.message.content.clone().unwrap(),
text: c.message.content.clone().unwrap_or_default(),
}]
} else {
vec![ContentBlock::Text {

View File

@@ -60,6 +60,15 @@ pub struct CompletionRequest {
pub stop: Vec<String>,
/// Enable streaming
pub stream: bool,
/// Enable extended thinking/reasoning
#[serde(default)]
pub thinking_enabled: bool,
/// Reasoning effort level (for providers that support it)
#[serde(default)]
pub reasoning_effort: Option<String>,
/// Enable plan mode
#[serde(default)]
pub plan_mode: bool,
}
impl Default for CompletionRequest {
@@ -73,27 +82,16 @@ impl Default for CompletionRequest {
temperature: Some(0.7),
stop: Vec::new(),
stream: false,
thinking_enabled: false,
reasoning_effort: None,
plan_mode: false,
}
}
}
/// Tool definition for LLM
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolDefinition {
pub name: String,
pub description: String,
pub input_schema: serde_json::Value,
}
impl ToolDefinition {
pub fn new(name: impl Into<String>, description: impl Into<String>, schema: serde_json::Value) -> Self {
Self {
name: name.into(),
description: description.into(),
input_schema: schema,
}
}
}
/// Tool definition for LLM function calling.
/// Re-exported from `zclaw_types::tool::ToolDefinition` (canonical definition).
pub use zclaw_types::tool::ToolDefinition;
/// Completion response
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -110,7 +108,8 @@ pub struct CompletionResponse {
pub stop_reason: StopReason,
}
/// Content block in response
/// LLM driver response content block (subset of canonical zclaw_types::ContentBlock).
/// Used internally by Anthropic/OpenAI/Gemini/Local drivers for API response parsing.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentBlock {

View File

@@ -130,8 +130,8 @@ impl LlmDriver for OpenAiDriver {
let api_key = self.api_key.expose_secret().to_string();
Box::pin(stream! {
println!("[OpenAI:stream] POST to {}/chat/completions", base_url);
println!("[OpenAI:stream] Request model={}, stream={}", stream_request.model, stream_request.stream);
tracing::debug!("[OpenAI:stream] POST to {}/chat/completions", base_url);
tracing::debug!("[OpenAI:stream] Request model={}, stream={}", stream_request.model, stream_request.stream);
let response = match self.client
.post(format!("{}/chat/completions", base_url))
.header("Authorization", format!("Bearer {}", api_key))
@@ -142,11 +142,11 @@ impl LlmDriver for OpenAiDriver {
.await
{
Ok(r) => {
println!("[OpenAI:stream] Response status: {}, content-type: {:?}", r.status(), r.headers().get("content-type"));
tracing::debug!("[OpenAI:stream] Response status: {}, content-type: {:?}", r.status(), r.headers().get("content-type"));
r
},
Err(e) => {
println!("[OpenAI:stream] HTTP request FAILED: {:?}", e);
tracing::debug!("[OpenAI:stream] HTTP request FAILED: {:?}", e);
yield Err(ZclawError::LlmError(format!("HTTP request failed: {}", e)));
return;
}
@@ -155,7 +155,7 @@ impl LlmDriver for OpenAiDriver {
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
println!("[OpenAI:stream] API error {}: {}", status, &body[..body.len().min(500)]);
tracing::debug!("[OpenAI:stream] API error {}: {}", status, &body[..body.len().min(500)]);
yield Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
return;
}
@@ -170,7 +170,7 @@ impl LlmDriver for OpenAiDriver {
let chunk = match chunk_result {
Ok(c) => c,
Err(e) => {
println!("[OpenAI:stream] Byte stream error: {:?}", e);
tracing::debug!("[OpenAI:stream] Byte stream error: {:?}", e);
yield Err(ZclawError::LlmError(format!("Stream error: {}", e)));
continue;
}
@@ -180,7 +180,7 @@ impl LlmDriver for OpenAiDriver {
let text = String::from_utf8_lossy(&chunk);
// Log first 500 bytes of raw data for debugging SSE format
if raw_bytes_total <= 600 {
println!("[OpenAI:stream] RAW chunk ({} bytes): {:?}", text.len(), &text[..text.len().min(500)]);
tracing::debug!("[OpenAI:stream] RAW chunk ({} bytes): {:?}", text.len(), &text[..text.len().min(500)]);
}
for line in text.lines() {
let trimmed = line.trim();
@@ -198,10 +198,10 @@ impl LlmDriver for OpenAiDriver {
if let Some(data) = data {
sse_event_count += 1;
if sse_event_count <= 3 || data == "[DONE]" {
println!("[OpenAI:stream] SSE #{}: {}", sse_event_count, &data[..data.len().min(300)]);
tracing::debug!("[OpenAI:stream] SSE #{}: {}", sse_event_count, &data[..data.len().min(300)]);
}
if data == "[DONE]" {
println!("[OpenAI:stream] Received [DONE], total SSE events: {}, raw bytes: {}", sse_event_count, raw_bytes_total);
tracing::debug!("[OpenAI:stream] Received [DONE], total SSE events: {}, raw bytes: {}", sse_event_count, raw_bytes_total);
// Emit ToolUseEnd for all accumulated tool calls (skip invalid ones with empty name)
for (id, (name, args)) in &accumulated_tool_calls {
@@ -319,7 +319,7 @@ impl LlmDriver for OpenAiDriver {
}
}
}
println!("[OpenAI:stream] Byte stream ended. Total: {} SSE events, {} raw bytes", sse_event_count, raw_bytes_total);
tracing::debug!("[OpenAI:stream] Byte stream ended. Total: {} SSE events, {} raw bytes", sse_event_count, raw_bytes_total);
})
}
}
@@ -496,6 +496,7 @@ impl OpenAiDriver {
stop: if request.stop.is_empty() { None } else { Some(request.stop.clone()) },
stream: request.stream,
tools: if tools.is_empty() { None } else { Some(tools) },
reasoning_effort: request.reasoning_effort.clone(),
};
// Pre-send payload size validation
@@ -581,8 +582,8 @@ impl OpenAiDriver {
let has_reasoning = c.message.reasoning_content.as_ref().map(|t| !t.is_empty()).unwrap_or(false);
let blocks = if has_tool_calls {
// Tool calls take priority
let tool_calls = c.message.tool_calls.as_ref().unwrap();
// Tool calls take priority — safe to unwrap after has_tool_calls check
let tool_calls = c.message.tool_calls.as_ref().cloned().unwrap_or_default();
tracing::debug!("[OpenAiDriver:convert_response] Using tool_calls: {} calls", tool_calls.len());
tool_calls.iter().map(|tc| ContentBlock::ToolUse {
id: tc.id.clone(),
@@ -590,15 +591,15 @@ impl OpenAiDriver {
input: serde_json::from_str(&tc.function.arguments).unwrap_or(serde_json::Value::Null),
}).collect()
} else if has_content {
// Non-empty content
let text = c.message.content.as_ref().unwrap();
// Non-empty content — safe to unwrap after has_content check
let text = c.message.content.as_deref().unwrap_or("");
tracing::debug!("[OpenAiDriver:convert_response] Using text content: {} chars", text.len());
vec![ContentBlock::Text { text: text.clone() }]
vec![ContentBlock::Text { text: text.to_string() }]
} else if has_reasoning {
// Content empty but reasoning_content present (Kimi, Qwen, DeepSeek)
let reasoning = c.message.reasoning_content.as_ref().unwrap();
let reasoning = c.message.reasoning_content.as_deref().unwrap_or("");
tracing::debug!("[OpenAiDriver:convert_response] Using reasoning_content: {} chars", reasoning.len());
vec![ContentBlock::Text { text: reasoning.clone() }]
vec![ContentBlock::Text { text: reasoning.to_string() }]
} else {
// No content or tool_calls
tracing::debug!("[OpenAiDriver:convert_response] No content or tool_calls, using empty text");
@@ -771,6 +772,8 @@ struct OpenAiRequest {
stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
tools: Option<Vec<OpenAiTool>>,
#[serde(skip_serializing_if = "Option::is_none")]
reasoning_effort: Option<String>,
}
#[derive(Serialize)]
@@ -833,7 +836,7 @@ struct OpenAiResponse {
usage: Option<OpenAiUsage>,
}
#[derive(Deserialize, Default)]
#[derive(Deserialize, Default, Clone)]
struct OpenAiChoice {
#[serde(default)]
message: OpenAiResponseMessage,
@@ -841,7 +844,7 @@ struct OpenAiChoice {
finish_reason: Option<String>,
}
#[derive(Deserialize, Default)]
#[derive(Deserialize, Default, Clone)]
struct OpenAiResponseMessage {
#[serde(default)]
content: Option<String>,
@@ -851,7 +854,7 @@ struct OpenAiResponseMessage {
tool_calls: Option<Vec<OpenAiToolCallResponse>>,
}
#[derive(Deserialize, Default)]
#[derive(Deserialize, Default, Clone)]
struct OpenAiToolCallResponse {
#[serde(default)]
id: String,
@@ -859,7 +862,7 @@ struct OpenAiToolCallResponse {
function: FunctionCallResponse,
}
#[derive(Deserialize, Default)]
#[derive(Deserialize, Default, Clone)]
struct FunctionCallResponse {
#[serde(default)]
name: String,

View File

@@ -16,6 +16,7 @@ use zclaw_growth::{
MemoryExtractor, MemoryRetriever, PromptInjector, RetrievalResult,
VikingAdapter,
};
use zclaw_memory::{ExtractedFactBatch, Fact, FactCategory};
use zclaw_types::{AgentId, Message, Result, SessionId};
/// Growth system integration for AgentLoop
@@ -212,6 +213,80 @@ impl GrowthIntegration {
Ok(count)
}
/// Combined extraction: single LLM call that produces both stored memories
/// and structured facts, avoiding double extraction overhead.
///
/// Returns `(memory_count, Option<ExtractedFactBatch>)` on success.
pub async fn extract_combined(
&self,
agent_id: &AgentId,
messages: &[Message],
session_id: &SessionId,
) -> Result<Option<(usize, ExtractedFactBatch)>> {
if !self.config.enabled || !self.config.auto_extract {
return Ok(None);
}
// Single LLM extraction call
let extracted = self
.extractor
.extract(messages, session_id.clone())
.await
.unwrap_or_else(|e| {
tracing::warn!("[GrowthIntegration] Combined extraction failed: {}", e);
Vec::new()
});
if extracted.is_empty() {
return Ok(None);
}
let mem_count = extracted.len();
// Store raw memories
self.extractor
.store_memories(&agent_id.to_string(), &extracted)
.await?;
// Track learning event
self.tracker
.record_learning(agent_id, &session_id.to_string(), mem_count)
.await?;
// Convert same extracted memories to structured facts (no extra LLM call)
let facts: Vec<Fact> = extracted
.into_iter()
.map(|m| {
let category = match m.memory_type {
zclaw_growth::types::MemoryType::Preference => FactCategory::Preference,
zclaw_growth::types::MemoryType::Knowledge => FactCategory::Knowledge,
zclaw_growth::types::MemoryType::Experience => FactCategory::Behavior,
_ => FactCategory::General,
};
Fact::new(m.content, category, f64::from(m.confidence))
.with_source(session_id.to_string())
})
.collect();
let batch = ExtractedFactBatch {
facts,
agent_id: agent_id.to_string(),
session_id: session_id.to_string(),
}
.deduplicate()
.filter_by_confidence(0.7);
if batch.is_empty() {
return Ok(Some((mem_count, ExtractedFactBatch {
facts: vec![],
agent_id: agent_id.to_string(),
session_id: session_id.to_string(),
})));
}
Ok(Some((mem_count, batch)))
}
/// Retrieve memories for a query without injection
pub async fn retrieve_memories(
&self,

View File

@@ -16,6 +16,7 @@ pub mod stream;
pub mod growth;
pub mod compaction;
pub mod middleware;
pub mod prompt;
// Re-export main types
pub use driver::{
@@ -31,3 +32,4 @@ pub use zclaw_growth::VikingAdapter;
pub use zclaw_growth::EmbeddingClient;
pub use zclaw_growth::LlmDriverForExtraction;
pub use compaction::{CompactionConfig, CompactionOutcome};
pub use prompt::{PromptBuilder, PromptContext, PromptSection};

View File

@@ -14,6 +14,7 @@ use crate::loop_guard::{LoopGuard, LoopGuardResult};
use crate::growth::GrowthIntegration;
use crate::compaction::{self, CompactionConfig};
use crate::middleware::{self, MiddlewareChain};
use crate::prompt::{PromptBuilder, PromptContext};
use zclaw_memory::MemoryStore;
/// Agent loop runner
@@ -25,6 +26,8 @@ pub struct AgentLoop {
loop_guard: Mutex<LoopGuard>,
model: String,
system_prompt: Option<String>,
/// Custom agent personality for prompt assembly
soul: Option<String>,
max_tokens: u32,
temperature: f32,
skill_executor: Option<Arc<dyn SkillExecutor>>,
@@ -39,6 +42,12 @@ pub struct AgentLoop {
/// delegated to the chain instead of the inline code below.
/// When `None`, the legacy inline path is used (100% backward compatible).
middleware_chain: Option<MiddlewareChain>,
/// Chat mode: extended thinking enabled
thinking_enabled: bool,
/// Chat mode: reasoning effort level
reasoning_effort: Option<String>,
/// Chat mode: plan mode
plan_mode: bool,
}
impl AgentLoop {
@@ -56,7 +65,8 @@ impl AgentLoop {
loop_guard: Mutex::new(LoopGuard::default()),
model: String::new(), // Must be set via with_model()
system_prompt: None,
max_tokens: 4096,
soul: None,
max_tokens: 16384,
temperature: 0.7,
skill_executor: None,
path_validator: None,
@@ -64,6 +74,9 @@ impl AgentLoop {
compaction_threshold: 0,
compaction_config: CompactionConfig::default(),
middleware_chain: None,
thinking_enabled: false,
reasoning_effort: None,
plan_mode: false,
}
}
@@ -91,6 +104,30 @@ impl AgentLoop {
self
}
/// Set the agent personality (SOUL.md equivalent)
pub fn with_soul(mut self, soul: impl Into<String>) -> Self {
self.soul = Some(soul.into());
self
}
/// Enable extended thinking/reasoning mode
pub fn with_thinking_enabled(mut self, enabled: bool) -> Self {
self.thinking_enabled = enabled;
self
}
/// Set reasoning effort level (low/medium/high)
pub fn with_reasoning_effort(mut self, effort: impl Into<String>) -> Self {
self.reasoning_effort = Some(effort.into());
self
}
/// Enable plan mode
pub fn with_plan_mode(mut self, enabled: bool) -> Self {
self.plan_mode = enabled;
self
}
/// Set max tokens
pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
self.max_tokens = max_tokens;
@@ -214,7 +251,15 @@ impl AgentLoop {
// Enhance system prompt — skip when middleware chain handles it
let mut enhanced_prompt = if use_middleware {
self.system_prompt.clone().unwrap_or_default()
let prompt_ctx = PromptContext {
base_prompt: self.system_prompt.clone(),
soul: self.soul.clone(),
thinking_enabled: self.thinking_enabled,
plan_mode: self.plan_mode,
tool_definitions: self.tools.definitions(),
agent_name: None,
};
PromptBuilder::new().build(&prompt_ctx)
} else if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
growth.enhance_prompt(&self.agent_id, base, &input).await?
@@ -279,6 +324,9 @@ impl AgentLoop {
temperature: Some(self.temperature),
stop: Vec::new(),
stream: false,
thinking_enabled: self.thinking_enabled,
reasoning_effort: self.reasoning_effort.clone(),
plan_mode: self.plan_mode,
};
// Call LLM
@@ -352,7 +400,12 @@ impl AgentLoop {
// Create tool context and execute all tools
let tool_context = self.create_tool_context(session_id.clone());
let mut circuit_breaker_triggered = false;
let mut abort_result: Option<AgentLoopResult> = None;
for (id, name, input) in tool_calls {
// Check if loop was already aborted
if abort_result.is_some() {
break;
}
// Check tool call safety — via middleware chain or inline loop guard
if let Some(ref chain) = self.middleware_chain {
let mw_ctx_ref = middleware::MiddlewareContext {
@@ -382,6 +435,17 @@ impl AgentLoop {
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), tool_result, false));
continue;
}
middleware::ToolCallDecision::AbortLoop(reason) => {
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
let msg = format!("{}\n已自动终止", reason);
self.memory.append_message(&session_id, &Message::assistant(&msg)).await?;
abort_result = Some(AgentLoopResult {
response: msg,
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations,
});
}
}
} else {
// Legacy inline path
@@ -421,6 +485,11 @@ impl AgentLoop {
// Continue the loop - LLM will process tool results and generate final response
// If middleware aborted the loop, return immediately
if let Some(result) = abort_result {
break result;
}
// If circuit breaker was triggered, terminate immediately
if circuit_breaker_triggered {
let msg = "检测到工具调用循环,已自动终止";
@@ -502,7 +571,15 @@ impl AgentLoop {
// Enhance system prompt — skip when middleware chain handles it
let mut enhanced_prompt = if use_middleware {
self.system_prompt.clone().unwrap_or_default()
let prompt_ctx = PromptContext {
base_prompt: self.system_prompt.clone(),
soul: self.soul.clone(),
thinking_enabled: self.thinking_enabled,
plan_mode: self.plan_mode,
tool_definitions: self.tools.definitions(),
agent_name: None,
};
PromptBuilder::new().build(&prompt_ctx)
} else if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
growth.enhance_prompt(&self.agent_id, base, &input).await?
@@ -552,6 +629,9 @@ impl AgentLoop {
let model = self.model.clone();
let max_tokens = self.max_tokens;
let temperature = self.temperature;
let thinking_enabled = self.thinking_enabled;
let reasoning_effort = self.reasoning_effort.clone();
let plan_mode = self.plan_mode;
tokio::spawn(async move {
let mut messages = messages;
@@ -584,6 +664,9 @@ impl AgentLoop {
temperature: Some(temperature),
stop: Vec::new(),
stream: true,
thinking_enabled,
reasoning_effort: reasoning_effort.clone(),
plan_mode,
};
let mut stream = driver.stream(request);
@@ -596,9 +679,12 @@ impl AgentLoop {
let mut chunk_count: usize = 0;
let mut text_delta_count: usize = 0;
let mut thinking_delta_count: usize = 0;
while let Some(chunk_result) = stream.next().await {
match chunk_result {
Ok(chunk) => {
let mut stream_errored = false;
let chunk_timeout = std::time::Duration::from_secs(60);
loop {
match tokio::time::timeout(chunk_timeout, stream.next()).await {
Ok(Some(Ok(chunk))) => {
chunk_count += 1;
match &chunk {
StreamChunk::TextDelta { delta } => {
@@ -610,8 +696,8 @@ impl AgentLoop {
StreamChunk::ThinkingDelta { delta } => {
thinking_delta_count += 1;
tracing::debug!("[AgentLoop] ThinkingDelta #{}: {} chars", thinking_delta_count, delta.len());
// Accumulate reasoning separately — not mixed into iteration_text
reasoning_text.push_str(delta);
let _ = tx.send(LoopEvent::ThinkingDelta(delta.clone())).await;
}
StreamChunk::ToolUseStart { id, name } => {
tracing::debug!("[AgentLoop] ToolUseStart: id={}, name={}", id, name);
@@ -651,21 +737,43 @@ impl AgentLoop {
StreamChunk::Error { message } => {
tracing::error!("[AgentLoop] Stream error: {}", message);
let _ = tx.send(LoopEvent::Error(message.clone())).await;
stream_errored = true;
}
}
}
Err(e) => {
Ok(Some(Err(e))) => {
tracing::error!("[AgentLoop] Chunk error: {}", e);
let _ = tx.send(LoopEvent::Error(e.to_string())).await;
let _ = tx.send(LoopEvent::Error(format!("LLM 响应错误: {}", e.to_string()))).await;
stream_errored = true;
}
Ok(None) => break, // Stream ended normally
Err(_) => {
tracing::error!("[AgentLoop] Stream chunk timeout ({}s)", chunk_timeout.as_secs());
let _ = tx.send(LoopEvent::Error("LLM 响应超时,请重试".to_string())).await;
stream_errored = true;
}
}
if stream_errored {
break;
}
}
tracing::info!("[AgentLoop] Stream ended: {} total chunks (text={}, thinking={}, tools={}), iteration_text={} chars",
chunk_count, text_delta_count, thinking_delta_count, pending_tool_calls.len(),
iteration_text.len());
if iteration_text.is_empty() {
tracing::warn!("[AgentLoop] WARNING: iteration_text is EMPTY after {} chunks! text_delta={}, thinking_delta={}",
chunk_count, text_delta_count, thinking_delta_count);
// Fallback: if model generated reasoning but no text content,
// use reasoning as text response. This happens with some thinking models
// (DeepSeek R1, QWQ) that put the answer in reasoning_content instead of content.
// Safe now because: (1) context is clean (no stale user_profile/memory injection),
// (2) max_tokens=16384 prevents truncation, (3) reasoning is about the correct topic.
if iteration_text.is_empty() && !reasoning_text.is_empty() {
tracing::info!("[AgentLoop] Model generated {} chars of reasoning but no text — using reasoning as response",
reasoning_text.len());
let _ = tx.send(LoopEvent::Delta(reasoning_text.clone())).await;
iteration_text = reasoning_text.clone();
} else if iteration_text.is_empty() {
tracing::warn!("[AgentLoop] No text content after {} chunks (thinking_delta={})",
chunk_count, thinking_delta_count);
}
// If no tool calls, we have the final response
@@ -706,6 +814,12 @@ impl AgentLoop {
break 'outer;
}
// Skip tool processing if stream errored or timed out
if stream_errored {
tracing::debug!("[AgentLoop] Stream errored, skipping tool processing and breaking");
break 'outer;
}
tracing::debug!("[AgentLoop] Processing {} tool calls (reasoning: {} chars)", pending_tool_calls.len(), reasoning_text.len());
// Push assistant message with reasoning before tool calls (required by Kimi and other thinking-enabled APIs)
@@ -745,6 +859,11 @@ impl AgentLoop {
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), error_output, true));
continue;
}
Ok(middleware::ToolCallDecision::AbortLoop(reason)) => {
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
let _ = tx.send(LoopEvent::Error(reason)).await;
break 'outer;
}
Ok(middleware::ToolCallDecision::ReplaceInput(new_input)) => {
// Execute with replaced input (same path_validator logic below)
let pv = path_validator.clone().unwrap_or_else(|| {
@@ -883,6 +1002,8 @@ pub struct AgentLoopResult {
pub enum LoopEvent {
/// Text delta from LLM
Delta(String),
/// Thinking/reasoning delta from LLM (extended thinking)
ThinkingDelta(String),
/// Tool execution started
ToolStart { name: String, input: serde_json::Value },
/// Tool execution completed

View File

@@ -41,6 +41,8 @@ pub enum ToolCallDecision {
Block(String),
/// Allow the call but replace the tool input with *new_input*.
ReplaceInput(Value),
/// Terminate the entire agent loop immediately (e.g. circuit breaker).
AbortLoop(String),
}
// ---------------------------------------------------------------------------
@@ -194,6 +196,25 @@ impl MiddlewareChain {
Ok(ToolCallDecision::Allow)
}
/// Run all `before_tool_call` hooks with mutable context.
pub async fn run_before_tool_call_mut(
&self,
ctx: &mut MiddlewareContext,
tool_name: &str,
tool_input: &Value,
) -> Result<ToolCallDecision> {
for mw in &self.middlewares {
match mw.before_tool_call(ctx, tool_name, tool_input).await? {
ToolCallDecision::Allow => {}
other => {
tracing::info!("[MiddlewareChain] '{}' decided {:?} for tool '{}'", mw.name(), other, tool_name);
return Ok(other);
}
}
}
Ok(ToolCallDecision::Allow)
}
/// Run all `after_tool_call` hooks in order.
pub async fn run_after_tool_call(
&self,
@@ -245,8 +266,13 @@ impl Default for MiddlewareChain {
// ---------------------------------------------------------------------------
pub mod compaction;
pub mod dangling_tool;
pub mod guardrail;
pub mod loop_guard;
pub mod memory;
pub mod skill_index;
pub mod subagent_limit;
pub mod title;
pub mod token_calibration;
pub mod tool_error;
pub mod tool_output_guard;

View File

@@ -0,0 +1,125 @@
//! Dangling tool-call repair middleware — detects and patches missing tool-result
//! messages that would cause LLM API errors.
//!
//! When the LLM produces a `ToolUse` content block but the agent loop fails to
//! produce a corresponding `ToolResult` message (e.g. due to a crash or timeout),
//! the conversation history becomes inconsistent. The next LLM call would fail with
//! an API error because ToolUse messages must be followed by ToolResult messages.
//!
//! This middleware inspects the message history before each completion and appends
//! placeholder ToolResult messages for any dangling ToolUse entries.
use std::collections::HashSet;
use async_trait::async_trait;
use zclaw_types::{Message, Result};
use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};
/// Middleware that repairs dangling tool-use blocks in conversation history.
///
/// Priority 300 — runs before tool error middleware (350) and guardrail (400).
pub struct DanglingToolMiddleware;

impl DanglingToolMiddleware {
    /// Construct the middleware. It is a stateless unit struct, so no
    /// configuration is required.
    pub fn new() -> Self {
        DanglingToolMiddleware
    }
}

impl Default for DanglingToolMiddleware {
    fn default() -> Self {
        DanglingToolMiddleware::new()
    }
}
#[async_trait]
impl AgentMiddleware for DanglingToolMiddleware {
    fn name(&self) -> &str { "dangling_tool" }

    fn priority(&self) -> i32 { 300 }

    /// Scan the history for `ToolUse` blocks that never received a matching
    /// `ToolResult` and append an error-flagged placeholder result after each,
    /// so the next completion call does not fail provider-side validation.
    ///
    /// Always returns `MiddlewareDecision::Continue`; repair is best-effort and
    /// never blocks the completion.
    async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
        let mut patched_count = 0usize;

        // Step 1: Collect all ToolUse IDs and all ToolResult IDs across the
        // entire message list (not just adjacent pairs).
        let mut tool_use_ids: Vec<(String, String)> = Vec::new(); // (id, tool_name)
        let mut tool_result_ids: HashSet<String> = HashSet::new();
        for msg in &ctx.messages {
            match msg {
                Message::ToolUse { ref id, ref tool, .. } => {
                    tool_use_ids.push((id.clone(), tool.as_str().to_string()));
                }
                Message::ToolResult { ref tool_call_id, .. } => {
                    // Every result counts as a match — including placeholders
                    // inserted by an earlier run of this middleware. That is
                    // what makes repeated runs idempotent: a previously patched
                    // ToolUse is no longer "dangling", so it is never
                    // double-patched.
                    tool_result_ids.insert(tool_call_id.clone());
                }
                _ => {}
            }
        }

        // Step 2: Find dangling ToolUse entries that have no matching ToolResult.
        let dangling_ids: HashSet<String> = tool_use_ids.iter()
            .filter(|(id, _)| !tool_result_ids.contains(id))
            .map(|(id, _)| id.clone())
            .collect();
        if dangling_ids.is_empty() {
            return Ok(MiddlewareDecision::Continue);
        }

        // Step 3: Rebuild the message list, inserting an error-flagged
        // placeholder ToolResult immediately after each dangling ToolUse.
        let capacity = ctx.messages.len() + dangling_ids.len();
        let mut patched_messages: Vec<Message> = Vec::with_capacity(capacity);
        for msg in &ctx.messages {
            patched_messages.push(msg.clone());
            if let Message::ToolUse { ref id, ref tool, .. } = msg {
                if dangling_ids.contains(id) {
                    tracing::warn!(
                        "[DanglingToolMiddleware] Patching dangling ToolUse: tool={}, id={}",
                        tool.as_str(), id
                    );
                    let placeholder = Message::tool_result(
                        id.clone(),
                        tool.clone(),
                        serde_json::json!({
                            "error": "Tool execution was interrupted. Please retry or use an alternative approach.",
                            "tool_patch": true,
                        }),
                        true, // is_error
                    );
                    patched_messages.push(placeholder);
                    patched_count += 1;
                }
            }
        }

        // Step 4: Detect streaming interrupt — if the last message is an Assistant
        // response while there were dangling tools, the user likely interrupted a
        // streaming response mid-tool-execution. No additional action is needed
        // beyond the patched ToolResult messages that now prevent API errors.
        if let Some(Message::Assistant { .. }) = patched_messages.last() {
            tracing::debug!(
                "[DanglingToolMiddleware] Streaming interrupt detected with {} dangling tools",
                patched_count
            );
        }

        if patched_count > 0 {
            tracing::info!(
                "[DanglingToolMiddleware] Patched {} dangling tool-use blocks",
                patched_count
            );
            ctx.messages = patched_messages;
        }
        Ok(MiddlewareDecision::Continue)
    }
}

View File

@@ -41,7 +41,7 @@ impl AgentMiddleware for LoopGuardMiddleware {
match result {
LoopGuardResult::CircuitBreaker => {
tracing::warn!("[LoopGuardMiddleware] Circuit breaker triggered by tool '{}'", tool_name);
Ok(ToolCallDecision::Block("检测到工具调用循环,已自动终止".to_string()))
Ok(ToolCallDecision::AbortLoop("检测到工具调用循环,已自动终止".to_string()))
}
LoopGuardResult::Blocked => {
tracing::warn!("[LoopGuardMiddleware] Tool '{}' blocked", tool_name);

View File

@@ -60,34 +60,39 @@ impl AgentMiddleware for MemoryMiddleware {
fn priority(&self) -> i32 { 150 }
async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
// Skip memory injection for very short queries.
// Short queries (e.g., "1+6", "hi", "好") don't benefit from memory context.
// Worse, the retriever's scope-based fallback may return high-importance but
// irrelevant old memories, causing the model to think about past conversations
// instead of answering the current question.
// Use char count (not byte count) so CJK queries are handled correctly:
// a single Chinese char is 3 UTF-8 bytes but 1 meaningful character.
let query = ctx.user_input.trim();
if query.chars().count() < 2 {
tracing::debug!(
"[MemoryMiddleware] Skipping enhancement for short query ({:?}): no memory context needed",
query
);
return Ok(MiddlewareDecision::Continue);
}
tracing::debug!(
"[MemoryMiddleware] before_completion for query: {:?}",
ctx.user_input.chars().take(50).collect::<String>()
);
match self.growth.enhance_prompt(
&ctx.agent_id,
&ctx.system_prompt,
&ctx.user_input,
).await {
// Retrieve relevant memories and inject into system prompt.
// The SqliteStorage retriever now uses FTS5-only matching — if FTS5 finds
// no relevant results, no memories are returned (no scope-based fallback).
// This prevents irrelevant high-importance memories from leaking into
// unrelated conversations.
let base = &ctx.system_prompt;
match self.growth.enhance_prompt(&ctx.agent_id, base, &ctx.user_input).await {
Ok(enhanced) => {
ctx.system_prompt = enhanced;
if enhanced != *base {
tracing::info!(
"[MemoryMiddleware] Injected memories into system prompt for agent {}",
ctx.agent_id
);
ctx.system_prompt = enhanced;
} else {
tracing::debug!(
"[MemoryMiddleware] No relevant memories found for query: {:?}",
ctx.user_input.chars().take(50).collect::<String>()
);
}
Ok(MiddlewareDecision::Continue)
}
Err(e) => {
// Non-fatal: memory retrieval failure should not block the loop
tracing::warn!("[MemoryMiddleware] Prompt enhancement failed: {}", e);
// Non-fatal: retrieval failure should not block the conversation
tracing::warn!(
"[MemoryMiddleware] Memory retrieval failed (non-fatal): {}",
e
);
Ok(MiddlewareDecision::Continue)
}
}

View File

@@ -0,0 +1,87 @@
//! Sub-agent limit middleware — enforces limits on sub-agent spawning.
//!
//! Prevents runaway sub-agent spawning by enforcing a per-turn total cap.
//! The `running` counter was removed because it leaked when subsequent
//! middleware blocked the tool call (before_tool_call increments but
//! after_tool_call never fires for blocked tools).
use async_trait::async_trait;
use serde_json::Value;
use zclaw_types::Result;
use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
/// Default cap on sub-agent spawns within a single conversation turn.
const DEFAULT_MAX_TOTAL: usize = 10;
/// Middleware that caps how many sub-agents may be spawned per turn.
///
/// Priority 550 — runs after loop guard (500).
pub struct SubagentLimitMiddleware {
    /// Hard cap on sub-agent spawns for one conversation turn.
    max_total: usize,
    /// How many sub-agents have been spawned so far in this turn.
    total_spawned: std::sync::atomic::AtomicUsize,
}
impl SubagentLimitMiddleware {
    /// Create a limiter with the default per-turn cap ([`DEFAULT_MAX_TOTAL`]).
    pub fn new() -> Self {
        Self {
            max_total: DEFAULT_MAX_TOTAL,
            total_spawned: std::sync::atomic::AtomicUsize::new(0),
        }
    }
    /// Builder-style override for the per-turn cap.
    pub fn with_max_total(mut self, n: usize) -> Self {
        self.max_total = n;
        self
    }
    /// Returns `true` when `tool_name` is one of the tools that spawns a sub-agent.
    fn is_subagent_tool(tool_name: &str) -> bool {
        const SPAWN_TOOLS: [&str; 4] = ["task", "delegate", "spawn_agent", "subagent"];
        SPAWN_TOOLS.contains(&tool_name)
    }
}
impl Default for SubagentLimitMiddleware {
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl AgentMiddleware for SubagentLimitMiddleware {
    fn name(&self) -> &str { "subagent_limit" }
    fn priority(&self) -> i32 { 550 }

    /// Gate sub-agent spawn tools behind the per-turn counter; all other
    /// tools pass through untouched.
    async fn before_tool_call(
        &self,
        _ctx: &MiddlewareContext,
        tool_name: &str,
        _tool_input: &Value,
    ) -> Result<ToolCallDecision> {
        use std::sync::atomic::Ordering::SeqCst;
        if !Self::is_subagent_tool(tool_name) {
            return Ok(ToolCallDecision::Allow);
        }
        // Optimistically claim a slot; roll the claim back if the cap
        // had already been reached.
        let prev = self.total_spawned.fetch_add(1, SeqCst);
        if prev < self.max_total {
            return Ok(ToolCallDecision::Allow);
        }
        self.total_spawned.fetch_sub(1, SeqCst);
        tracing::warn!(
            "[SubagentLimitMiddleware] Total sub-agent limit ({}) reached — blocking spawn",
            self.max_total
        );
        Ok(ToolCallDecision::Block(format!(
            "子Agent总数量已达上限 ({}),请优先完成现有任务后再发起新任务。",
            self.max_total
        )))
    }

    /// Reset the per-turn counter once the agent loop turn completes.
    async fn after_completion(&self, _ctx: &MiddlewareContext) -> Result<()> {
        self.total_spawned.store(0, std::sync::atomic::Ordering::SeqCst);
        Ok(())
    }
}

View File

@@ -5,22 +5,29 @@
//! "新对话" or truncating the user's first message.
//!
//! Priority 180 — runs after compaction (100) and memory (150), before skill index (200).
//!
//! NOTE: This is a structural placeholder. Full implementation requires an LLM driver
//! reference to generate titles asynchronously, which will be wired through the
//! middleware context in a future iteration. For now it simply passes through.
use async_trait::async_trait;
use zclaw_types::Result;
use crate::middleware::{AgentMiddleware, MiddlewareContext};
use crate::middleware::{AgentMiddleware, MiddlewareDecision};
/// Middleware that auto-generates conversation titles after the first exchange.
///
/// When fully implemented, this will:
/// 1. Detect the first user-assistant exchange (via message count)
/// 2. Call the LLM with a short prompt to generate a descriptive title
/// 3. Update the session title via the middleware context
///
/// For now, it serves as a registered placeholder in the middleware chain.
pub struct TitleMiddleware {
/// Whether a title has been generated for the current session.
titled: std::sync::atomic::AtomicBool,
_reserved: (),
}
impl TitleMiddleware {
pub fn new() -> Self {
Self {
titled: std::sync::atomic::AtomicBool::new(false),
}
Self { _reserved: () }
}
}
@@ -34,4 +41,9 @@ impl Default for TitleMiddleware {
impl AgentMiddleware for TitleMiddleware {
fn name(&self) -> &str { "title" }
fn priority(&self) -> i32 { 180 }
// All hooks default to Continue — placeholder until LLM driver is wired in.
async fn before_completion(&self, _ctx: &mut crate::middleware::MiddlewareContext) -> zclaw_types::Result<MiddlewareDecision> {
Ok(MiddlewareDecision::Continue)
}
}

View File

@@ -0,0 +1,111 @@
//! Tool error middleware — catches tool execution errors and converts them
//! into well-formed tool-result messages for the LLM to recover from.
//!
//! Inspired by DeerFlow's ToolErrorMiddleware: instead of propagating raw errors
//! that crash the agent loop, this middleware wraps tool errors into a structured
//! format that the LLM can use to self-correct.
use async_trait::async_trait;
use serde_json::Value;
use zclaw_types::Result;
use crate::driver::ContentBlock;
use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
/// Middleware that intercepts tool call errors and formats recovery messages.
///
/// Priority 350 — runs after dangling tool repair (300) and before guardrail (400).
pub struct ToolErrorMiddleware {
    /// Errors longer than this (in bytes) are truncated before being logged
    /// and fed back to the LLM.
    max_error_length: usize,
}
impl ToolErrorMiddleware {
    /// Create the middleware with the default 500-byte error cap.
    pub fn new() -> Self {
        Self { max_error_length: 500 }
    }
    /// Builder-style override for the truncation threshold.
    pub fn with_max_error_length(mut self, len: usize) -> Self {
        self.max_error_length = len;
        self
    }
    /// Wrap a tool error into guidance the LLM can use to self-correct.
    ///
    /// The caller is responsible for truncating `error` beforehand.
    fn format_tool_error(&self, tool_name: &str, error: &str) -> String {
        format!(
            "工具 '{}' 执行失败。错误信息: {}\n请分析错误原因,尝试修正参数后重试,或使用其他方法完成任务。",
            tool_name, error
        )
    }
}
impl Default for ToolErrorMiddleware {
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl AgentMiddleware for ToolErrorMiddleware {
    fn name(&self) -> &str { "tool_error" }
    fn priority(&self) -> i32 { 350 }

    /// Pre-validate tool input structure for common issues.
    ///
    /// This catches malformed JSON inputs (currently: `null`) before they
    /// reach the tool executor, replacing them with an empty object.
    async fn before_tool_call(
        &self,
        _ctx: &MiddlewareContext,
        tool_name: &str,
        tool_input: &Value,
    ) -> Result<ToolCallDecision> {
        if tool_input.is_null() {
            tracing::warn!(
                "[ToolErrorMiddleware] Tool '{}' received null input — replacing with empty object",
                tool_name
            );
            return Ok(ToolCallDecision::ReplaceInput(serde_json::json!({})));
        }
        Ok(ToolCallDecision::Allow)
    }

    /// Detect an `"error"` field in the tool result and inject a guided
    /// recovery message so the LLM can self-correct instead of looping.
    async fn after_tool_call(
        &self,
        ctx: &mut MiddlewareContext,
        tool_name: &str,
        result: &Value,
    ) -> Result<()> {
        if let Some(error) = result.get("error") {
            let error_msg = match error {
                Value::String(s) => s.clone(),
                other => other.to_string(),
            };
            let truncated = if error_msg.len() > self.max_error_length {
                // Walk back to a char boundary so slicing cannot panic on
                // multi-byte UTF-8 (e.g. Chinese). `str::floor_char_boundary`
                // would do this directly, but it is still nightly-only
                // (unstable `round_char_boundary` feature), so use the
                // stable `is_char_boundary` check instead.
                let mut end = self.max_error_length;
                while end > 0 && !error_msg.is_char_boundary(end) {
                    end -= 1;
                }
                format!("{}...(truncated)", &error_msg[..end])
            } else {
                // No clone needed: error_msg is not used after this point.
                error_msg
            };
            tracing::warn!(
                "[ToolErrorMiddleware] Tool '{}' failed: {}",
                tool_name, truncated
            );
            // Build a guided recovery message so the LLM can self-correct.
            let guided_message = self.format_tool_error(tool_name, &truncated);
            // Inject into response_content so the agent loop feeds this back
            // to the LLM alongside the raw tool result.
            ctx.response_content.push(ContentBlock::Text {
                text: guided_message,
            });
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,132 @@
//! Tool output sanitization middleware — inspects tool results for risky content
//! before they flow back into the LLM context.
//!
//! Inspired by DeerFlow's missing "Toxic Output Loop" defense — ZCLAW proactively
//! implements post-execution output checking.
//!
//! Rules:
//! - Output length cap: warns when tool output exceeds threshold
//! - Sensitive pattern detection: flags API keys, tokens, passwords
//! - Injection marker detection: flags common prompt-injection patterns
//!
//! This middleware does NOT modify content. It only logs warnings at appropriate levels.
use async_trait::async_trait;
use serde_json::Value;
use zclaw_types::Result;
use crate::middleware::{AgentMiddleware, MiddlewareContext, ToolCallDecision};
/// Largest tool output (bytes of serialized JSON) considered safe.
const MAX_OUTPUT_LENGTH: usize = 50_000;
/// Substrings that suggest credentials or other secrets in tool output.
///
/// NOTE(review): `after_tool_call` lowercases the serialized output before
/// matching; the upper-case entries here ("AKIA", "-----BEGIN …") only match
/// if the comparison also lowercases the pattern — verify the matcher.
const SENSITIVE_PATTERNS: &[&str] = &[
    "api_key",
    "apikey",
    "api-key",
    "secret_key",
    "secretkey",
    "access_token",
    "auth_token",
    "password",
    "private_key",
    "-----BEGIN RSA",
    "-----BEGIN PRIVATE",
    "sk-", // OpenAI API keys
    "sk_live_", // Stripe keys
    "AKIA", // AWS access keys
];
/// Substrings commonly seen in prompt-injection payloads.
const INJECTION_PATTERNS: &[&str] = &[
    "ignore previous instructions",
    "ignore all previous",
    "disregard your instructions",
    "you are now",
    "new instructions:",
    "system:",
    "[INST]",
    "</scratchpad>",
    "think step by step about",
];
/// Tool output sanitization middleware.
///
/// Priority 360 — runs after ToolErrorMiddleware (350), before GuardrailMiddleware (400).
pub struct ToolOutputGuardMiddleware {
    /// Warn when serialized output exceeds this many bytes.
    max_output_length: usize,
}
impl ToolOutputGuardMiddleware {
    /// Create the guard with the default size limit ([`MAX_OUTPUT_LENGTH`]).
    pub fn new() -> Self {
        Self { max_output_length: MAX_OUTPUT_LENGTH }
    }
}
impl Default for ToolOutputGuardMiddleware {
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl AgentMiddleware for ToolOutputGuardMiddleware {
    fn name(&self) -> &str { "tool_output_guard" }
    fn priority(&self) -> i32 { 360 }

    async fn before_tool_call(
        &self,
        _ctx: &MiddlewareContext,
        _tool_name: &str,
        _tool_input: &Value,
    ) -> Result<ToolCallDecision> {
        // No pre-execution checks — this middleware only inspects output.
        Ok(ToolCallDecision::Allow)
    }

    /// Inspect the serialized tool result and log warnings for oversized,
    /// secret-bearing, or injection-flavoured output.
    ///
    /// Never modifies the result — warn-only by design.
    async fn after_tool_call(
        &self,
        _ctx: &mut MiddlewareContext,
        tool_name: &str,
        result: &Value,
    ) -> Result<()> {
        let output_str = serde_json::to_string(result).unwrap_or_default();
        let output_len = output_str.len();
        // Rule 1: output length check (bytes of serialized JSON).
        if output_len > self.max_output_length {
            tracing::warn!(
                "[ToolOutputGuard] Tool '{}' returned oversized output: {} bytes (limit: {})",
                tool_name, output_len, self.max_output_length
            );
        }
        // Case-insensitive matching requires lowercasing BOTH sides:
        // previously only the output was lowercased, so patterns containing
        // upper-case characters ("AKIA", "[INST]", "-----BEGIN RSA") could
        // never match.
        let output_lower = output_str.to_lowercase();
        // Rule 2: Sensitive information detection
        for pattern in SENSITIVE_PATTERNS {
            if output_lower.contains(&pattern.to_ascii_lowercase()) {
                tracing::warn!(
                    "[ToolOutputGuard] Tool '{}' output contains sensitive pattern: '{}'",
                    tool_name, pattern
                );
                break; // Only warn once per tool call
            }
        }
        // Rule 3: Injection marker detection
        for pattern in INJECTION_PATTERNS {
            if output_lower.contains(&pattern.to_ascii_lowercase()) {
                tracing::warn!(
                    "[ToolOutputGuard] Tool '{}' output contains potential injection marker: '{}'",
                    tool_name, pattern
                );
                break; // Only warn once per tool call
            }
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,120 @@
use std::fmt::Write;
use crate::driver::ToolDefinition;
/// Runtime context that determines which prompt sections are included.
///
/// Passed to [`PromptBuilder::build`]; each field toggles or feeds exactly
/// one part of the assembled system prompt.
pub struct PromptContext {
    /// Base system prompt from AgentConfig; a generic assistant persona is
    /// substituted when `None`.
    pub base_prompt: Option<String>,
    /// Custom agent personality (SOUL.md equivalent), appended as an
    /// "Agent Personality" section when present.
    pub soul: Option<String>,
    /// Whether thinking/extended reasoning is enabled (adds reasoning guidance).
    pub thinking_enabled: bool,
    /// Whether plan mode is active (adds plan-before-act instructions).
    pub plan_mode: bool,
    /// Tool definitions rendered into an "Available Tools" list when non-empty.
    pub tool_definitions: Vec<ToolDefinition>,
    /// Agent name for personalization; rendered as a "You are known as …" line.
    pub agent_name: Option<String>,
}
/// A single section in the assembled prompt.
pub struct PromptSection {
    /// Identifier for the section (not rendered into the prompt text by `build`).
    pub name: &'static str,
    /// Raw text appended verbatim to the prompt, separated by a blank line.
    pub template: String,
    /// Sort key: lower values are rendered first among registered sections.
    pub priority: u32,
}
/// Builds structured system prompts from conditional sections.
pub struct PromptBuilder {
sections: Vec<PromptSection>,
}
impl PromptBuilder {
pub fn new() -> Self {
Self {
sections: Vec::new(),
}
}
/// Add a section unconditionally.
pub fn add_section(
mut self,
name: &'static str,
template: impl Into<String>,
priority: u32,
) -> Self {
self.sections.push(PromptSection {
name,
template: template.into(),
priority,
});
self
}
/// Assemble the final system prompt based on runtime context.
pub fn build(&self, ctx: &PromptContext) -> String {
let mut sections: Vec<&PromptSection> = self.sections.iter().collect();
sections.sort_by_key(|s| s.priority);
let mut result = String::with_capacity(4096);
// Base prompt (always included)
if let Some(ref base) = ctx.base_prompt {
result.push_str(base);
} else {
result.push_str("You are a helpful AI assistant.");
}
// Soul/personality section
if let Some(ref soul) = ctx.soul {
result.push_str("\n\n## Agent Personality\n\n");
result.push_str(soul);
}
// Agent name personalization
if let Some(ref name) = ctx.agent_name {
let _ = write!(result, "\n\nYou are known as \"{name}\". Respond in character.");
}
// Dynamic tool descriptions
if !ctx.tool_definitions.is_empty() {
result.push_str("\n\n## Available Tools\n\n");
for tool in &ctx.tool_definitions {
let _ = writeln!(result, "- **{}**: {}", tool.name, tool.description);
}
}
// Thinking style guidance
if ctx.thinking_enabled {
result.push_str("\n\n## Reasoning Mode\n\n");
result.push_str(
"Extended reasoning is enabled. Think step-by-step before responding. \
Show your reasoning process, then provide the final answer.",
);
}
// Plan mode instructions
if ctx.plan_mode {
result.push_str("\n\n## Plan Mode\n\n");
result.push_str(
"You are in plan mode. Before executing any actions, create a detailed plan. \
Present the plan to the user for approval before proceeding.",
);
}
// Additional registered sections
for section in sections {
result.push_str("\n\n");
result.push_str(&section.template);
}
result
}
}
impl Default for PromptBuilder {
fn default() -> Self {
Self::new()
}
}

View File

@@ -0,0 +1,9 @@
//! Dynamic prompt assembly module.
//!
//! Inspired by DeerFlow's conditional section-based prompt composition.
//! The `PromptBuilder` assembles a structured system prompt from multiple
//! conditional sections before the middleware chain further modifies it.
mod builder;
pub use builder::{PromptBuilder, PromptContext, PromptSection};

View File

@@ -7,6 +7,7 @@ mod web_fetch;
mod execute_skill;
mod skill_load;
mod path_validator;
mod task;
pub use file_read::FileReadTool;
pub use file_write::FileWriteTool;
@@ -15,6 +16,7 @@ pub use web_fetch::WebFetchTool;
pub use execute_skill::ExecuteSkillTool;
pub use skill_load::SkillLoadTool;
pub use path_validator::{PathValidator, PathValidatorConfig};
pub use task::TaskTool;
use crate::tool::ToolRegistry;

View File

@@ -0,0 +1,179 @@
//! Task tool — delegates sub-tasks to a nested AgentLoop.
//!
//! Inspired by DeerFlow's `task_tool`: the lead agent can spawn sub-agent tasks
//! to parallelise complex work. Each sub-task runs its own AgentLoop with a
//! fresh session, isolated context, and a configurable maximum iteration count.
use async_trait::async_trait;
use serde_json::{json, Value};
use zclaw_types::{AgentId, Result, ZclawError};
use zclaw_memory::MemoryStore;
use crate::driver::LlmDriver;
use crate::loop_runner::AgentLoop;
use crate::tool::{Tool, ToolContext, ToolRegistry};
use crate::tool::builtin::register_builtin_tools;
use std::sync::Arc;
/// Default max iterations for a sub-agent task.
const DEFAULT_MAX_ITERATIONS: usize = 5;
/// Tool that delegates sub-tasks to a nested AgentLoop.
pub struct TaskTool {
    /// LLM driver shared with the parent loop.
    driver: Arc<dyn LlmDriver>,
    /// Memory store used to create isolated sub-agent sessions.
    memory: Arc<MemoryStore>,
    /// Model identifier passed to the sub-agent loop.
    model: String,
    /// Max completion tokens for sub-agent requests.
    max_tokens: u32,
    /// Sampling temperature for sub-agent requests.
    temperature: f32,
}
impl TaskTool {
    /// Create a task tool with default generation settings
    /// (4096 max tokens, temperature 0.7).
    pub fn new(
        driver: Arc<dyn LlmDriver>,
        memory: Arc<MemoryStore>,
        model: impl Into<String>,
    ) -> Self {
        Self {
            driver,
            memory,
            model: model.into(),
            max_tokens: 4096,
            temperature: 0.7,
        }
    }
    /// Builder-style override for the sub-agent token budget.
    pub fn with_max_tokens(self, max_tokens: u32) -> Self {
        Self { max_tokens, ..self }
    }
    /// Builder-style override for the sub-agent sampling temperature.
    pub fn with_temperature(self, temperature: f32) -> Self {
        Self { temperature, ..self }
    }
}
#[async_trait]
impl Tool for TaskTool {
    fn name(&self) -> &str {
        "task"
    }

    fn description(&self) -> &str {
        "Delegate a sub-task to a sub-agent. The sub-agent will work independently \
         with its own context and tools. Use this to break complex tasks into \
         parallel or sequential sub-tasks. Each sub-task runs in its own session \
         with a focused system prompt."
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "description": {
                    "type": "string",
                    "description": "Short description of the sub-task (shown in progress UI)"
                },
                "prompt": {
                    "type": "string",
                    "description": "Detailed instructions for the sub-agent"
                },
                "max_iterations": {
                    "type": "integer",
                    "description": "Maximum tool-call iterations for the sub-agent (default: 5)",
                    "minimum": 1,
                    "maximum": 10
                }
            },
            "required": ["description", "prompt"]
        })
    }

    /// Spawn a nested AgentLoop with a fresh session and isolated context,
    /// run it to completion, and return a JSON summary of the outcome.
    ///
    /// Returns `{"status": "completed", ...}` with the sub-agent's response
    /// and token/iteration counts, or `{"status": "failed", ...}` with the
    /// error string. Only missing required parameters produce an `Err`.
    async fn execute(&self, input: Value, context: &ToolContext) -> Result<Value> {
        let description = input["description"].as_str()
            .ok_or_else(|| ZclawError::InvalidInput("Missing 'description' parameter".into()))?;
        let prompt = input["prompt"].as_str()
            .ok_or_else(|| ZclawError::InvalidInput("Missing 'prompt' parameter".into()))?;
        // Clamp to the schema bounds (1..=10): the value comes from the model
        // and must not be trusted to respect the advertised minimum/maximum.
        let max_iterations = input["max_iterations"].as_u64()
            .unwrap_or(DEFAULT_MAX_ITERATIONS as u64)
            .clamp(1, 10) as usize;
        tracing::info!(
            "[TaskTool] Starting sub-agent task: {:?} (max_iterations={})",
            description, max_iterations
        );
        // Create a sub-agent with its own ID
        let sub_agent_id = AgentId::new();
        // Create a fresh session for the sub-agent
        let session_id = self.memory.create_session(&sub_agent_id).await?;
        // Build system prompt focused on the sub-task
        let system_prompt = format!(
            "你是一个专注的子Agent负责完成以下任务{}\n\n\
             要求:\n\
             - 专注完成分配给你的任务\n\
             - 使用可用的工具来完成任务\n\
             - 完成后提供简洁的结果摘要\n\
             - 如果遇到无法解决的问题,请说明原因",
            description
        );
        // Create a tool registry with builtin tools
        // (TaskTool itself is NOT included to prevent infinite nesting)
        let mut tools = ToolRegistry::new();
        register_builtin_tools(&mut tools);
        // Build a lightweight AgentLoop for the sub-agent.
        // BUG FIX: max_iterations was parsed and logged but never applied,
        // so sub-agents always ran with the loop's own default.
        let mut sub_loop = AgentLoop::new(
            sub_agent_id,
            self.driver.clone(),
            tools,
            self.memory.clone(),
        )
        .with_model(&self.model)
        .with_system_prompt(&system_prompt)
        .with_max_tokens(self.max_tokens)
        .with_temperature(self.temperature)
        .with_max_iterations(max_iterations);
        // Optionally inject skill executor and path validator from parent context
        if let Some(ref executor) = context.skill_executor {
            sub_loop = sub_loop.with_skill_executor(executor.clone());
        }
        if let Some(ref validator) = context.path_validator {
            sub_loop = sub_loop.with_path_validator(validator.clone());
        }
        // Execute the sub-agent loop (non-streaming — collect full result)
        let result = match sub_loop.run(session_id.clone(), prompt.to_string()).await {
            Ok(loop_result) => {
                tracing::info!(
                    "[TaskTool] Sub-agent completed: {} iterations, {} input tokens, {} output tokens",
                    loop_result.iterations, loop_result.input_tokens, loop_result.output_tokens
                );
                json!({
                    "status": "completed",
                    "description": description,
                    "result": loop_result.response,
                    "iterations": loop_result.iterations,
                    "input_tokens": loop_result.input_tokens,
                    "output_tokens": loop_result.output_tokens,
                })
            }
            Err(e) => {
                // Sub-agent failure is reported as data, not propagated:
                // the parent LLM can react to the failed status.
                tracing::warn!("[TaskTool] Sub-agent failed: {}", e);
                json!({
                    "status": "failed",
                    "description": description,
                    "error": e.to_string(),
                })
            }
        };
        Ok(result)
    }
}