refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup

- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace let _ = with proper log::warn! across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
This commit is contained in:
iven
2026-04-03 00:28:03 +08:00
parent 0a04b260a4
commit 52bdafa633
55 changed files with 4130 additions and 1959 deletions

View File

@@ -14,6 +14,7 @@ use crate::loop_guard::{LoopGuard, LoopGuardResult};
use crate::growth::GrowthIntegration;
use crate::compaction::{self, CompactionConfig};
use crate::middleware::{self, MiddlewareChain};
use crate::prompt::{PromptBuilder, PromptContext};
use zclaw_memory::MemoryStore;
/// Agent loop runner
@@ -25,6 +26,8 @@ pub struct AgentLoop {
loop_guard: Mutex<LoopGuard>,
model: String,
system_prompt: Option<String>,
/// Custom agent personality for prompt assembly
soul: Option<String>,
max_tokens: u32,
temperature: f32,
skill_executor: Option<Arc<dyn SkillExecutor>>,
@@ -39,6 +42,12 @@ pub struct AgentLoop {
/// delegated to the chain instead of the inline code below.
/// When `None`, the legacy inline path is used (100% backward compatible).
middleware_chain: Option<MiddlewareChain>,
/// Chat mode: extended thinking enabled
thinking_enabled: bool,
/// Chat mode: reasoning effort level
reasoning_effort: Option<String>,
/// Chat mode: plan mode
plan_mode: bool,
}
impl AgentLoop {
@@ -56,7 +65,8 @@ impl AgentLoop {
loop_guard: Mutex::new(LoopGuard::default()),
model: String::new(), // Must be set via with_model()
system_prompt: None,
max_tokens: 4096,
soul: None,
max_tokens: 16384,
temperature: 0.7,
skill_executor: None,
path_validator: None,
@@ -64,6 +74,9 @@ impl AgentLoop {
compaction_threshold: 0,
compaction_config: CompactionConfig::default(),
middleware_chain: None,
thinking_enabled: false,
reasoning_effort: None,
plan_mode: false,
}
}
@@ -91,6 +104,30 @@ impl AgentLoop {
self
}
/// Set the agent personality (SOUL.md equivalent)
pub fn with_soul(mut self, soul: impl Into<String>) -> Self {
self.soul = Some(soul.into());
self
}
/// Enable extended thinking/reasoning mode
pub fn with_thinking_enabled(mut self, enabled: bool) -> Self {
self.thinking_enabled = enabled;
self
}
/// Set reasoning effort level (low/medium/high)
pub fn with_reasoning_effort(mut self, effort: impl Into<String>) -> Self {
self.reasoning_effort = Some(effort.into());
self
}
/// Enable plan mode
pub fn with_plan_mode(mut self, enabled: bool) -> Self {
self.plan_mode = enabled;
self
}
/// Set max tokens
pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
self.max_tokens = max_tokens;
@@ -214,7 +251,15 @@ impl AgentLoop {
// Enhance system prompt — skip when middleware chain handles it
let mut enhanced_prompt = if use_middleware {
self.system_prompt.clone().unwrap_or_default()
let prompt_ctx = PromptContext {
base_prompt: self.system_prompt.clone(),
soul: self.soul.clone(),
thinking_enabled: self.thinking_enabled,
plan_mode: self.plan_mode,
tool_definitions: self.tools.definitions(),
agent_name: None,
};
PromptBuilder::new().build(&prompt_ctx)
} else if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
growth.enhance_prompt(&self.agent_id, base, &input).await?
@@ -279,6 +324,9 @@ impl AgentLoop {
temperature: Some(self.temperature),
stop: Vec::new(),
stream: false,
thinking_enabled: self.thinking_enabled,
reasoning_effort: self.reasoning_effort.clone(),
plan_mode: self.plan_mode,
};
// Call LLM
@@ -352,7 +400,12 @@ impl AgentLoop {
// Create tool context and execute all tools
let tool_context = self.create_tool_context(session_id.clone());
let mut circuit_breaker_triggered = false;
let mut abort_result: Option<AgentLoopResult> = None;
for (id, name, input) in tool_calls {
// Check if loop was already aborted
if abort_result.is_some() {
break;
}
// Check tool call safety — via middleware chain or inline loop guard
if let Some(ref chain) = self.middleware_chain {
let mw_ctx_ref = middleware::MiddlewareContext {
@@ -382,6 +435,17 @@ impl AgentLoop {
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), tool_result, false));
continue;
}
middleware::ToolCallDecision::AbortLoop(reason) => {
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
let msg = format!("{}\n已自动终止", reason);
self.memory.append_message(&session_id, &Message::assistant(&msg)).await?;
abort_result = Some(AgentLoopResult {
response: msg,
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations,
});
}
}
} else {
// Legacy inline path
@@ -421,6 +485,11 @@ impl AgentLoop {
// Continue the loop - LLM will process tool results and generate final response
// If middleware aborted the loop, return immediately
if let Some(result) = abort_result {
break result;
}
// If circuit breaker was triggered, terminate immediately
if circuit_breaker_triggered {
let msg = "检测到工具调用循环,已自动终止";
@@ -502,7 +571,15 @@ impl AgentLoop {
// Enhance system prompt — skip when middleware chain handles it
let mut enhanced_prompt = if use_middleware {
self.system_prompt.clone().unwrap_or_default()
let prompt_ctx = PromptContext {
base_prompt: self.system_prompt.clone(),
soul: self.soul.clone(),
thinking_enabled: self.thinking_enabled,
plan_mode: self.plan_mode,
tool_definitions: self.tools.definitions(),
agent_name: None,
};
PromptBuilder::new().build(&prompt_ctx)
} else if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
growth.enhance_prompt(&self.agent_id, base, &input).await?
@@ -552,6 +629,9 @@ impl AgentLoop {
let model = self.model.clone();
let max_tokens = self.max_tokens;
let temperature = self.temperature;
let thinking_enabled = self.thinking_enabled;
let reasoning_effort = self.reasoning_effort.clone();
let plan_mode = self.plan_mode;
tokio::spawn(async move {
let mut messages = messages;
@@ -584,6 +664,9 @@ impl AgentLoop {
temperature: Some(temperature),
stop: Vec::new(),
stream: true,
thinking_enabled,
reasoning_effort: reasoning_effort.clone(),
plan_mode,
};
let mut stream = driver.stream(request);
@@ -596,9 +679,12 @@ impl AgentLoop {
let mut chunk_count: usize = 0;
let mut text_delta_count: usize = 0;
let mut thinking_delta_count: usize = 0;
while let Some(chunk_result) = stream.next().await {
match chunk_result {
Ok(chunk) => {
let mut stream_errored = false;
let chunk_timeout = std::time::Duration::from_secs(60);
loop {
match tokio::time::timeout(chunk_timeout, stream.next()).await {
Ok(Some(Ok(chunk))) => {
chunk_count += 1;
match &chunk {
StreamChunk::TextDelta { delta } => {
@@ -610,8 +696,8 @@ impl AgentLoop {
StreamChunk::ThinkingDelta { delta } => {
thinking_delta_count += 1;
tracing::debug!("[AgentLoop] ThinkingDelta #{}: {} chars", thinking_delta_count, delta.len());
// Accumulate reasoning separately — not mixed into iteration_text
reasoning_text.push_str(delta);
let _ = tx.send(LoopEvent::ThinkingDelta(delta.clone())).await;
}
StreamChunk::ToolUseStart { id, name } => {
tracing::debug!("[AgentLoop] ToolUseStart: id={}, name={}", id, name);
@@ -651,21 +737,43 @@ impl AgentLoop {
StreamChunk::Error { message } => {
tracing::error!("[AgentLoop] Stream error: {}", message);
let _ = tx.send(LoopEvent::Error(message.clone())).await;
stream_errored = true;
}
}
}
Err(e) => {
Ok(Some(Err(e))) => {
tracing::error!("[AgentLoop] Chunk error: {}", e);
let _ = tx.send(LoopEvent::Error(e.to_string())).await;
let _ = tx.send(LoopEvent::Error(format!("LLM 响应错误: {}", e.to_string()))).await;
stream_errored = true;
}
Ok(None) => break, // Stream ended normally
Err(_) => {
tracing::error!("[AgentLoop] Stream chunk timeout ({}s)", chunk_timeout.as_secs());
let _ = tx.send(LoopEvent::Error("LLM 响应超时,请重试".to_string())).await;
stream_errored = true;
}
}
if stream_errored {
break;
}
}
tracing::info!("[AgentLoop] Stream ended: {} total chunks (text={}, thinking={}, tools={}), iteration_text={} chars",
chunk_count, text_delta_count, thinking_delta_count, pending_tool_calls.len(),
iteration_text.len());
if iteration_text.is_empty() {
tracing::warn!("[AgentLoop] WARNING: iteration_text is EMPTY after {} chunks! text_delta={}, thinking_delta={}",
chunk_count, text_delta_count, thinking_delta_count);
// Fallback: if model generated reasoning but no text content,
// use reasoning as text response. This happens with some thinking models
// (DeepSeek R1, QWQ) that put the answer in reasoning_content instead of content.
// Safe now because: (1) context is clean (no stale user_profile/memory injection),
// (2) max_tokens=16384 prevents truncation, (3) reasoning is about the correct topic.
if iteration_text.is_empty() && !reasoning_text.is_empty() {
tracing::info!("[AgentLoop] Model generated {} chars of reasoning but no text — using reasoning as response",
reasoning_text.len());
let _ = tx.send(LoopEvent::Delta(reasoning_text.clone())).await;
iteration_text = reasoning_text.clone();
} else if iteration_text.is_empty() {
tracing::warn!("[AgentLoop] No text content after {} chunks (thinking_delta={})",
chunk_count, thinking_delta_count);
}
// If no tool calls, we have the final response
@@ -706,6 +814,12 @@ impl AgentLoop {
break 'outer;
}
// Skip tool processing if stream errored or timed out
if stream_errored {
tracing::debug!("[AgentLoop] Stream errored, skipping tool processing and breaking");
break 'outer;
}
tracing::debug!("[AgentLoop] Processing {} tool calls (reasoning: {} chars)", pending_tool_calls.len(), reasoning_text.len());
// Push assistant message with reasoning before tool calls (required by Kimi and other thinking-enabled APIs)
@@ -745,6 +859,11 @@ impl AgentLoop {
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), error_output, true));
continue;
}
Ok(middleware::ToolCallDecision::AbortLoop(reason)) => {
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
let _ = tx.send(LoopEvent::Error(reason)).await;
break 'outer;
}
Ok(middleware::ToolCallDecision::ReplaceInput(new_input)) => {
// Execute with replaced input (same path_validator logic below)
let pv = path_validator.clone().unwrap_or_else(|| {
@@ -883,6 +1002,8 @@ pub struct AgentLoopResult {
pub enum LoopEvent {
/// Text delta from LLM
Delta(String),
/// Thinking/reasoning delta from LLM (extended thinking)
ThinkingDelta(String),
/// Tool execution started
ToolStart { name: String, input: serde_json::Value },
/// Tool execution completed