refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup

- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace let _ = with proper log::warn! across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
This commit is contained in:
iven
2026-04-03 00:28:03 +08:00
parent 0a04b260a4
commit 52bdafa633
55 changed files with 4130 additions and 1959 deletions

View File

@@ -14,6 +14,7 @@ use crate::loop_guard::{LoopGuard, LoopGuardResult};
use crate::growth::GrowthIntegration;
use crate::compaction::{self, CompactionConfig};
use crate::middleware::{self, MiddlewareChain};
use crate::prompt::{PromptBuilder, PromptContext};
use zclaw_memory::MemoryStore;
/// Agent loop runner
@@ -25,6 +26,8 @@ pub struct AgentLoop {
loop_guard: Mutex<LoopGuard>,
model: String,
system_prompt: Option<String>,
/// Custom agent personality for prompt assembly
soul: Option<String>,
max_tokens: u32,
temperature: f32,
skill_executor: Option<Arc<dyn SkillExecutor>>,
@@ -39,6 +42,12 @@ pub struct AgentLoop {
/// delegated to the chain instead of the inline code below.
/// When `None`, the legacy inline path is used (100% backward compatible).
middleware_chain: Option<MiddlewareChain>,
/// Chat mode: extended thinking enabled
thinking_enabled: bool,
/// Chat mode: reasoning effort level
reasoning_effort: Option<String>,
/// Chat mode: plan mode
plan_mode: bool,
}
impl AgentLoop {
@@ -56,7 +65,8 @@ impl AgentLoop {
loop_guard: Mutex::new(LoopGuard::default()),
model: String::new(), // Must be set via with_model()
system_prompt: None,
max_tokens: 4096,
soul: None,
max_tokens: 16384,
temperature: 0.7,
skill_executor: None,
path_validator: None,
@@ -64,6 +74,9 @@ impl AgentLoop {
compaction_threshold: 0,
compaction_config: CompactionConfig::default(),
middleware_chain: None,
thinking_enabled: false,
reasoning_effort: None,
plan_mode: false,
}
}
@@ -91,6 +104,30 @@ impl AgentLoop {
self
}
/// Set the agent personality (SOUL.md equivalent)
pub fn with_soul(mut self, soul: impl Into<String>) -> Self {
self.soul = Some(soul.into());
self
}
/// Enable extended thinking/reasoning mode
pub fn with_thinking_enabled(mut self, enabled: bool) -> Self {
self.thinking_enabled = enabled;
self
}
/// Set reasoning effort level (low/medium/high)
pub fn with_reasoning_effort(mut self, effort: impl Into<String>) -> Self {
self.reasoning_effort = Some(effort.into());
self
}
/// Enable plan mode
pub fn with_plan_mode(mut self, enabled: bool) -> Self {
self.plan_mode = enabled;
self
}
/// Set max tokens
pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
self.max_tokens = max_tokens;
@@ -214,7 +251,15 @@ impl AgentLoop {
// Enhance system prompt — skip when middleware chain handles it
let mut enhanced_prompt = if use_middleware {
self.system_prompt.clone().unwrap_or_default()
let prompt_ctx = PromptContext {
base_prompt: self.system_prompt.clone(),
soul: self.soul.clone(),
thinking_enabled: self.thinking_enabled,
plan_mode: self.plan_mode,
tool_definitions: self.tools.definitions(),
agent_name: None,
};
PromptBuilder::new().build(&prompt_ctx)
} else if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
growth.enhance_prompt(&self.agent_id, base, &input).await?
@@ -279,6 +324,9 @@ impl AgentLoop {
temperature: Some(self.temperature),
stop: Vec::new(),
stream: false,
thinking_enabled: self.thinking_enabled,
reasoning_effort: self.reasoning_effort.clone(),
plan_mode: self.plan_mode,
};
// Call LLM
@@ -352,7 +400,12 @@ impl AgentLoop {
// Create tool context and execute all tools
let tool_context = self.create_tool_context(session_id.clone());
let mut circuit_breaker_triggered = false;
let mut abort_result: Option<AgentLoopResult> = None;
for (id, name, input) in tool_calls {
// Check if loop was already aborted
if abort_result.is_some() {
break;
}
// Check tool call safety — via middleware chain or inline loop guard
if let Some(ref chain) = self.middleware_chain {
let mw_ctx_ref = middleware::MiddlewareContext {
@@ -382,6 +435,17 @@ impl AgentLoop {
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), tool_result, false));
continue;
}
middleware::ToolCallDecision::AbortLoop(reason) => {
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
let msg = format!("{}\n已自动终止", reason);
self.memory.append_message(&session_id, &Message::assistant(&msg)).await?;
abort_result = Some(AgentLoopResult {
response: msg,
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations,
});
}
}
} else {
// Legacy inline path
@@ -421,6 +485,11 @@ impl AgentLoop {
// Continue the loop - LLM will process tool results and generate final response
// If middleware aborted the loop, return immediately
if let Some(result) = abort_result {
break result;
}
// If circuit breaker was triggered, terminate immediately
if circuit_breaker_triggered {
let msg = "检测到工具调用循环,已自动终止";
@@ -502,7 +571,15 @@ impl AgentLoop {
// Enhance system prompt — skip when middleware chain handles it
let mut enhanced_prompt = if use_middleware {
self.system_prompt.clone().unwrap_or_default()
let prompt_ctx = PromptContext {
base_prompt: self.system_prompt.clone(),
soul: self.soul.clone(),
thinking_enabled: self.thinking_enabled,
plan_mode: self.plan_mode,
tool_definitions: self.tools.definitions(),
agent_name: None,
};
PromptBuilder::new().build(&prompt_ctx)
} else if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
growth.enhance_prompt(&self.agent_id, base, &input).await?
@@ -552,6 +629,9 @@ impl AgentLoop {
let model = self.model.clone();
let max_tokens = self.max_tokens;
let temperature = self.temperature;
let thinking_enabled = self.thinking_enabled;
let reasoning_effort = self.reasoning_effort.clone();
let plan_mode = self.plan_mode;
tokio::spawn(async move {
let mut messages = messages;
@@ -584,6 +664,9 @@ impl AgentLoop {
temperature: Some(temperature),
stop: Vec::new(),
stream: true,
thinking_enabled,
reasoning_effort: reasoning_effort.clone(),
plan_mode,
};
let mut stream = driver.stream(request);
@@ -596,9 +679,12 @@ impl AgentLoop {
let mut chunk_count: usize = 0;
let mut text_delta_count: usize = 0;
let mut thinking_delta_count: usize = 0;
while let Some(chunk_result) = stream.next().await {
match chunk_result {
Ok(chunk) => {
let mut stream_errored = false;
let chunk_timeout = std::time::Duration::from_secs(60);
loop {
match tokio::time::timeout(chunk_timeout, stream.next()).await {
Ok(Some(Ok(chunk))) => {
chunk_count += 1;
match &chunk {
StreamChunk::TextDelta { delta } => {
@@ -610,8 +696,8 @@ impl AgentLoop {
StreamChunk::ThinkingDelta { delta } => {
thinking_delta_count += 1;
tracing::debug!("[AgentLoop] ThinkingDelta #{}: {} chars", thinking_delta_count, delta.len());
// Accumulate reasoning separately — not mixed into iteration_text
reasoning_text.push_str(delta);
let _ = tx.send(LoopEvent::ThinkingDelta(delta.clone())).await;
}
StreamChunk::ToolUseStart { id, name } => {
tracing::debug!("[AgentLoop] ToolUseStart: id={}, name={}", id, name);
@@ -651,21 +737,43 @@ impl AgentLoop {
StreamChunk::Error { message } => {
tracing::error!("[AgentLoop] Stream error: {}", message);
let _ = tx.send(LoopEvent::Error(message.clone())).await;
stream_errored = true;
}
}
}
Err(e) => {
Ok(Some(Err(e))) => {
tracing::error!("[AgentLoop] Chunk error: {}", e);
let _ = tx.send(LoopEvent::Error(e.to_string())).await;
let _ = tx.send(LoopEvent::Error(format!("LLM 响应错误: {}", e.to_string()))).await;
stream_errored = true;
}
Ok(None) => break, // Stream ended normally
Err(_) => {
tracing::error!("[AgentLoop] Stream chunk timeout ({}s)", chunk_timeout.as_secs());
let _ = tx.send(LoopEvent::Error("LLM 响应超时,请重试".to_string())).await;
stream_errored = true;
}
}
if stream_errored {
break;
}
}
tracing::info!("[AgentLoop] Stream ended: {} total chunks (text={}, thinking={}, tools={}), iteration_text={} chars",
chunk_count, text_delta_count, thinking_delta_count, pending_tool_calls.len(),
iteration_text.len());
if iteration_text.is_empty() {
tracing::warn!("[AgentLoop] WARNING: iteration_text is EMPTY after {} chunks! text_delta={}, thinking_delta={}",
chunk_count, text_delta_count, thinking_delta_count);
// Fallback: if model generated reasoning but no text content,
// use reasoning as text response. This happens with some thinking models
// (DeepSeek R1, QWQ) that put the answer in reasoning_content instead of content.
// Safe now because: (1) context is clean (no stale user_profile/memory injection),
// (2) max_tokens=16384 prevents truncation, (3) reasoning is about the correct topic.
if iteration_text.is_empty() && !reasoning_text.is_empty() {
tracing::info!("[AgentLoop] Model generated {} chars of reasoning but no text — using reasoning as response",
reasoning_text.len());
let _ = tx.send(LoopEvent::Delta(reasoning_text.clone())).await;
iteration_text = reasoning_text.clone();
} else if iteration_text.is_empty() {
tracing::warn!("[AgentLoop] No text content after {} chunks (thinking_delta={})",
chunk_count, thinking_delta_count);
}
// If no tool calls, we have the final response
@@ -706,6 +814,12 @@ impl AgentLoop {
break 'outer;
}
// Skip tool processing if stream errored or timed out
if stream_errored {
tracing::debug!("[AgentLoop] Stream errored, skipping tool processing and breaking");
break 'outer;
}
tracing::debug!("[AgentLoop] Processing {} tool calls (reasoning: {} chars)", pending_tool_calls.len(), reasoning_text.len());
// Push assistant message with reasoning before tool calls (required by Kimi and other thinking-enabled APIs)
@@ -745,6 +859,11 @@ impl AgentLoop {
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), error_output, true));
continue;
}
Ok(middleware::ToolCallDecision::AbortLoop(reason)) => {
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
let _ = tx.send(LoopEvent::Error(reason)).await;
break 'outer;
}
Ok(middleware::ToolCallDecision::ReplaceInput(new_input)) => {
// Execute with replaced input (same path_validator logic below)
let pv = path_validator.clone().unwrap_or_else(|| {
@@ -883,6 +1002,8 @@ pub struct AgentLoopResult {
pub enum LoopEvent {
/// Text delta from LLM
Delta(String),
/// Thinking/reasoning delta from LLM (extended thinking)
ThinkingDelta(String),
/// Tool execution started
ToolStart { name: String, input: serde_json::Value },
/// Tool execution completed