refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup
- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace `let _ =` with proper `log::warn!` across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
This commit is contained in:
@@ -14,6 +14,7 @@ use crate::loop_guard::{LoopGuard, LoopGuardResult};
|
||||
use crate::growth::GrowthIntegration;
|
||||
use crate::compaction::{self, CompactionConfig};
|
||||
use crate::middleware::{self, MiddlewareChain};
|
||||
use crate::prompt::{PromptBuilder, PromptContext};
|
||||
use zclaw_memory::MemoryStore;
|
||||
|
||||
/// Agent loop runner
|
||||
@@ -25,6 +26,8 @@ pub struct AgentLoop {
|
||||
loop_guard: Mutex<LoopGuard>,
|
||||
model: String,
|
||||
system_prompt: Option<String>,
|
||||
/// Custom agent personality for prompt assembly
|
||||
soul: Option<String>,
|
||||
max_tokens: u32,
|
||||
temperature: f32,
|
||||
skill_executor: Option<Arc<dyn SkillExecutor>>,
|
||||
@@ -39,6 +42,12 @@ pub struct AgentLoop {
|
||||
/// delegated to the chain instead of the inline code below.
|
||||
/// When `None`, the legacy inline path is used (100% backward compatible).
|
||||
middleware_chain: Option<MiddlewareChain>,
|
||||
/// Chat mode: extended thinking enabled
|
||||
thinking_enabled: bool,
|
||||
/// Chat mode: reasoning effort level
|
||||
reasoning_effort: Option<String>,
|
||||
/// Chat mode: plan mode
|
||||
plan_mode: bool,
|
||||
}
|
||||
|
||||
impl AgentLoop {
|
||||
@@ -56,7 +65,8 @@ impl AgentLoop {
|
||||
loop_guard: Mutex::new(LoopGuard::default()),
|
||||
model: String::new(), // Must be set via with_model()
|
||||
system_prompt: None,
|
||||
max_tokens: 4096,
|
||||
soul: None,
|
||||
max_tokens: 16384,
|
||||
temperature: 0.7,
|
||||
skill_executor: None,
|
||||
path_validator: None,
|
||||
@@ -64,6 +74,9 @@ impl AgentLoop {
|
||||
compaction_threshold: 0,
|
||||
compaction_config: CompactionConfig::default(),
|
||||
middleware_chain: None,
|
||||
thinking_enabled: false,
|
||||
reasoning_effort: None,
|
||||
plan_mode: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,6 +104,30 @@ impl AgentLoop {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the agent personality (SOUL.md equivalent).
///
/// Builder-style setter: consumes `self`, stores the converted text in `soul`, and returns
/// the updated loop. The stored value is later cloned into `PromptContext::soul` when the
/// system prompt is assembled through `PromptBuilder`.
|
||||
pub fn with_soul(mut self, soul: impl Into<String>) -> Self {
|
||||
self.soul = Some(soul.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable or disable extended thinking/reasoning mode.
///
/// Builder-style setter: consumes `self`, overwrites `thinking_enabled`, and returns the
/// updated loop. The flag is copied both into the `PromptContext` used for prompt assembly
/// and into the LLM request (non-streaming and streaming paths).
|
||||
pub fn with_thinking_enabled(mut self, enabled: bool) -> Self {
|
||||
self.thinking_enabled = enabled;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the reasoning effort level (low/medium/high).
///
/// Builder-style setter: consumes `self`, stores the converted string in `reasoning_effort`,
/// and returns the updated loop. The value is not validated here; it is cloned as-is into
/// the LLM request.
|
||||
pub fn with_reasoning_effort(mut self, effort: impl Into<String>) -> Self {
|
||||
self.reasoning_effort = Some(effort.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable or disable plan mode.
///
/// Builder-style setter: consumes `self`, overwrites `plan_mode`, and returns the updated
/// loop. The flag is copied into the `PromptContext` used for prompt assembly and into the
/// LLM request.
|
||||
pub fn with_plan_mode(mut self, enabled: bool) -> Self {
|
||||
self.plan_mode = enabled;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set max tokens
|
||||
pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
|
||||
self.max_tokens = max_tokens;
|
||||
@@ -214,7 +251,15 @@ impl AgentLoop {
|
||||
|
||||
// Enhance system prompt — skip when middleware chain handles it
|
||||
let mut enhanced_prompt = if use_middleware {
|
||||
self.system_prompt.clone().unwrap_or_default()
|
||||
let prompt_ctx = PromptContext {
|
||||
base_prompt: self.system_prompt.clone(),
|
||||
soul: self.soul.clone(),
|
||||
thinking_enabled: self.thinking_enabled,
|
||||
plan_mode: self.plan_mode,
|
||||
tool_definitions: self.tools.definitions(),
|
||||
agent_name: None,
|
||||
};
|
||||
PromptBuilder::new().build(&prompt_ctx)
|
||||
} else if let Some(ref growth) = self.growth {
|
||||
let base = self.system_prompt.as_deref().unwrap_or("");
|
||||
growth.enhance_prompt(&self.agent_id, base, &input).await?
|
||||
@@ -279,6 +324,9 @@ impl AgentLoop {
|
||||
temperature: Some(self.temperature),
|
||||
stop: Vec::new(),
|
||||
stream: false,
|
||||
thinking_enabled: self.thinking_enabled,
|
||||
reasoning_effort: self.reasoning_effort.clone(),
|
||||
plan_mode: self.plan_mode,
|
||||
};
|
||||
|
||||
// Call LLM
|
||||
@@ -352,7 +400,12 @@ impl AgentLoop {
|
||||
// Create tool context and execute all tools
|
||||
let tool_context = self.create_tool_context(session_id.clone());
|
||||
let mut circuit_breaker_triggered = false;
|
||||
let mut abort_result: Option<AgentLoopResult> = None;
|
||||
for (id, name, input) in tool_calls {
|
||||
// Check if loop was already aborted
|
||||
if abort_result.is_some() {
|
||||
break;
|
||||
}
|
||||
// Check tool call safety — via middleware chain or inline loop guard
|
||||
if let Some(ref chain) = self.middleware_chain {
|
||||
let mw_ctx_ref = middleware::MiddlewareContext {
|
||||
@@ -382,6 +435,17 @@ impl AgentLoop {
|
||||
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), tool_result, false));
|
||||
continue;
|
||||
}
|
||||
middleware::ToolCallDecision::AbortLoop(reason) => {
|
||||
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
|
||||
let msg = format!("{}\n已自动终止", reason);
|
||||
self.memory.append_message(&session_id, &Message::assistant(&msg)).await?;
|
||||
abort_result = Some(AgentLoopResult {
|
||||
response: msg,
|
||||
input_tokens: total_input_tokens,
|
||||
output_tokens: total_output_tokens,
|
||||
iterations,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Legacy inline path
|
||||
@@ -421,6 +485,11 @@ impl AgentLoop {
|
||||
|
||||
// Continue the loop - LLM will process tool results and generate final response
|
||||
|
||||
// If middleware aborted the loop, return immediately
|
||||
if let Some(result) = abort_result {
|
||||
break result;
|
||||
}
|
||||
|
||||
// If circuit breaker was triggered, terminate immediately
|
||||
if circuit_breaker_triggered {
|
||||
let msg = "检测到工具调用循环,已自动终止";
|
||||
@@ -502,7 +571,15 @@ impl AgentLoop {
|
||||
|
||||
// Enhance system prompt — skip when middleware chain handles it
|
||||
let mut enhanced_prompt = if use_middleware {
|
||||
self.system_prompt.clone().unwrap_or_default()
|
||||
let prompt_ctx = PromptContext {
|
||||
base_prompt: self.system_prompt.clone(),
|
||||
soul: self.soul.clone(),
|
||||
thinking_enabled: self.thinking_enabled,
|
||||
plan_mode: self.plan_mode,
|
||||
tool_definitions: self.tools.definitions(),
|
||||
agent_name: None,
|
||||
};
|
||||
PromptBuilder::new().build(&prompt_ctx)
|
||||
} else if let Some(ref growth) = self.growth {
|
||||
let base = self.system_prompt.as_deref().unwrap_or("");
|
||||
growth.enhance_prompt(&self.agent_id, base, &input).await?
|
||||
@@ -552,6 +629,9 @@ impl AgentLoop {
|
||||
let model = self.model.clone();
|
||||
let max_tokens = self.max_tokens;
|
||||
let temperature = self.temperature;
|
||||
let thinking_enabled = self.thinking_enabled;
|
||||
let reasoning_effort = self.reasoning_effort.clone();
|
||||
let plan_mode = self.plan_mode;
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut messages = messages;
|
||||
@@ -584,6 +664,9 @@ impl AgentLoop {
|
||||
temperature: Some(temperature),
|
||||
stop: Vec::new(),
|
||||
stream: true,
|
||||
thinking_enabled,
|
||||
reasoning_effort: reasoning_effort.clone(),
|
||||
plan_mode,
|
||||
};
|
||||
|
||||
let mut stream = driver.stream(request);
|
||||
@@ -596,9 +679,12 @@ impl AgentLoop {
|
||||
let mut chunk_count: usize = 0;
|
||||
let mut text_delta_count: usize = 0;
|
||||
let mut thinking_delta_count: usize = 0;
|
||||
while let Some(chunk_result) = stream.next().await {
|
||||
match chunk_result {
|
||||
Ok(chunk) => {
|
||||
let mut stream_errored = false;
|
||||
let chunk_timeout = std::time::Duration::from_secs(60);
|
||||
|
||||
loop {
|
||||
match tokio::time::timeout(chunk_timeout, stream.next()).await {
|
||||
Ok(Some(Ok(chunk))) => {
|
||||
chunk_count += 1;
|
||||
match &chunk {
|
||||
StreamChunk::TextDelta { delta } => {
|
||||
@@ -610,8 +696,8 @@ impl AgentLoop {
|
||||
StreamChunk::ThinkingDelta { delta } => {
|
||||
thinking_delta_count += 1;
|
||||
tracing::debug!("[AgentLoop] ThinkingDelta #{}: {} chars", thinking_delta_count, delta.len());
|
||||
// Accumulate reasoning separately — not mixed into iteration_text
|
||||
reasoning_text.push_str(delta);
|
||||
let _ = tx.send(LoopEvent::ThinkingDelta(delta.clone())).await;
|
||||
}
|
||||
StreamChunk::ToolUseStart { id, name } => {
|
||||
tracing::debug!("[AgentLoop] ToolUseStart: id={}, name={}", id, name);
|
||||
@@ -651,21 +737,43 @@ impl AgentLoop {
|
||||
StreamChunk::Error { message } => {
|
||||
tracing::error!("[AgentLoop] Stream error: {}", message);
|
||||
let _ = tx.send(LoopEvent::Error(message.clone())).await;
|
||||
stream_errored = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
Ok(Some(Err(e))) => {
|
||||
tracing::error!("[AgentLoop] Chunk error: {}", e);
|
||||
let _ = tx.send(LoopEvent::Error(e.to_string())).await;
|
||||
let _ = tx.send(LoopEvent::Error(format!("LLM 响应错误: {}", e.to_string()))).await;
|
||||
stream_errored = true;
|
||||
}
|
||||
Ok(None) => break, // Stream ended normally
|
||||
Err(_) => {
|
||||
tracing::error!("[AgentLoop] Stream chunk timeout ({}s)", chunk_timeout.as_secs());
|
||||
let _ = tx.send(LoopEvent::Error("LLM 响应超时,请重试".to_string())).await;
|
||||
stream_errored = true;
|
||||
}
|
||||
}
|
||||
if stream_errored {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tracing::info!("[AgentLoop] Stream ended: {} total chunks (text={}, thinking={}, tools={}), iteration_text={} chars",
|
||||
chunk_count, text_delta_count, thinking_delta_count, pending_tool_calls.len(),
|
||||
iteration_text.len());
|
||||
if iteration_text.is_empty() {
|
||||
tracing::warn!("[AgentLoop] WARNING: iteration_text is EMPTY after {} chunks! text_delta={}, thinking_delta={}",
|
||||
chunk_count, text_delta_count, thinking_delta_count);
|
||||
|
||||
// Fallback: if model generated reasoning but no text content,
|
||||
// use reasoning as text response. This happens with some thinking models
|
||||
// (DeepSeek R1, QWQ) that put the answer in reasoning_content instead of content.
|
||||
// Safe now because: (1) context is clean (no stale user_profile/memory injection),
|
||||
// (2) max_tokens=16384 prevents truncation, (3) reasoning is about the correct topic.
|
||||
if iteration_text.is_empty() && !reasoning_text.is_empty() {
|
||||
tracing::info!("[AgentLoop] Model generated {} chars of reasoning but no text — using reasoning as response",
|
||||
reasoning_text.len());
|
||||
let _ = tx.send(LoopEvent::Delta(reasoning_text.clone())).await;
|
||||
iteration_text = reasoning_text.clone();
|
||||
} else if iteration_text.is_empty() {
|
||||
tracing::warn!("[AgentLoop] No text content after {} chunks (thinking_delta={})",
|
||||
chunk_count, thinking_delta_count);
|
||||
}
|
||||
|
||||
// If no tool calls, we have the final response
|
||||
@@ -706,6 +814,12 @@ impl AgentLoop {
|
||||
break 'outer;
|
||||
}
|
||||
|
||||
// Skip tool processing if stream errored or timed out
|
||||
if stream_errored {
|
||||
tracing::debug!("[AgentLoop] Stream errored, skipping tool processing and breaking");
|
||||
break 'outer;
|
||||
}
|
||||
|
||||
tracing::debug!("[AgentLoop] Processing {} tool calls (reasoning: {} chars)", pending_tool_calls.len(), reasoning_text.len());
|
||||
|
||||
// Push assistant message with reasoning before tool calls (required by Kimi and other thinking-enabled APIs)
|
||||
@@ -745,6 +859,11 @@ impl AgentLoop {
|
||||
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), error_output, true));
|
||||
continue;
|
||||
}
|
||||
Ok(middleware::ToolCallDecision::AbortLoop(reason)) => {
|
||||
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
|
||||
let _ = tx.send(LoopEvent::Error(reason)).await;
|
||||
break 'outer;
|
||||
}
|
||||
Ok(middleware::ToolCallDecision::ReplaceInput(new_input)) => {
|
||||
// Execute with replaced input (same path_validator logic below)
|
||||
let pv = path_validator.clone().unwrap_or_else(|| {
|
||||
@@ -883,6 +1002,8 @@ pub struct AgentLoopResult {
|
||||
pub enum LoopEvent {
|
||||
/// Text delta from LLM
|
||||
Delta(String),
|
||||
/// Thinking/reasoning delta from LLM (extended thinking)
|
||||
ThinkingDelta(String),
|
||||
/// Tool execution started
|
||||
ToolStart { name: String, input: serde_json::Value },
|
||||
/// Tool execution completed
|
||||
|
||||
Reference in New Issue
Block a user