Files
zclaw_openfang/crates/zclaw-runtime/src/loop_runner.rs
iven 6a13fff9ec
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
fix(runtime): deep audit fixes — clarification loop termination + callback alignment
CRITICAL:
- ask_clarification now terminates Agent Loop in both run() and run_streaming()
  paths, preventing the LLM from continuing after requesting user clarification

HIGH:
- SaaS relay now forwards plan_mode and subagent_enabled to backend
- GatewayClient.chatStream now supports onThinkingDelta, onSubtaskStatus,
  and token-bearing onComplete — aligned with kernel-types StreamCallbacks
- ZclawStreamEvent type extended with thinking_delta, subtask_status variants
  and input_tokens/output_tokens fields for token tracking via Gateway path
2026-04-06 16:50:48 +08:00

1088 lines
52 KiB
Rust

//! Agent loop implementation
use std::sync::Arc;
use std::sync::Mutex;
use futures::StreamExt;
use tokio::sync::mpsc;
use zclaw_types::{AgentId, SessionId, Message, Result};
use crate::driver::{LlmDriver, CompletionRequest, ContentBlock};
use crate::stream::StreamChunk;
use crate::tool::{ToolRegistry, ToolContext, SkillExecutor};
use crate::tool::builtin::PathValidator;
use crate::loop_guard::{LoopGuard, LoopGuardResult};
use crate::growth::GrowthIntegration;
use crate::compaction::{self, CompactionConfig};
use crate::middleware::{self, MiddlewareChain};
use crate::prompt::{PromptBuilder, PromptContext};
use zclaw_memory::MemoryStore;
/// Agent loop runner
pub struct AgentLoop {
agent_id: AgentId,
driver: Arc<dyn LlmDriver>,
tools: ToolRegistry,
memory: Arc<MemoryStore>,
loop_guard: Mutex<LoopGuard>,
model: String,
system_prompt: Option<String>,
/// Custom agent personality for prompt assembly
soul: Option<String>,
max_tokens: u32,
temperature: f32,
skill_executor: Option<Arc<dyn SkillExecutor>>,
path_validator: Option<PathValidator>,
/// Growth system integration (optional)
growth: Option<GrowthIntegration>,
/// Compaction threshold in tokens (0 = disabled)
compaction_threshold: usize,
/// Compaction behavior configuration
compaction_config: CompactionConfig,
/// Optional middleware chain — when `Some`, cross-cutting logic is
/// delegated to the chain instead of the inline code below.
/// When `None`, the legacy inline path is used (100% backward compatible).
middleware_chain: Option<MiddlewareChain>,
/// Chat mode: extended thinking enabled
thinking_enabled: bool,
/// Chat mode: reasoning effort level
reasoning_effort: Option<String>,
/// Chat mode: plan mode
plan_mode: bool,
}
impl AgentLoop {
pub fn new(
agent_id: AgentId,
driver: Arc<dyn LlmDriver>,
tools: ToolRegistry,
memory: Arc<MemoryStore>,
) -> Self {
Self {
agent_id,
driver,
tools,
memory,
loop_guard: Mutex::new(LoopGuard::default()),
model: String::new(), // Must be set via with_model()
system_prompt: None,
soul: None,
max_tokens: 16384,
temperature: 0.7,
skill_executor: None,
path_validator: None,
growth: None,
compaction_threshold: 0,
compaction_config: CompactionConfig::default(),
middleware_chain: None,
thinking_enabled: false,
reasoning_effort: None,
plan_mode: false,
}
}
/// Set the skill executor for tool execution
pub fn with_skill_executor(mut self, executor: Arc<dyn SkillExecutor>) -> Self {
self.skill_executor = Some(executor);
self
}
/// Set the path validator for file system operations
pub fn with_path_validator(mut self, validator: PathValidator) -> Self {
self.path_validator = Some(validator);
self
}
/// Set the model to use
pub fn with_model(mut self, model: impl Into<String>) -> Self {
self.model = model.into();
self
}
/// Set the system prompt
pub fn with_system_prompt(mut self, prompt: impl Into<String>) -> Self {
self.system_prompt = Some(prompt.into());
self
}
/// Set the agent personality (SOUL.md equivalent)
pub fn with_soul(mut self, soul: impl Into<String>) -> Self {
self.soul = Some(soul.into());
self
}
/// Enable extended thinking/reasoning mode
pub fn with_thinking_enabled(mut self, enabled: bool) -> Self {
self.thinking_enabled = enabled;
self
}
/// Set reasoning effort level (low/medium/high)
pub fn with_reasoning_effort(mut self, effort: impl Into<String>) -> Self {
self.reasoning_effort = Some(effort.into());
self
}
/// Enable plan mode
pub fn with_plan_mode(mut self, enabled: bool) -> Self {
self.plan_mode = enabled;
self
}
/// Set max tokens
pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
self.max_tokens = max_tokens;
self
}
/// Set temperature
pub fn with_temperature(mut self, temperature: f32) -> Self {
self.temperature = temperature;
self
}
/// Enable growth system integration
pub fn with_growth(mut self, growth: GrowthIntegration) -> Self {
self.growth = Some(growth);
self
}
/// Set growth system (mutable)
pub fn set_growth(&mut self, growth: GrowthIntegration) {
self.growth = Some(growth);
}
/// Set compaction threshold in tokens (0 = disabled)
///
/// When the estimated token count of conversation history exceeds this
/// threshold, older messages are summarized into a single system message
/// and only recent messages are sent to the LLM.
pub fn with_compaction_threshold(mut self, threshold: usize) -> Self {
self.compaction_threshold = threshold;
self
}
/// Set compaction configuration (LLM mode, memory flushing, etc.)
pub fn with_compaction_config(mut self, config: CompactionConfig) -> Self {
self.compaction_config = config;
self
}
/// Inject a middleware chain. When set, cross-cutting concerns (compaction,
/// loop guard, token calibration, etc.) are delegated to the chain instead
/// of the inline logic.
pub fn with_middleware_chain(mut self, chain: MiddlewareChain) -> Self {
self.middleware_chain = Some(chain);
self
}
/// Get growth integration reference
pub fn growth(&self) -> Option<&GrowthIntegration> {
self.growth.as_ref()
}
/// Create tool context for tool execution
fn create_tool_context(&self, session_id: SessionId) -> ToolContext {
// If no path_validator is configured, create a default one with user home as workspace.
// This allows file_read/file_write tools to work without explicit workspace config,
// while still restricting access to the user's home directory for security.
let path_validator = self.path_validator.clone().unwrap_or_else(|| {
let home = std::env::var("USERPROFILE")
.or_else(|_| std::env::var("HOME"))
.unwrap_or_else(|_| ".".to_string());
let home_path = std::path::PathBuf::from(&home);
tracing::info!(
"[AgentLoop] No path_validator configured, using user home as workspace: {}",
home_path.display()
);
PathValidator::new().with_workspace(home_path)
});
let working_dir = path_validator.workspace_root()
.map(|p| p.to_string_lossy().to_string());
ToolContext {
agent_id: self.agent_id.clone(),
working_directory: working_dir,
session_id: Some(session_id.to_string()),
skill_executor: self.skill_executor.clone(),
path_validator: Some(path_validator),
event_sender: None,
}
}
/// Execute a tool with the given input
async fn execute_tool(&self, tool_name: &str, input: serde_json::Value, context: &ToolContext) -> Result<serde_json::Value> {
let tool = self.tools.get(tool_name)
.ok_or_else(|| zclaw_types::ZclawError::ToolError(format!("Unknown tool: {}", tool_name)))?;
tool.execute(input, context).await
}
/// Run the agent loop with a single message
/// Implements complete agent loop: LLM → Tool Call → Tool Result → LLM → Final Response
pub async fn run(&self, session_id: SessionId, input: String) -> Result<AgentLoopResult> {
// Add user message to session
let user_message = Message::user(input.clone());
self.memory.append_message(&session_id, &user_message).await?;
// Get all messages for context
let mut messages = self.memory.get_messages(&session_id).await?;
let use_middleware = self.middleware_chain.is_some();
// Apply compaction — skip inline path when middleware chain handles it
if !use_middleware && self.compaction_threshold > 0 {
let needs_async =
self.compaction_config.use_llm || self.compaction_config.memory_flush_enabled;
if needs_async {
let outcome = compaction::maybe_compact_with_config(
messages,
self.compaction_threshold,
&self.compaction_config,
&self.agent_id,
&session_id,
Some(&self.driver),
self.growth.as_ref(),
)
.await;
messages = outcome.messages;
} else {
messages = compaction::maybe_compact(messages, self.compaction_threshold);
}
}
// Enhance system prompt — skip when middleware chain handles it
let mut enhanced_prompt = if use_middleware {
let prompt_ctx = PromptContext {
base_prompt: self.system_prompt.clone(),
soul: self.soul.clone(),
thinking_enabled: self.thinking_enabled,
plan_mode: self.plan_mode,
tool_definitions: self.tools.definitions(),
agent_name: None,
};
PromptBuilder::new().build(&prompt_ctx)
} else if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
growth.enhance_prompt(&self.agent_id, base, &input).await?
} else {
self.system_prompt.clone().unwrap_or_default()
};
// Run middleware before_completion hooks (compaction, memory inject, etc.)
if let Some(ref chain) = self.middleware_chain {
let mut mw_ctx = middleware::MiddlewareContext {
agent_id: self.agent_id.clone(),
session_id: session_id.clone(),
user_input: input.clone(),
system_prompt: enhanced_prompt.clone(),
messages,
response_content: Vec::new(),
input_tokens: 0,
output_tokens: 0,
};
match chain.run_before_completion(&mut mw_ctx).await? {
middleware::MiddlewareDecision::Continue => {
messages = mw_ctx.messages;
enhanced_prompt = mw_ctx.system_prompt;
}
middleware::MiddlewareDecision::Stop(reason) => {
return Ok(AgentLoopResult {
response: reason,
input_tokens: 0,
output_tokens: 0,
iterations: 1,
});
}
}
}
let max_iterations = 10;
let mut iterations = 0;
let mut total_input_tokens = 0u32;
let mut total_output_tokens = 0u32;
let result = loop {
iterations += 1;
if iterations > max_iterations {
// Save the state before returning
let error_msg = "达到最大迭代次数,请简化请求";
self.memory.append_message(&session_id, &Message::assistant(error_msg)).await?;
break AgentLoopResult {
response: error_msg.to_string(),
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations,
};
}
// Build completion request
let request = CompletionRequest {
model: self.model.clone(),
system: Some(enhanced_prompt.clone()),
messages: messages.clone(),
tools: self.tools.definitions(),
max_tokens: Some(self.max_tokens),
temperature: Some(self.temperature),
stop: Vec::new(),
stream: false,
thinking_enabled: self.thinking_enabled,
reasoning_effort: self.reasoning_effort.clone(),
plan_mode: self.plan_mode,
};
// Call LLM
let response = self.driver.complete(request).await?;
total_input_tokens += response.input_tokens;
total_output_tokens += response.output_tokens;
// Calibrate token estimation on first iteration
if iterations == 1 {
compaction::update_calibration(
compaction::estimate_messages_tokens(&messages),
response.input_tokens,
);
}
// Extract tool calls from response
let tool_calls: Vec<(String, String, serde_json::Value)> = response.content.iter()
.filter_map(|block| match block {
ContentBlock::ToolUse { id, name, input } => Some((id.clone(), name.clone(), input.clone())),
_ => None,
})
.collect();
// Extract text and thinking separately
let text_parts: Vec<String> = response.content.iter()
.filter_map(|block| match block {
ContentBlock::Text { text } => Some(text.clone()),
_ => None,
})
.collect();
let thinking_parts: Vec<String> = response.content.iter()
.filter_map(|block| match block {
ContentBlock::Thinking { thinking } => Some(thinking.clone()),
_ => None,
})
.collect();
let text_content = text_parts.join("\n");
let thinking_content = if thinking_parts.is_empty() { None } else { Some(thinking_parts.join("")) };
// If no tool calls, we have the final response
if tool_calls.is_empty() {
// Save final assistant message with thinking
let msg = if let Some(thinking) = &thinking_content {
Message::assistant_with_thinking(&text_content, thinking)
} else {
Message::assistant(&text_content)
};
self.memory.append_message(&session_id, &msg).await?;
break AgentLoopResult {
response: text_content,
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations,
};
}
// There are tool calls - push assistant message with thinking before tool calls
// (required by Kimi and other thinking-enabled APIs)
let assistant_msg = if let Some(thinking) = &thinking_content {
Message::assistant_with_thinking(&text_content, thinking)
} else {
Message::assistant(&text_content)
};
messages.push(assistant_msg);
for (id, name, input) in &tool_calls {
messages.push(Message::tool_use(id, zclaw_types::ToolId::new(name), input.clone()));
}
// Create tool context and execute all tools
let tool_context = self.create_tool_context(session_id.clone());
let mut circuit_breaker_triggered = false;
let mut abort_result: Option<AgentLoopResult> = None;
let mut clarification_result: Option<AgentLoopResult> = None;
for (id, name, input) in tool_calls {
// Check if loop was already aborted
if abort_result.is_some() {
break;
}
// Check tool call safety — via middleware chain or inline loop guard
if let Some(ref chain) = self.middleware_chain {
let mw_ctx_ref = middleware::MiddlewareContext {
agent_id: self.agent_id.clone(),
session_id: session_id.clone(),
user_input: input.to_string(),
system_prompt: enhanced_prompt.clone(),
messages: messages.clone(),
response_content: Vec::new(),
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
};
match chain.run_before_tool_call(&mw_ctx_ref, &name, &input).await? {
middleware::ToolCallDecision::Allow => {}
middleware::ToolCallDecision::Block(msg) => {
tracing::warn!("[AgentLoop] Tool '{}' blocked by middleware: {}", name, msg);
let error_output = serde_json::json!({ "error": msg });
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), error_output, true));
continue;
}
middleware::ToolCallDecision::ReplaceInput(new_input) => {
// Execute with replaced input
let tool_result = match self.execute_tool(&name, new_input, &tool_context).await {
Ok(result) => result,
Err(e) => serde_json::json!({ "error": e.to_string() }),
};
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), tool_result, false));
continue;
}
middleware::ToolCallDecision::AbortLoop(reason) => {
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
let msg = format!("{}\n已自动终止", reason);
self.memory.append_message(&session_id, &Message::assistant(&msg)).await?;
abort_result = Some(AgentLoopResult {
response: msg,
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations,
});
}
}
} else {
// Legacy inline path
let guard_result = self.loop_guard.lock().unwrap_or_else(|e| e.into_inner()).check(&name, &input);
match guard_result {
LoopGuardResult::CircuitBreaker => {
tracing::warn!("[AgentLoop] Circuit breaker triggered by tool '{}'", name);
circuit_breaker_triggered = true;
break;
}
LoopGuardResult::Blocked => {
tracing::warn!("[AgentLoop] Tool '{}' blocked by loop guard", name);
let error_output = serde_json::json!({ "error": "工具调用被循环防护拦截" });
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), error_output, true));
continue;
}
LoopGuardResult::Warn => {
tracing::warn!("[AgentLoop] Tool '{}' triggered loop guard warning", name);
}
LoopGuardResult::Allowed => {}
}
}
let tool_result = match self.execute_tool(&name, input, &tool_context).await {
Ok(result) => result,
Err(e) => serde_json::json!({ "error": e.to_string() }),
};
// Check if this is a clarification response — terminate loop immediately
// so the LLM waits for user input instead of continuing to generate.
if name == "ask_clarification"
&& tool_result.get("status").and_then(|v| v.as_str()) == Some("clarification_needed")
{
tracing::info!("[AgentLoop] Clarification requested, terminating loop");
let question = tool_result.get("question")
.and_then(|v| v.as_str())
.unwrap_or("需要更多信息")
.to_string();
messages.push(Message::tool_result(
id,
zclaw_types::ToolId::new(&name),
tool_result,
false,
));
self.memory.append_message(&session_id, &Message::assistant(&question)).await?;
clarification_result = Some(AgentLoopResult {
response: question,
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations,
});
break;
}
// Add tool result to messages
messages.push(Message::tool_result(
id,
zclaw_types::ToolId::new(&name),
tool_result,
false, // is_error - we include errors in the result itself
));
}
// Continue the loop - LLM will process tool results and generate final response
// If middleware aborted the loop, return immediately
if let Some(result) = abort_result {
break result;
}
// If clarification was requested, return immediately
if let Some(result) = clarification_result {
break result;
}
// If circuit breaker was triggered, terminate immediately
if circuit_breaker_triggered {
let msg = "检测到工具调用循环,已自动终止";
self.memory.append_message(&session_id, &Message::assistant(msg)).await?;
break AgentLoopResult {
response: msg.to_string(),
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations,
};
}
};
// Post-completion processing — middleware chain or inline growth
if let Some(ref chain) = self.middleware_chain {
let mw_ctx = middleware::MiddlewareContext {
agent_id: self.agent_id.clone(),
session_id: session_id.clone(),
user_input: input.clone(),
system_prompt: enhanced_prompt.clone(),
messages: self.memory.get_messages(&session_id).await.unwrap_or_default(),
response_content: Vec::new(),
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
};
if let Err(e) = chain.run_after_completion(&mw_ctx).await {
tracing::warn!("[AgentLoop] Middleware after_completion failed: {}", e);
}
} else if let Some(ref growth) = self.growth {
// Legacy inline path
if let Ok(all_messages) = self.memory.get_messages(&session_id).await {
if let Err(e) = growth.process_conversation(&self.agent_id, &all_messages, session_id.clone()).await {
tracing::warn!("[AgentLoop] Growth processing failed: {}", e);
}
}
}
Ok(result)
}
/// Run the agent loop with streaming
/// Implements complete agent loop with multi-turn tool calling support
pub async fn run_streaming(
&self,
session_id: SessionId,
input: String,
) -> Result<mpsc::Receiver<LoopEvent>> {
let (tx, rx) = mpsc::channel(100);
// Add user message to session
let user_message = Message::user(input.clone());
self.memory.append_message(&session_id, &user_message).await?;
// Get all messages for context
let mut messages = self.memory.get_messages(&session_id).await?;
let use_middleware = self.middleware_chain.is_some();
// Apply compaction — skip inline path when middleware chain handles it
if !use_middleware && self.compaction_threshold > 0 {
let needs_async =
self.compaction_config.use_llm || self.compaction_config.memory_flush_enabled;
if needs_async {
let outcome = compaction::maybe_compact_with_config(
messages,
self.compaction_threshold,
&self.compaction_config,
&self.agent_id,
&session_id,
Some(&self.driver),
self.growth.as_ref(),
)
.await;
messages = outcome.messages;
} else {
messages = compaction::maybe_compact(messages, self.compaction_threshold);
}
}
// Enhance system prompt — skip when middleware chain handles it
let mut enhanced_prompt = if use_middleware {
let prompt_ctx = PromptContext {
base_prompt: self.system_prompt.clone(),
soul: self.soul.clone(),
thinking_enabled: self.thinking_enabled,
plan_mode: self.plan_mode,
tool_definitions: self.tools.definitions(),
agent_name: None,
};
PromptBuilder::new().build(&prompt_ctx)
} else if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
growth.enhance_prompt(&self.agent_id, base, &input).await?
} else {
self.system_prompt.clone().unwrap_or_default()
};
// Run middleware before_completion hooks (compaction, memory inject, etc.)
if let Some(ref chain) = self.middleware_chain {
let mut mw_ctx = middleware::MiddlewareContext {
agent_id: self.agent_id.clone(),
session_id: session_id.clone(),
user_input: input.clone(),
system_prompt: enhanced_prompt.clone(),
messages,
response_content: Vec::new(),
input_tokens: 0,
output_tokens: 0,
};
match chain.run_before_completion(&mut mw_ctx).await? {
middleware::MiddlewareDecision::Continue => {
messages = mw_ctx.messages;
enhanced_prompt = mw_ctx.system_prompt;
}
middleware::MiddlewareDecision::Stop(reason) => {
let _ = tx.send(LoopEvent::Complete(AgentLoopResult {
response: reason,
input_tokens: 0,
output_tokens: 0,
iterations: 1,
})).await;
return Ok(rx);
}
}
}
// Clone necessary data for the async task
let session_id_clone = session_id.clone();
let memory = self.memory.clone();
let driver = self.driver.clone();
let tools = self.tools.clone();
let loop_guard_clone = self.loop_guard.lock().unwrap_or_else(|e| e.into_inner()).clone();
let middleware_chain = self.middleware_chain.clone();
let skill_executor = self.skill_executor.clone();
let path_validator = self.path_validator.clone();
let agent_id = self.agent_id.clone();
let model = self.model.clone();
let max_tokens = self.max_tokens;
let temperature = self.temperature;
let thinking_enabled = self.thinking_enabled;
let reasoning_effort = self.reasoning_effort.clone();
let plan_mode = self.plan_mode;
tokio::spawn(async move {
let mut messages = messages;
let loop_guard_clone = Mutex::new(loop_guard_clone);
let max_iterations = 10;
let mut iteration = 0;
let mut total_input_tokens = 0u32;
let mut total_output_tokens = 0u32;
'outer: loop {
iteration += 1;
if iteration > max_iterations {
let _ = tx.send(LoopEvent::Error("达到最大迭代次数".to_string())).await;
break;
}
// Notify iteration start
let _ = tx.send(LoopEvent::IterationStart {
iteration,
max_iterations,
}).await;
// Build completion request
let request = CompletionRequest {
model: model.clone(),
system: Some(enhanced_prompt.clone()),
messages: messages.clone(),
tools: tools.definitions(),
max_tokens: Some(max_tokens),
temperature: Some(temperature),
stop: Vec::new(),
stream: true,
thinking_enabled,
reasoning_effort: reasoning_effort.clone(),
plan_mode,
};
let mut stream = driver.stream(request);
let mut pending_tool_calls: Vec<(String, String, serde_json::Value)> = Vec::new();
let mut iteration_text = String::new();
let mut reasoning_text = String::new(); // Track reasoning separately for API requirement
// Process stream chunks
tracing::debug!("[AgentLoop] Starting to process stream chunks");
let mut chunk_count: usize = 0;
let mut text_delta_count: usize = 0;
let mut thinking_delta_count: usize = 0;
let mut stream_errored = false;
let chunk_timeout = std::time::Duration::from_secs(60);
loop {
match tokio::time::timeout(chunk_timeout, stream.next()).await {
Ok(Some(Ok(chunk))) => {
chunk_count += 1;
match &chunk {
StreamChunk::TextDelta { delta } => {
text_delta_count += 1;
tracing::debug!("[AgentLoop] TextDelta #{}: {} chars", text_delta_count, delta.len());
iteration_text.push_str(delta);
let _ = tx.send(LoopEvent::Delta(delta.clone())).await;
}
StreamChunk::ThinkingDelta { delta } => {
thinking_delta_count += 1;
tracing::debug!("[AgentLoop] ThinkingDelta #{}: {} chars", thinking_delta_count, delta.len());
reasoning_text.push_str(delta);
let _ = tx.send(LoopEvent::ThinkingDelta(delta.clone())).await;
}
StreamChunk::ToolUseStart { id, name } => {
tracing::debug!("[AgentLoop] ToolUseStart: id={}, name={}", id, name);
pending_tool_calls.push((id.clone(), name.clone(), serde_json::Value::Null));
}
StreamChunk::ToolUseDelta { id, delta } => {
// Accumulate tool input delta (internal processing, not sent to user)
if let Some(tool) = pending_tool_calls.iter_mut().find(|(tid, _, _)| tid == id) {
// Try to accumulate JSON string
match &mut tool.2 {
serde_json::Value::String(s) => s.push_str(delta),
serde_json::Value::Null => tool.2 = serde_json::Value::String(delta.clone()),
_ => {}
}
}
}
StreamChunk::ToolUseEnd { id, input } => {
tracing::debug!("[AgentLoop] ToolUseEnd: id={}, input={:?}", id, input);
// Update with final parsed input and emit ToolStart event
if let Some(tool) = pending_tool_calls.iter_mut().find(|(tid, _, _)| tid == id) {
tool.2 = input.clone();
let _ = tx.send(LoopEvent::ToolStart { name: tool.1.clone(), input: input.clone() }).await;
}
}
StreamChunk::Complete { input_tokens: it, output_tokens: ot, .. } => {
tracing::debug!("[AgentLoop] Stream complete: input_tokens={}, output_tokens={}", it, ot);
total_input_tokens += *it;
total_output_tokens += *ot;
// Calibrate token estimation on first iteration
if iteration == 1 {
compaction::update_calibration(
compaction::estimate_messages_tokens(&messages),
*it,
);
}
}
StreamChunk::Error { message } => {
tracing::error!("[AgentLoop] Stream error: {}", message);
let _ = tx.send(LoopEvent::Error(message.clone())).await;
stream_errored = true;
}
}
}
Ok(Some(Err(e))) => {
tracing::error!("[AgentLoop] Chunk error: {}", e);
let _ = tx.send(LoopEvent::Error(format!("LLM 响应错误: {}", e.to_string()))).await;
stream_errored = true;
}
Ok(None) => break, // Stream ended normally
Err(_) => {
tracing::error!("[AgentLoop] Stream chunk timeout ({}s)", chunk_timeout.as_secs());
let _ = tx.send(LoopEvent::Error("LLM 响应超时,请重试".to_string())).await;
stream_errored = true;
}
}
if stream_errored {
break;
}
}
tracing::info!("[AgentLoop] Stream ended: {} total chunks (text={}, thinking={}, tools={}), iteration_text={} chars",
chunk_count, text_delta_count, thinking_delta_count, pending_tool_calls.len(),
iteration_text.len());
// Fallback: if model generated reasoning but no text content,
// use reasoning as text response. This happens with some thinking models
// (DeepSeek R1, QWQ) that put the answer in reasoning_content instead of content.
// Safe now because: (1) context is clean (no stale user_profile/memory injection),
// (2) max_tokens=16384 prevents truncation, (3) reasoning is about the correct topic.
if iteration_text.is_empty() && !reasoning_text.is_empty() {
tracing::info!("[AgentLoop] Model generated {} chars of reasoning but no text — using reasoning as response",
reasoning_text.len());
let _ = tx.send(LoopEvent::Delta(reasoning_text.clone())).await;
iteration_text = reasoning_text.clone();
} else if iteration_text.is_empty() {
tracing::warn!("[AgentLoop] No text content after {} chunks (thinking_delta={})",
chunk_count, thinking_delta_count);
}
// If no tool calls, we have the final response
if pending_tool_calls.is_empty() {
tracing::info!("[AgentLoop] No tool calls, returning final response: {} chars (reasoning: {} chars)", iteration_text.len(), reasoning_text.len());
// Save final assistant message with reasoning
if let Err(e) = memory.append_message(&session_id_clone, &Message::assistant_with_thinking(
&iteration_text,
&reasoning_text,
)).await {
tracing::warn!("[AgentLoop] Failed to save final assistant message: {}", e);
}
let _ = tx.send(LoopEvent::Complete(AgentLoopResult {
response: iteration_text.clone(),
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations: iteration,
})).await;
// Post-completion: middleware after_completion (memory extraction, etc.)
if let Some(ref chain) = middleware_chain {
let mw_ctx = middleware::MiddlewareContext {
agent_id: agent_id.clone(),
session_id: session_id_clone.clone(),
user_input: String::new(),
system_prompt: enhanced_prompt.clone(),
messages: memory.get_messages(&session_id_clone).await.unwrap_or_default(),
response_content: Vec::new(),
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
};
if let Err(e) = chain.run_after_completion(&mw_ctx).await {
tracing::warn!("[AgentLoop] Streaming middleware after_completion failed: {}", e);
}
}
break 'outer;
}
// Skip tool processing if stream errored or timed out
if stream_errored {
tracing::debug!("[AgentLoop] Stream errored, skipping tool processing and breaking");
break 'outer;
}
tracing::debug!("[AgentLoop] Processing {} tool calls (reasoning: {} chars)", pending_tool_calls.len(), reasoning_text.len());
// Push assistant message with reasoning before tool calls (required by Kimi and other thinking-enabled APIs)
messages.push(Message::assistant_with_thinking(
&iteration_text,
&reasoning_text,
));
// There are tool calls - add to message history
for (id, name, input) in &pending_tool_calls {
tracing::debug!("[AgentLoop] Adding tool_use to history: id={}, name={}, input={:?}", id, name, input);
messages.push(Message::tool_use(id, zclaw_types::ToolId::new(name), input.clone()));
}
// Execute tools
for (id, name, input) in pending_tool_calls {
tracing::debug!("[AgentLoop] Executing tool: name={}, input={:?}", name, input);
// Check tool call safety — via middleware chain or inline loop guard
if let Some(ref chain) = middleware_chain {
let mw_ctx = middleware::MiddlewareContext {
agent_id: agent_id.clone(),
session_id: session_id_clone.clone(),
user_input: input.to_string(),
system_prompt: enhanced_prompt.clone(),
messages: messages.clone(),
response_content: Vec::new(),
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
};
match chain.run_before_tool_call(&mw_ctx, &name, &input).await {
Ok(middleware::ToolCallDecision::Allow) => {}
Ok(middleware::ToolCallDecision::Block(msg)) => {
tracing::warn!("[AgentLoop] Tool '{}' blocked by middleware: {}", name, msg);
let error_output = serde_json::json!({ "error": msg });
let _ = tx.send(LoopEvent::ToolEnd { name: name.clone(), output: error_output.clone() }).await;
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), error_output, true));
continue;
}
Ok(middleware::ToolCallDecision::AbortLoop(reason)) => {
tracing::warn!("[AgentLoop] Loop aborted by middleware: {}", reason);
let _ = tx.send(LoopEvent::Error(reason)).await;
break 'outer;
}
Ok(middleware::ToolCallDecision::ReplaceInput(new_input)) => {
// Execute with replaced input (same path_validator logic below)
let pv = path_validator.clone().unwrap_or_else(|| {
let home = std::env::var("USERPROFILE")
.or_else(|_| std::env::var("HOME"))
.unwrap_or_else(|_| ".".to_string());
PathValidator::new().with_workspace(std::path::PathBuf::from(&home))
});
let working_dir = pv.workspace_root()
.map(|p| p.to_string_lossy().to_string());
let tool_context = ToolContext {
agent_id: agent_id.clone(),
working_directory: working_dir,
session_id: Some(session_id_clone.to_string()),
skill_executor: skill_executor.clone(),
path_validator: Some(pv),
event_sender: Some(tx.clone()),
};
let (result, is_error) = if let Some(tool) = tools.get(&name) {
match tool.execute(new_input, &tool_context).await {
Ok(output) => {
let _ = tx.send(LoopEvent::ToolEnd { name: name.clone(), output: output.clone() }).await;
(output, false)
}
Err(e) => {
let error_output = serde_json::json!({ "error": e.to_string() });
let _ = tx.send(LoopEvent::ToolEnd { name: name.clone(), output: error_output.clone() }).await;
(error_output, true)
}
}
} else {
let error_output = serde_json::json!({ "error": format!("Unknown tool: {}", name) });
let _ = tx.send(LoopEvent::ToolEnd { name: name.clone(), output: error_output.clone() }).await;
(error_output, true)
};
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), result, is_error));
continue;
}
Err(e) => {
tracing::error!("[AgentLoop] Middleware error for tool '{}': {}", name, e);
let error_output = serde_json::json!({ "error": e.to_string() });
let _ = tx.send(LoopEvent::ToolEnd { name: name.clone(), output: error_output.clone() }).await;
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), error_output, true));
continue;
}
}
} else {
// Legacy inline loop guard path
let guard_result = loop_guard_clone.lock().unwrap_or_else(|e| e.into_inner()).check(&name, &input);
match guard_result {
LoopGuardResult::CircuitBreaker => {
let _ = tx.send(LoopEvent::Error("检测到工具调用循环,已自动终止".to_string())).await;
break 'outer;
}
LoopGuardResult::Blocked => {
tracing::warn!("[AgentLoop] Tool '{}' blocked by loop guard", name);
let error_output = serde_json::json!({ "error": "工具调用被循环防护拦截" });
let _ = tx.send(LoopEvent::ToolEnd { name: name.clone(), output: error_output.clone() }).await;
messages.push(Message::tool_result(id, zclaw_types::ToolId::new(&name), error_output, true));
continue;
}
LoopGuardResult::Warn => {
tracing::warn!("[AgentLoop] Tool '{}' triggered loop guard warning", name);
}
LoopGuardResult::Allowed => {}
}
}
// Use pre-resolved path_validator (already has default fallback from create_tool_context logic)
let pv = path_validator.clone().unwrap_or_else(|| {
let home = std::env::var("USERPROFILE")
.or_else(|_| std::env::var("HOME"))
.unwrap_or_else(|_| ".".to_string());
PathValidator::new().with_workspace(std::path::PathBuf::from(&home))
});
let working_dir = pv.workspace_root()
.map(|p| p.to_string_lossy().to_string());
let tool_context = ToolContext {
agent_id: agent_id.clone(),
working_directory: working_dir,
session_id: Some(session_id_clone.to_string()),
skill_executor: skill_executor.clone(),
path_validator: Some(pv),
event_sender: Some(tx.clone()),
};
let (result, is_error) = if let Some(tool) = tools.get(&name) {
tracing::debug!("[AgentLoop] Tool '{}' found, executing...", name);
match tool.execute(input.clone(), &tool_context).await {
Ok(output) => {
tracing::debug!("[AgentLoop] Tool '{}' executed successfully: {:?}", name, output);
let _ = tx.send(LoopEvent::ToolEnd { name: name.clone(), output: output.clone() }).await;
(output, false)
}
Err(e) => {
tracing::error!("[AgentLoop] Tool '{}' execution failed: {}", name, e);
let error_output = serde_json::json!({ "error": e.to_string() });
let _ = tx.send(LoopEvent::ToolEnd { name: name.clone(), output: error_output.clone() }).await;
(error_output, true)
}
}
} else {
tracing::error!("[AgentLoop] Tool '{}' not found in registry", name);
let error_output = serde_json::json!({ "error": format!("Unknown tool: {}", name) });
let _ = tx.send(LoopEvent::ToolEnd { name: name.clone(), output: error_output.clone() }).await;
(error_output, true)
};
// Check if this is a clarification response — break outer loop
if name == "ask_clarification"
&& result.get("status").and_then(|v| v.as_str()) == Some("clarification_needed")
{
tracing::info!("[AgentLoop] Streaming: Clarification requested, terminating loop");
let question = result.get("question")
.and_then(|v| v.as_str())
.unwrap_or("需要更多信息")
.to_string();
messages.push(Message::tool_result(
id,
zclaw_types::ToolId::new(&name),
result,
is_error,
));
// Send the question as final delta so the user sees it
let _ = tx.send(LoopEvent::Delta(question.clone())).await;
let _ = tx.send(LoopEvent::Complete(AgentLoopResult {
response: question.clone(),
input_tokens: total_input_tokens,
output_tokens: total_output_tokens,
iterations: iteration,
})).await;
if let Err(e) = memory.append_message(&session_id_clone, &Message::assistant(&question)).await {
tracing::warn!("[AgentLoop] Failed to save clarification message: {}", e);
}
break 'outer;
}
// Add tool result to message history
tracing::debug!("[AgentLoop] Adding tool_result to history: id={}, name={}, is_error={}", id, name, is_error);
messages.push(Message::tool_result(
id,
zclaw_types::ToolId::new(&name),
result,
is_error,
));
}
tracing::debug!("[AgentLoop] Continuing to next iteration for LLM to process tool results");
// Continue loop - next iteration will call LLM with tool results
}
});
Ok(rx)
}
}
/// Result of an agent loop execution
#[derive(Debug, Clone)]
pub struct AgentLoopResult {
pub response: String,
pub input_tokens: u32,
pub output_tokens: u32,
pub iterations: usize,
}
/// Events emitted during streaming
#[derive(Debug, Clone)]
pub enum LoopEvent {
/// Text delta from LLM
Delta(String),
/// Thinking/reasoning delta from LLM (extended thinking)
ThinkingDelta(String),
/// Tool execution started
ToolStart { name: String, input: serde_json::Value },
/// Tool execution completed
ToolEnd { name: String, output: serde_json::Value },
/// Sub-agent task status update (started/running/completed/failed)
SubtaskStatus {
description: String,
status: String,
detail: Option<String>,
},
/// New iteration started (multi-turn tool calling)
IterationStart { iteration: usize, max_iterations: usize },
/// Loop completed with final result
Complete(AgentLoopResult),
/// Error occurred
Error(String),
}