//! Dangling tool-call repair middleware — detects and patches missing tool-result //! messages that would cause LLM API errors. //! //! When the LLM produces a `ToolUse` content block but the agent loop fails to //! produce a corresponding `ToolResult` message (e.g. due to a crash or timeout), //! the conversation history becomes inconsistent. The next LLM call would fail with //! an API error because ToolUse messages must be followed by ToolResult messages. //! //! This middleware inspects the message history before each completion and appends //! placeholder ToolResult messages for any dangling ToolUse entries. use std::collections::HashSet; use async_trait::async_trait; use zclaw_types::{Message, Result}; use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision}; /// Middleware that repairs dangling tool-use blocks in conversation history. /// /// Priority 300 — runs before tool error middleware (350) and guardrail (400). pub struct DanglingToolMiddleware; impl DanglingToolMiddleware { pub fn new() -> Self { Self } } impl Default for DanglingToolMiddleware { fn default() -> Self { Self::new() } } #[async_trait] impl AgentMiddleware for DanglingToolMiddleware { fn name(&self) -> &str { "dangling_tool" } fn priority(&self) -> i32 { 300 } async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result { let mut patched_count = 0usize; // Step 1: Collect all ToolUse IDs and matched ToolResult IDs across the // entire message list (not just adjacent pairs). let mut tool_use_ids: Vec<(String, String)> = Vec::new(); // (id, tool_name) let mut tool_result_ids: HashSet = HashSet::new(); for msg in &ctx.messages { match msg { Message::ToolUse { ref id, ref tool, .. } => { tool_use_ids.push((id.clone(), tool.as_str().to_string())); } Message::ToolResult { ref tool_call_id, ref output, .. } => { // Original results always count as matched regardless of patch status. // We insert unconditionally so that the HashSet contains the ID, // preventing false-positive "dangling" detection. let _ = output; // suppress unused warning — patch check is informational only tool_result_ids.insert(tool_call_id.clone()); } _ => {} } } // Step 2: Find dangling ToolUse entries that have no matching ToolResult. let dangling_ids: HashSet = tool_use_ids.iter() .filter(|(id, _)| !tool_result_ids.contains(id)) .map(|(id, _)| id.clone()) .collect(); if dangling_ids.is_empty() { return Ok(MiddlewareDecision::Continue); } // Step 3: Insert placeholder ToolResult for each dangling ToolUse. // Also skip ToolUse entries that already have a patched placeholder further // down the list (prevents double-patching if the middleware runs twice). let capacity = ctx.messages.len() + dangling_ids.len(); let mut patched_messages: Vec = Vec::with_capacity(capacity); for msg in &ctx.messages { patched_messages.push(msg.clone()); if let Message::ToolUse { ref id, ref tool, .. } = msg { if dangling_ids.contains(id) { tracing::warn!( "[DanglingToolMiddleware] Patching dangling ToolUse: tool={}, id={}", tool.as_str(), id ); let placeholder = Message::tool_result( id.clone(), tool.clone(), serde_json::json!({ "error": "Tool execution was interrupted. Please retry or use an alternative approach.", "tool_patch": true, }), true, // is_error ); patched_messages.push(placeholder); patched_count += 1; } } } // Step 4: Detect streaming interrupt — if the last message is an Assistant // response while there were dangling tools, the user likely interrupted a // streaming response mid-tool-execution. No additional action is needed // beyond the patched ToolResult messages that now prevent API errors. if let Some(Message::Assistant { .. }) = patched_messages.last() { tracing::debug!( "[DanglingToolMiddleware] Streaming interrupt detected with {} dangling tools", patched_count ); } if patched_count > 0 { tracing::info!( "[DanglingToolMiddleware] Patched {} dangling tool-use blocks", patched_count ); ctx.messages = patched_messages; } Ok(MiddlewareDecision::Continue) } }