zclaw_openfang/crates/zclaw-runtime/src/middleware/dangling_tool.rs

//! Dangling tool-call repair middleware — detects and patches missing tool-result
//! messages that would cause LLM API errors.
//!
//! When the LLM produces a `ToolUse` content block but the agent loop fails to
//! produce a corresponding `ToolResult` message (e.g. due to a crash or timeout),
//! the conversation history becomes inconsistent. The next LLM call would fail with
//! an API error because ToolUse messages must be followed by ToolResult messages.
//!
//! This middleware inspects the message history before each completion and inserts
//! a placeholder ToolResult message directly after every dangling ToolUse entry.

use std::collections::HashSet;

use async_trait::async_trait;
use zclaw_types::{Message, Result};
use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};

/// Middleware that repairs dangling tool-use blocks in conversation history.
///
/// The type is a stateless unit struct, so it is cheap to copy and can be
/// printed with `{:?}` in diagnostics.
///
/// Priority 300 — runs before tool error middleware (350) and guardrail (400).
#[derive(Debug, Clone, Copy)]
pub struct DanglingToolMiddleware;

impl DanglingToolMiddleware {
    pub fn new() -> Self {
        Self
    }
}

impl Default for DanglingToolMiddleware {
    fn default() -> Self {
        Self::new()
    }
}

#[async_trait]
impl AgentMiddleware for DanglingToolMiddleware {
    /// Name under which this middleware is registered.
    fn name(&self) -> &str { "dangling_tool" }

    /// Ordering value for this middleware within the chain.
    fn priority(&self) -> i32 { 300 }

    /// Repairs the message history in `ctx` before a completion request.
    ///
    /// Every `Message::ToolUse` whose `id` is not referenced by any
    /// `Message::ToolResult` (`tool_call_id`) anywhere in the history is
    /// considered dangling. For each such entry an error-flagged placeholder
    /// `ToolResult` is inserted immediately after it. A placeholder inserted
    /// by an earlier run is itself a `ToolResult` carrying the same id, so
    /// running the middleware again does not patch the same entry twice.
    ///
    /// Always returns `Ok(MiddlewareDecision::Continue)`; the only effect is
    /// the (possibly) rewritten `ctx.messages` plus log output.
    async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
        // Step 1 + 2: gather the IDs answered by any ToolResult in the whole
        // history (not just adjacent pairs), then keep the ToolUse IDs that
        // have no such answer. Only the dangling IDs are cloned; the borrowed
        // lookup set is dropped at the end of this block so the message list
        // can be moved below.
        let dangling_ids: HashSet<String> = {
            let answered: HashSet<&str> = ctx
                .messages
                .iter()
                .filter_map(|msg| match msg {
                    Message::ToolResult { tool_call_id, .. } => Some(tool_call_id.as_str()),
                    _ => None,
                })
                .collect();

            ctx.messages
                .iter()
                .filter_map(|msg| match msg {
                    Message::ToolUse { id, .. } if !answered.contains(id.as_str()) => {
                        Some(id.clone())
                    }
                    _ => None,
                })
                .collect()
        };

        if dangling_ids.is_empty() {
            return Ok(MiddlewareDecision::Continue);
        }

        // Step 3: rebuild the history, inserting a placeholder ToolResult right
        // after each dangling ToolUse. The original vector is moved out so the
        // existing messages are reused instead of being cloned one by one; it
        // is unconditionally replaced by the patched list further down.
        let original = std::mem::take(&mut ctx.messages);
        let mut patched_messages: Vec<Message> =
            Vec::with_capacity(original.len() + dangling_ids.len());
        let mut patched_count = 0usize;

        for msg in original {
            // Build the placeholder while `msg` is still borrowed, then move
            // `msg` into the output ahead of it to keep the original ordering.
            let placeholder = match &msg {
                Message::ToolUse { id, tool, .. } if dangling_ids.contains(id) => {
                    tracing::warn!(
                        "[DanglingToolMiddleware] Patching dangling ToolUse: tool={}, id={}",
                        tool.as_str(), id
                    );
                    Some(Message::tool_result(
                        id.clone(),
                        tool.clone(),
                        serde_json::json!({
                            "error": "Tool execution was interrupted. Please retry or use an alternative approach.",
                            "tool_patch": true,
                        }),
                        true, // is_error
                    ))
                }
                _ => None,
            };

            patched_messages.push(msg);
            if let Some(placeholder) = placeholder {
                patched_messages.push(placeholder);
                patched_count += 1;
            }
        }

        // Step 4: if the history ends with an Assistant message while tools
        // were dangling, the user likely interrupted a streaming response
        // mid-tool-execution. This is only logged; the placeholders inserted
        // above are the whole repair.
        if let Some(Message::Assistant { .. }) = patched_messages.last() {
            tracing::debug!(
                "[DanglingToolMiddleware] Streaming interrupt detected with {} dangling tools",
                patched_count
            );
        }

        // Every dangling ID was collected from a ToolUse in this same list, so
        // at least one placeholder has been inserted whenever we get here.
        tracing::info!(
            "[DanglingToolMiddleware] Patched {} dangling tool-use blocks",
            patched_count
        );
        ctx.messages = patched_messages;

        Ok(MiddlewareDecision::Continue)
    }
}