Phase 1: Anthropic prompt caching - Add cache_control ephemeral on system prompt blocks - Track cache_creation/cache_read tokens in CompletionResponse + StreamChunk Phase 2A: Parallel tool execution - Add ToolConcurrency enum (ReadOnly/Exclusive/Interactive) - JoinSet + Semaphore(3) for bounded parallel tool calls - 7 tools annotated with correct concurrency level - AtomicU32 for lock-free failure tracking in ToolErrorMiddleware Phase 2B: Tool output pruning - prune_tool_outputs() trims old ToolResult > 2000 chars to 500 chars - Integrated into CompactionMiddleware before token estimation Phase 3: Error classification + smart retry - LlmErrorKind + ClassifiedLlmError for structured error mapping - RetryDriver decorator with jittered exponential backoff - Kernel wraps all LLM calls with RetryDriver - CONTEXT_OVERFLOW recovery triggers emergency compaction in loop_runner
237 lines
8.4 KiB
Rust
237 lines
8.4 KiB
Rust
//! Hands chain seam tests
//!
//! Verifies the integration seams in the Hand execution pipeline:
//! 1. Tool routing: an LLM `tool_call` is dispatched to the correct Hand through the HandRegistry
//! 2. Execution callback: when a Hand completes, a `LoopEvent` is emitted
//! 3. Non-hand tool routing: a generic (non-hand) tool call is routed correctly
|
|
|
|
use std::sync::Arc;
|
|
use zclaw_kernel::{Kernel, KernelConfig};
|
|
use zclaw_runtime::test_util::MockLlmDriver;
|
|
use zclaw_runtime::stream::StreamChunk;
|
|
use zclaw_runtime::{LoopEvent, LlmDriver};
|
|
use zclaw_types::AgentConfig;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Seam 1: Tool routing — LLM tool_call triggers HandTool dispatch
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[tokio::test]
|
|
async fn seam_hand_tool_routing() {
|
|
// First stream: tool_use for hand_quiz
|
|
let mock = MockLlmDriver::new()
|
|
.with_stream_chunks(vec![
|
|
StreamChunk::TextDelta { delta: "Let me generate a quiz.".to_string() },
|
|
StreamChunk::ToolUseStart { id: "call_quiz_1".to_string(), name: "hand_quiz".to_string() },
|
|
StreamChunk::ToolUseEnd {
|
|
id: "call_quiz_1".to_string(),
|
|
input: serde_json::json!({ "topic": "math", "count": 3 }),
|
|
},
|
|
StreamChunk::Complete {
|
|
input_tokens: 10,
|
|
output_tokens: 20,
|
|
stop_reason: "tool_use".to_string(),
|
|
cache_creation_input_tokens: None,
|
|
cache_read_input_tokens: None,
|
|
},
|
|
])
|
|
// Second stream: final text after tool executes
|
|
.with_stream_chunks(vec![
|
|
StreamChunk::TextDelta { delta: "Here is your quiz!".to_string() },
|
|
StreamChunk::Complete {
|
|
input_tokens: 10,
|
|
output_tokens: 5,
|
|
stop_reason: "end_turn".to_string(),
|
|
cache_creation_input_tokens: None,
|
|
cache_read_input_tokens: None,
|
|
},
|
|
]);
|
|
|
|
let config = KernelConfig::default();
|
|
let kernel = Kernel::boot_with_driver(config, Arc::new(mock) as Arc<dyn LlmDriver>)
|
|
.await
|
|
.expect("kernel boot");
|
|
|
|
let agent_config = AgentConfig::new("test-agent")
|
|
.with_system_prompt("You are a test assistant.");
|
|
let id = agent_config.id;
|
|
kernel.spawn_agent(agent_config).await.expect("spawn agent");
|
|
|
|
let mut rx = kernel
|
|
.send_message_stream(&id, "Generate a math quiz".to_string())
|
|
.await
|
|
.expect("stream");
|
|
|
|
let mut tool_starts = Vec::new();
|
|
let mut tool_ends = Vec::new();
|
|
let mut got_complete = false;
|
|
while let Some(event) = rx.recv().await {
|
|
match &event {
|
|
LoopEvent::ToolStart { name, input } => {
|
|
tool_starts.push((name.clone(), input.clone()));
|
|
}
|
|
LoopEvent::ToolEnd { name, output } => {
|
|
tool_ends.push((name.clone(), output.clone()));
|
|
}
|
|
LoopEvent::Complete(_) => {
|
|
got_complete = true;
|
|
break;
|
|
}
|
|
LoopEvent::Error(msg) => {
|
|
panic!("unexpected error: {}", msg);
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
assert!(got_complete, "stream should complete");
|
|
assert!(
|
|
tool_starts.iter().any(|(n, _)| n == "hand_quiz"),
|
|
"should see hand_quiz tool_start, got: {:?}",
|
|
tool_starts
|
|
);
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Seam 2: Execution callback — Hand completes and produces tool_end
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[tokio::test]
|
|
async fn seam_hand_execution_callback() {
|
|
let mock = MockLlmDriver::new()
|
|
.with_stream_chunks(vec![
|
|
StreamChunk::ToolUseStart { id: "call_quiz_1".to_string(), name: "hand_quiz".to_string() },
|
|
StreamChunk::ToolUseEnd {
|
|
id: "call_quiz_1".to_string(),
|
|
input: serde_json::json!({ "topic": "math" }),
|
|
},
|
|
StreamChunk::Complete {
|
|
input_tokens: 10,
|
|
output_tokens: 5,
|
|
stop_reason: "tool_use".to_string(),
|
|
cache_creation_input_tokens: None,
|
|
cache_read_input_tokens: None,
|
|
},
|
|
])
|
|
.with_stream_chunks(vec![
|
|
StreamChunk::TextDelta { delta: "Done!".to_string() },
|
|
StreamChunk::Complete {
|
|
input_tokens: 5,
|
|
output_tokens: 1,
|
|
stop_reason: "end_turn".to_string(),
|
|
cache_creation_input_tokens: None,
|
|
cache_read_input_tokens: None,
|
|
},
|
|
]);
|
|
|
|
let config = KernelConfig::default();
|
|
let kernel = Kernel::boot_with_driver(config, Arc::new(mock) as Arc<dyn LlmDriver>)
|
|
.await
|
|
.expect("kernel boot");
|
|
|
|
let agent_config = AgentConfig::new("test-agent");
|
|
let id = agent_config.id;
|
|
kernel.spawn_agent(agent_config).await.expect("spawn agent");
|
|
|
|
let mut rx = kernel
|
|
.send_message_stream(&id, "Quiz me".to_string())
|
|
.await
|
|
.expect("stream");
|
|
|
|
let mut got_tool_end = false;
|
|
let mut got_complete = false;
|
|
while let Some(event) = rx.recv().await {
|
|
match &event {
|
|
LoopEvent::ToolEnd { name, output } => {
|
|
got_tool_end = true;
|
|
assert!(name.starts_with("hand_"), "tool_end should be hand tool, got: {}", name);
|
|
// Quiz hand returns structured JSON output
|
|
assert!(output.is_object() || output.is_string(), "output should be JSON, got: {}", output);
|
|
}
|
|
LoopEvent::Complete(_) => {
|
|
got_complete = true;
|
|
break;
|
|
}
|
|
LoopEvent::Error(msg) => {
|
|
panic!("unexpected error: {}", msg);
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
assert!(got_tool_end, "should receive tool_end after hand execution");
|
|
assert!(got_complete, "should complete after tool_end");
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Seam 3: Non-hand tool call (generic tool) routes correctly
|
|
// ---------------------------------------------------------------------------
|
|
|
|
#[tokio::test]
|
|
async fn seam_generic_tool_routing() {
|
|
// Mock with a generic tool call (web_search)
|
|
let mock = MockLlmDriver::new()
|
|
.with_stream_chunks(vec![
|
|
StreamChunk::ToolUseStart { id: "call_ws_1".to_string(), name: "web_search".to_string() },
|
|
StreamChunk::ToolUseEnd {
|
|
id: "call_ws_1".to_string(),
|
|
input: serde_json::json!({ "query": "test query" }),
|
|
},
|
|
StreamChunk::Complete {
|
|
input_tokens: 10,
|
|
output_tokens: 5,
|
|
stop_reason: "tool_use".to_string(),
|
|
cache_creation_input_tokens: None,
|
|
cache_read_input_tokens: None,
|
|
},
|
|
])
|
|
.with_stream_chunks(vec![
|
|
StreamChunk::TextDelta { delta: "Search results found.".to_string() },
|
|
StreamChunk::Complete {
|
|
input_tokens: 5,
|
|
output_tokens: 3,
|
|
stop_reason: "end_turn".to_string(),
|
|
cache_creation_input_tokens: None,
|
|
cache_read_input_tokens: None,
|
|
},
|
|
]);
|
|
|
|
let config = KernelConfig::default();
|
|
let kernel = Kernel::boot_with_driver(config, Arc::new(mock) as Arc<dyn LlmDriver>)
|
|
.await
|
|
.expect("kernel boot");
|
|
|
|
let agent_config = AgentConfig::new("test-agent");
|
|
let id = agent_config.id;
|
|
kernel.spawn_agent(agent_config).await.expect("spawn agent");
|
|
|
|
let mut rx = kernel
|
|
.send_message_stream(&id, "Search for test".to_string())
|
|
.await
|
|
.expect("stream");
|
|
|
|
let mut tool_names = Vec::new();
|
|
let mut got_complete = false;
|
|
while let Some(event) = rx.recv().await {
|
|
match &event {
|
|
LoopEvent::ToolStart { name, .. } => tool_names.push(name.clone()),
|
|
LoopEvent::ToolEnd { name, .. } => tool_names.push(format!("end:{}", name)),
|
|
LoopEvent::Complete(_) => {
|
|
got_complete = true;
|
|
break;
|
|
}
|
|
LoopEvent::Error(msg) => {
|
|
panic!("unexpected error: {}", msg);
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
assert!(got_complete, "stream should complete");
|
|
assert!(
|
|
tool_names.iter().any(|n| n.contains("web_search")),
|
|
"should see web_search tool events, got: {:?}",
|
|
tool_names
|
|
);
|
|
}
|