perf(runtime): Hermes Phase 1-3 — prompt caching + parallel tools + smart retry
Phase 1: Anthropic prompt caching
- Add cache_control ephemeral on system prompt blocks
- Track cache_creation/cache_read tokens in CompletionResponse + StreamChunk

Phase 2A: Parallel tool execution
- Add ToolConcurrency enum (ReadOnly/Exclusive/Interactive)
- JoinSet + Semaphore(3) for bounded parallel tool calls
- 7 tools annotated with correct concurrency level
- AtomicU32 for lock-free failure tracking in ToolErrorMiddleware

Phase 2B: Tool output pruning
- prune_tool_outputs() trims old ToolResult > 2000 chars to 500 chars
- Integrated into CompactionMiddleware before token estimation

Phase 3: Error classification + smart retry
- LlmErrorKind + ClassifiedLlmError for structured error mapping
- RetryDriver decorator with jittered exponential backoff
- Kernel wraps all LLM calls with RetryDriver
- CONTEXT_OVERFLOW recovery triggers emergency compaction in loop_runner
This commit is contained in:
@@ -117,7 +117,9 @@ impl Kernel {
|
||||
}
|
||||
}
|
||||
|
||||
use zclaw_runtime::{AgentLoop, tool::builtin::PathValidator};
|
||||
use std::sync::Arc;
|
||||
use zclaw_runtime::{AgentLoop, LlmDriver, tool::builtin::PathValidator};
|
||||
use zclaw_runtime::driver::{RetryDriver, RetryConfig};
|
||||
|
||||
use super::Kernel;
|
||||
use super::super::MessageResponse;
|
||||
@@ -161,9 +163,12 @@ impl Kernel {
|
||||
let subagent_enabled = chat_mode.as_ref().and_then(|m| m.subagent_enabled).unwrap_or(false);
|
||||
let tools = self.create_tool_registry(subagent_enabled);
|
||||
self.skill_executor.set_tool_registry(tools.clone());
|
||||
let driver: Arc<dyn LlmDriver> = Arc::new(
|
||||
RetryDriver::new(self.driver.clone(), RetryConfig::default())
|
||||
);
|
||||
let mut loop_runner = AgentLoop::new(
|
||||
*agent_id,
|
||||
self.driver.clone(),
|
||||
driver,
|
||||
tools,
|
||||
self.memory.clone(),
|
||||
)
|
||||
@@ -275,9 +280,12 @@ impl Kernel {
|
||||
let subagent_enabled = chat_mode.as_ref().and_then(|m| m.subagent_enabled).unwrap_or(false);
|
||||
let tools = self.create_tool_registry(subagent_enabled);
|
||||
self.skill_executor.set_tool_registry(tools.clone());
|
||||
let driver: Arc<dyn LlmDriver> = Arc::new(
|
||||
RetryDriver::new(self.driver.clone(), RetryConfig::default())
|
||||
);
|
||||
let mut loop_runner = AgentLoop::new(
|
||||
*agent_id,
|
||||
self.driver.clone(),
|
||||
driver,
|
||||
tools,
|
||||
self.memory.clone(),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user