Files
zclaw_openfang/crates/zclaw-runtime/src/stream.rs
iven 9060935401 perf(runtime): Hermes Phase 1-3 — prompt caching + parallel tools + smart retry
Phase 1: Anthropic prompt caching
- Add cache_control ephemeral on system prompt blocks
- Track cache_creation/cache_read tokens in CompletionResponse + StreamChunk

Phase 2A: Parallel tool execution
- Add ToolConcurrency enum (ReadOnly/Exclusive/Interactive)
- JoinSet + Semaphore(3) for bounded parallel tool calls
- 7 tools annotated with correct concurrency level
- AtomicU32 for lock-free failure tracking in ToolErrorMiddleware

Phase 2B: Tool output pruning
- prune_tool_outputs() trims old ToolResult > 2000 chars to 500 chars
- Integrated into CompactionMiddleware before token estimation

Phase 3: Error classification + smart retry
- LlmErrorKind + ClassifiedLlmError for structured error mapping
- RetryDriver decorator with jittered exponential backoff
- Kernel wraps all LLM calls with RetryDriver
- CONTEXT_OVERFLOW recovery triggers emergency compaction in loop_runner
2026-04-24 08:39:56 +08:00

106 lines
3.1 KiB
Rust

//! Streaming response types
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use zclaw_types::Result;
/// Stream chunk emitted during streaming
/// This is the serializable type sent via Tauri events
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum StreamChunk {
/// Text delta
TextDelta { delta: String },
/// Thinking delta (for extended thinking models)
ThinkingDelta { delta: String },
/// Tool use started
ToolUseStart { id: String, name: String },
/// Tool use input delta
ToolUseDelta { id: String, delta: String },
/// Tool use completed
ToolUseEnd { id: String, input: serde_json::Value },
/// Stream completed
Complete {
input_tokens: u32,
output_tokens: u32,
stop_reason: String,
#[serde(default)]
cache_creation_input_tokens: Option<u32>,
#[serde(default)]
cache_read_input_tokens: Option<u32>,
},
/// Error occurred
Error { message: String },
}
/// Streaming event for Tauri emission
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamEvent {
/// Session ID for routing
pub session_id: String,
/// Agent ID for routing
pub agent_id: String,
/// The chunk content
pub chunk: StreamChunk,
}
impl StreamEvent {
pub fn new(session_id: impl Into<String>, agent_id: impl Into<String>, chunk: StreamChunk) -> Self {
Self {
session_id: session_id.into(),
agent_id: agent_id.into(),
chunk,
}
}
}
/// Legacy stream event for internal use with mpsc channels
#[derive(Debug, Clone)]
pub enum InternalStreamEvent {
/// Text delta received
TextDelta(String),
/// Thinking delta received
ThinkingDelta(String),
/// Tool use started
ToolUseStart { id: String, name: String },
/// Tool use input chunk
ToolUseInput { id: String, chunk: String },
/// Tool use completed
ToolUseEnd { id: String, input: serde_json::Value },
/// Response completed
Complete { input_tokens: u32, output_tokens: u32 },
/// Error occurred
Error(String),
}
/// Stream sender wrapper
pub struct StreamSender {
tx: mpsc::Sender<InternalStreamEvent>,
}
impl StreamSender {
pub fn new(tx: mpsc::Sender<InternalStreamEvent>) -> Self {
Self { tx }
}
pub async fn send_text(&self, delta: impl Into<String>) -> Result<()> {
self.tx.send(InternalStreamEvent::TextDelta(delta.into())).await.ok();
Ok(())
}
pub async fn send_thinking(&self, delta: impl Into<String>) -> Result<()> {
self.tx.send(InternalStreamEvent::ThinkingDelta(delta.into())).await.ok();
Ok(())
}
pub async fn send_complete(&self, input_tokens: u32, output_tokens: u32) -> Result<()> {
self.tx.send(InternalStreamEvent::Complete { input_tokens, output_tokens }).await.ok();
Ok(())
}
pub async fn send_error(&self, error: impl Into<String>) -> Result<()> {
self.tx.send(InternalStreamEvent::Error(error.into())).await.ok();
Ok(())
}
}