// Changelog (from the introducing commits):
// Phase 1: Anthropic prompt caching — cache_control ephemeral on system prompt
//   blocks; track cache_creation/cache_read tokens in CompletionResponse + StreamChunk.
// Phase 2A: Parallel tool execution — ToolConcurrency enum (ReadOnly/Exclusive/
//   Interactive); JoinSet + Semaphore(3) for bounded parallel tool calls; 7 tools
//   annotated with concurrency levels; AtomicU32 failure tracking in ToolErrorMiddleware.
// Phase 2B: Tool output pruning — prune_tool_outputs() trims old ToolResult
//   > 2000 chars down to 500 chars, integrated into CompactionMiddleware.
// Phase 3: Error classification + smart retry — LlmErrorKind + ClassifiedLlmError;
//   RetryDriver decorator with jittered exponential backoff; kernel wraps all LLM
//   calls; CONTEXT_OVERFLOW recovery triggers emergency compaction in loop_runner.
//! Streaming response types
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc;
use zclaw_types::Result;
/// Stream chunk emitted during streaming.
///
/// This is the serializable type sent via Tauri events. The serde attributes
/// produce an internally tagged wire format: each variant serializes as
/// `{"type": "<snake_case_variant>", ...fields}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum StreamChunk {
    /// Incremental piece of assistant text.
    TextDelta { delta: String },
    /// Incremental piece of thinking output (for extended thinking models).
    ThinkingDelta { delta: String },
    /// A tool invocation has started; `id` correlates the following deltas.
    ToolUseStart { id: String, name: String },
    /// Partial tool-input text for the call identified by `id`.
    ToolUseDelta { id: String, delta: String },
    /// Tool call finished; `input` is the fully accumulated JSON arguments.
    ToolUseEnd { id: String, input: serde_json::Value },
    /// Stream completed successfully, with final usage accounting.
    Complete {
        input_tokens: u32,
        output_tokens: u32,
        stop_reason: String,
        // Prompt-cache usage, when the provider reports it. `#[serde(default)]`
        // keeps payloads from before these fields existed deserializable.
        #[serde(default)]
        cache_creation_input_tokens: Option<u32>,
        #[serde(default)]
        cache_read_input_tokens: Option<u32>,
    },
    /// The stream failed; `message` is a human-readable description.
    Error { message: String },
}
/// Streaming event for Tauri emission.
///
/// Envelope that pairs a [`StreamChunk`] with the routing identifiers the
/// frontend uses to dispatch it to the right session/agent view.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamEvent {
    /// Session ID for routing.
    pub session_id: String,
    /// Agent ID for routing.
    pub agent_id: String,
    /// The chunk content.
    pub chunk: StreamChunk,
}
impl StreamEvent {
|
|
pub fn new(session_id: impl Into<String>, agent_id: impl Into<String>, chunk: StreamChunk) -> Self {
|
|
Self {
|
|
session_id: session_id.into(),
|
|
agent_id: agent_id.into(),
|
|
chunk,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Legacy stream event for internal use with mpsc channels.
///
/// Internal (non-serialized) counterpart of [`StreamChunk`]; providers push
/// these through a `tokio::sync::mpsc` channel via [`StreamSender`].
// NOTE(review): unlike `StreamChunk::Complete`, this `Complete` carries no
// cache-token fields — confirm whether internal consumers need them too.
#[derive(Debug, Clone)]
pub enum InternalStreamEvent {
    /// Text delta received.
    TextDelta(String),
    /// Thinking delta received.
    ThinkingDelta(String),
    /// Tool use started; `id` correlates subsequent input chunks.
    ToolUseStart { id: String, name: String },
    /// Tool use input chunk for the call identified by `id`.
    ToolUseInput { id: String, chunk: String },
    /// Tool use completed with the accumulated JSON `input`.
    ToolUseEnd { id: String, input: serde_json::Value },
    /// Response completed with final token usage.
    Complete { input_tokens: u32, output_tokens: u32 },
    /// Error occurred; payload is a human-readable message.
    Error(String),
}
/// Stream sender wrapper.
///
/// Thin convenience wrapper around the producing half of the internal
/// event channel; see the `impl` block for the typed `send_*` helpers.
pub struct StreamSender {
    // Producing half of the internal event channel.
    tx: mpsc::Sender<InternalStreamEvent>,
}
impl StreamSender {
|
|
pub fn new(tx: mpsc::Sender<InternalStreamEvent>) -> Self {
|
|
Self { tx }
|
|
}
|
|
|
|
pub async fn send_text(&self, delta: impl Into<String>) -> Result<()> {
|
|
self.tx.send(InternalStreamEvent::TextDelta(delta.into())).await.ok();
|
|
Ok(())
|
|
}
|
|
|
|
pub async fn send_thinking(&self, delta: impl Into<String>) -> Result<()> {
|
|
self.tx.send(InternalStreamEvent::ThinkingDelta(delta.into())).await.ok();
|
|
Ok(())
|
|
}
|
|
|
|
pub async fn send_complete(&self, input_tokens: u32, output_tokens: u32) -> Result<()> {
|
|
self.tx.send(InternalStreamEvent::Complete { input_tokens, output_tokens }).await.ok();
|
|
Ok(())
|
|
}
|
|
|
|
pub async fn send_error(&self, error: impl Into<String>) -> Result<()> {
|
|
self.tx.send(InternalStreamEvent::Error(error.into())).await.ok();
|
|
Ok(())
|
|
}
|
|
}
|