Phase 1: Anthropic prompt caching - Add cache_control ephemeral on system prompt blocks - Track cache_creation/cache_read tokens in CompletionResponse + StreamChunk Phase 2A: Parallel tool execution - Add ToolConcurrency enum (ReadOnly/Exclusive/Interactive) - JoinSet + Semaphore(3) for bounded parallel tool calls - 7 tools annotated with correct concurrency level - AtomicU32 for lock-free failure tracking in ToolErrorMiddleware Phase 2B: Tool output pruning - prune_tool_outputs() trims old ToolResult > 2000 chars to 500 chars - Integrated into CompactionMiddleware before token estimation Phase 3: Error classification + smart retry - LlmErrorKind + ClassifiedLlmError for structured error mapping - RetryDriver decorator with jittered exponential backoff - Kernel wraps all LLM calls with RetryDriver - CONTEXT_OVERFLOW recovery triggers emergency compaction in loop_runner
140 lines
5.0 KiB
Rust
140 lines
5.0 KiB
Rust
//! LLM 错误分类器。将 HTTP 状态码 + 错误体映射为 LlmErrorKind。
|
|
|
|
use std::time::Duration;
|
|
use zclaw_types::{LlmErrorKind, ClassifiedLlmError};
|
|
|
|
/// 分类 LLM 错误
|
|
pub fn classify_llm_error(
|
|
provider: &str,
|
|
status: u16,
|
|
body: &str,
|
|
is_timeout: bool,
|
|
) -> ClassifiedLlmError {
|
|
let _ = provider; // reserved for per-provider overrides
|
|
|
|
if is_timeout {
|
|
return ClassifiedLlmError {
|
|
kind: LlmErrorKind::Timeout,
|
|
retryable: true,
|
|
should_compress: false,
|
|
should_rotate_credential: false,
|
|
retry_after: None,
|
|
message: "请求超时".to_string(),
|
|
};
|
|
}
|
|
|
|
match status {
|
|
401 | 403 => ClassifiedLlmError {
|
|
kind: LlmErrorKind::Auth,
|
|
retryable: false,
|
|
should_compress: false,
|
|
should_rotate_credential: true,
|
|
retry_after: None,
|
|
message: "认证失败,请检查 API Key".to_string(),
|
|
},
|
|
402 => {
|
|
let is_quota_transient = body.contains("retry")
|
|
|| body.contains("limit")
|
|
|| body.contains("usage");
|
|
ClassifiedLlmError {
|
|
kind: if is_quota_transient { LlmErrorKind::RateLimited } else { LlmErrorKind::BillingExhausted },
|
|
retryable: is_quota_transient,
|
|
should_compress: false,
|
|
should_rotate_credential: !is_quota_transient,
|
|
retry_after: if is_quota_transient { Some(Duration::from_secs(30)) } else { None },
|
|
message: if is_quota_transient { "使用限制,稍后重试".to_string() } else { "计费额度已耗尽".to_string() },
|
|
}
|
|
}
|
|
429 => ClassifiedLlmError {
|
|
kind: LlmErrorKind::RateLimited,
|
|
retryable: true,
|
|
should_compress: false,
|
|
should_rotate_credential: true,
|
|
retry_after: parse_retry_after(body),
|
|
message: "速率限制".to_string(),
|
|
},
|
|
529 => ClassifiedLlmError {
|
|
kind: LlmErrorKind::Overloaded,
|
|
retryable: true,
|
|
should_compress: false,
|
|
should_rotate_credential: false,
|
|
retry_after: Some(Duration::from_secs(5)),
|
|
message: "提供商过载".to_string(),
|
|
},
|
|
500 | 502 => ClassifiedLlmError {
|
|
kind: LlmErrorKind::ServerError,
|
|
retryable: true,
|
|
should_compress: false,
|
|
should_rotate_credential: false,
|
|
retry_after: None,
|
|
message: "服务端错误".to_string(),
|
|
},
|
|
503 => ClassifiedLlmError {
|
|
kind: LlmErrorKind::Overloaded,
|
|
retryable: true,
|
|
should_compress: false,
|
|
should_rotate_credential: false,
|
|
retry_after: Some(Duration::from_secs(3)),
|
|
message: "服务暂时不可用".to_string(),
|
|
},
|
|
400 => {
|
|
let is_context_overflow = body.contains("context_length")
|
|
|| body.contains("max_tokens")
|
|
|| body.contains("too many tokens")
|
|
|| body.contains("prompt is too long");
|
|
ClassifiedLlmError {
|
|
kind: if is_context_overflow { LlmErrorKind::ContextOverflow } else { LlmErrorKind::Unknown },
|
|
retryable: false,
|
|
should_compress: is_context_overflow,
|
|
should_rotate_credential: false,
|
|
retry_after: None,
|
|
message: if is_context_overflow {
|
|
"上下文过长,需要压缩".to_string()
|
|
} else {
|
|
format!("请求错误: {}", &body[..body.len().min(200)])
|
|
},
|
|
}
|
|
}
|
|
404 => ClassifiedLlmError {
|
|
kind: LlmErrorKind::ModelNotFound,
|
|
retryable: false,
|
|
should_compress: false,
|
|
should_rotate_credential: false,
|
|
retry_after: None,
|
|
message: "模型不存在".to_string(),
|
|
},
|
|
_ => ClassifiedLlmError {
|
|
kind: LlmErrorKind::Unknown,
|
|
retryable: true,
|
|
should_compress: false,
|
|
should_rotate_credential: false,
|
|
retry_after: None,
|
|
message: format!("未知错误 ({}) {}", status, &body[..body.len().min(200)]),
|
|
},
|
|
}
|
|
}
|
|
|
|
fn parse_retry_after(body: &str) -> Option<Duration> {
|
|
// Anthropic: "Please retry after X seconds"
|
|
// OpenAI: "Please retry after Xms"
|
|
if let Some(secs) = extract_retry_seconds(body) {
|
|
return Some(Duration::from_secs(secs));
|
|
}
|
|
if let Some(ms) = extract_retry_millis(body) {
|
|
return Some(Duration::from_millis(ms));
|
|
}
|
|
Some(Duration::from_secs(2))
|
|
}
|
|
|
|
fn extract_retry_seconds(body: &str) -> Option<u64> {
|
|
let re = regex::Regex::new(r"retry\s+(?:after\s+)?(\d+)\s*(?:s|sec|seconds?)").ok()?;
|
|
let caps = re.captures(body)?;
|
|
caps[1].parse().ok()
|
|
}
|
|
|
|
fn extract_retry_millis(body: &str) -> Option<u64> {
|
|
let re = regex::Regex::new(r"retry\s+(?:after\s+)?(\d+)\s*ms").ok()?;
|
|
let caps = re.captures(body)?;
|
|
caps[1].parse().ok()
|
|
}
|