Files
zclaw_openfang/crates/zclaw-runtime/src/driver/error_classifier.rs
iven 9060935401 perf(runtime): Hermes Phase 1-3 — prompt caching + parallel tools + smart retry
Phase 1: Anthropic prompt caching
- Add cache_control ephemeral on system prompt blocks
- Track cache_creation/cache_read tokens in CompletionResponse + StreamChunk

Phase 2A: Parallel tool execution
- Add ToolConcurrency enum (ReadOnly/Exclusive/Interactive)
- JoinSet + Semaphore(3) for bounded parallel tool calls
- 7 tools annotated with correct concurrency level
- AtomicU32 for lock-free failure tracking in ToolErrorMiddleware

Phase 2B: Tool output pruning
- prune_tool_outputs() trims old ToolResult > 2000 chars to 500 chars
- Integrated into CompactionMiddleware before token estimation

Phase 3: Error classification + smart retry
- LlmErrorKind + ClassifiedLlmError for structured error mapping
- RetryDriver decorator with jittered exponential backoff
- Kernel wraps all LLM calls with RetryDriver
- CONTEXT_OVERFLOW recovery triggers emergency compaction in loop_runner
2026-04-24 08:39:56 +08:00

140 lines
5.0 KiB
Rust

//! LLM 错误分类器。将 HTTP 状态码 + 错误体映射为 LlmErrorKind。
use std::time::Duration;
use zclaw_types::{LlmErrorKind, ClassifiedLlmError};
/// Classifies a failed LLM request into a [`ClassifiedLlmError`].
///
/// The decision is based on the timeout flag first, then on the HTTP status
/// code, with the response body consulted for the ambiguous statuses:
///
/// | Input                 | Kind                                   | Retryable |
/// |-----------------------|----------------------------------------|-----------|
/// | `is_timeout == true`  | `Timeout`                              | yes       |
/// | 401, 403              | `Auth` (rotate credential)             | no        |
/// | 402                   | `RateLimited` or `BillingExhausted`    | depends   |
/// | 429                   | `RateLimited` (rotate credential)      | yes       |
/// | 529, 503              | `Overloaded`                           | yes       |
/// | 500, 502              | `ServerError`                          | yes       |
/// | 400                   | `ContextOverflow` or `Unknown`         | no        |
/// | 404                   | `ModelNotFound`                        | no        |
/// | anything else         | `Unknown`                              | yes       |
///
/// # Parameters
/// - `provider`: currently unused; reserved for per-provider overrides.
/// - `status`: HTTP status code of the failed response.
/// - `body`: raw error body; matched case-sensitively for marker substrings.
/// - `is_timeout`: when `true`, `status` and `body` are ignored.
///
/// # Returns
/// A fully populated [`ClassifiedLlmError`]. Messages that embed the body
/// include at most the first 200 bytes of it, cut on a UTF-8 character
/// boundary so that multi-byte text can never cause a slicing panic.
pub fn classify_llm_error(
    provider: &str,
    status: u16,
    body: &str,
    is_timeout: bool,
) -> ClassifiedLlmError {
    /// Upper bound, in bytes, on how much of the body is echoed into a message.
    const BODY_PREVIEW_MAX_BYTES: usize = 200;

    let _ = provider; // reserved for per-provider overrides

    // A timeout takes precedence over whatever status code was supplied.
    if is_timeout {
        return ClassifiedLlmError {
            kind: LlmErrorKind::Timeout,
            retryable: true,
            should_compress: false,
            should_rotate_credential: false,
            retry_after: None,
            message: "请求超时".to_string(),
        };
    }

    match status {
        401 | 403 => ClassifiedLlmError {
            kind: LlmErrorKind::Auth,
            retryable: false,
            should_compress: false,
            should_rotate_credential: true,
            retry_after: None,
            message: "认证失败,请检查 API Key".to_string(),
        },
        402 => {
            // A 402 whose body mentions retry/limit/usage is treated as a
            // temporary usage cap rather than an exhausted billing account.
            let is_quota_transient =
                body.contains("retry") || body.contains("limit") || body.contains("usage");
            if is_quota_transient {
                ClassifiedLlmError {
                    kind: LlmErrorKind::RateLimited,
                    retryable: true,
                    should_compress: false,
                    should_rotate_credential: false,
                    retry_after: Some(Duration::from_secs(30)),
                    message: "使用限制,稍后重试".to_string(),
                }
            } else {
                ClassifiedLlmError {
                    kind: LlmErrorKind::BillingExhausted,
                    retryable: false,
                    should_compress: false,
                    should_rotate_credential: true,
                    retry_after: None,
                    message: "计费额度已耗尽".to_string(),
                }
            }
        }
        429 => ClassifiedLlmError {
            kind: LlmErrorKind::RateLimited,
            retryable: true,
            should_compress: false,
            should_rotate_credential: true,
            retry_after: parse_retry_after(body),
            message: "速率限制".to_string(),
        },
        529 => ClassifiedLlmError {
            kind: LlmErrorKind::Overloaded,
            retryable: true,
            should_compress: false,
            should_rotate_credential: false,
            retry_after: Some(Duration::from_secs(5)),
            message: "提供商过载".to_string(),
        },
        500 | 502 => ClassifiedLlmError {
            kind: LlmErrorKind::ServerError,
            retryable: true,
            should_compress: false,
            should_rotate_credential: false,
            retry_after: None,
            message: "服务端错误".to_string(),
        },
        503 => ClassifiedLlmError {
            kind: LlmErrorKind::Overloaded,
            retryable: true,
            should_compress: false,
            should_rotate_credential: false,
            retry_after: Some(Duration::from_secs(3)),
            message: "服务暂时不可用".to_string(),
        },
        400 => {
            // Only a 400 that names a context/token limit is recoverable by
            // compressing the conversation; every other 400 is left as Unknown.
            let is_context_overflow = body.contains("context_length")
                || body.contains("max_tokens")
                || body.contains("too many tokens")
                || body.contains("prompt is too long");
            ClassifiedLlmError {
                kind: if is_context_overflow {
                    LlmErrorKind::ContextOverflow
                } else {
                    LlmErrorKind::Unknown
                },
                retryable: false,
                should_compress: is_context_overflow,
                should_rotate_credential: false,
                retry_after: None,
                message: if is_context_overflow {
                    "上下文过长,需要压缩".to_string()
                } else {
                    format!(
                        "请求错误: {}",
                        truncate_on_char_boundary(body, BODY_PREVIEW_MAX_BYTES)
                    )
                },
            }
        }
        404 => ClassifiedLlmError {
            kind: LlmErrorKind::ModelNotFound,
            retryable: false,
            should_compress: false,
            should_rotate_credential: false,
            retry_after: None,
            message: "模型不存在".to_string(),
        },
        _ => ClassifiedLlmError {
            kind: LlmErrorKind::Unknown,
            retryable: true,
            should_compress: false,
            should_rotate_credential: false,
            retry_after: None,
            message: format!(
                "未知错误 ({}) {}",
                status,
                truncate_on_char_boundary(body, BODY_PREVIEW_MAX_BYTES)
            ),
        },
    }
}

/// Returns the longest prefix of `text` that is at most `max_bytes` long and
/// ends on a UTF-8 character boundary.
///
/// Slicing a `&str` at an arbitrary byte offset panics when the offset lands
/// inside a multi-byte character, so the cut point is moved backwards until it
/// is valid. Offset 0 is always a boundary, which guarantees termination.
fn truncate_on_char_boundary(text: &str, max_bytes: usize) -> &str {
    if text.len() <= max_bytes {
        return text;
    }
    let mut end = max_bytes;
    while !text.is_char_boundary(end) {
        end -= 1;
    }
    &text[..end]
}
/// Derives a retry delay from the text of a rate-limit error body.
///
/// A delay expressed in seconds is preferred, then one expressed in
/// milliseconds. When neither form is found a fixed 2-second delay is used, so
/// this function always returns `Some`.
///
/// Message shapes the extraction is written for (per the original notes):
/// - Anthropic: "Please retry after X seconds"
/// - OpenAI: "Please retry after Xms"
fn parse_retry_after(body: &str) -> Option<Duration> {
    extract_retry_seconds(body)
        .map(Duration::from_secs)
        .or_else(|| extract_retry_millis(body).map(Duration::from_millis))
        // Neither form was present: fall back to a fixed default delay.
        .or(Some(Duration::from_secs(2)))
}
fn extract_retry_seconds(body: &str) -> Option<u64> {
let re = regex::Regex::new(r"retry\s+(?:after\s+)?(\d+)\s*(?:s|sec|seconds?)").ok()?;
let caps = re.captures(body)?;
caps[1].parse().ok()
}
fn extract_retry_millis(body: &str) -> Option<u64> {
let re = regex::Regex::new(r"retry\s+(?:after\s+)?(\d+)\s*ms").ok()?;
let caps = re.captures(body)?;
caps[1].parse().ok()
}