fix(saas): harden model group failover + relay reliability
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
- cache: insert-then-retain pattern avoids empty-window race during refresh
- relay: manage_task_status flag for proper failover state transitions
- relay: retry_task re-resolves model groups instead of blind provider reuse
- relay: filter empty-member groups from available models list
- relay: quota cache stale entry cleanup (TTL 5x expiry)
- error: from_sqlx_unique helper for 409 vs 500 distinction
- model_config: unique constraint handling, duplicate member check
- model_config: failover_strategy whitelist, model_id vs group name conflict check
- model_config: group-scoped member removal with group_id validation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -202,6 +202,9 @@ pub async fn execute_relay(
|
||||
max_attempts: u32,
|
||||
base_delay_ms: u64,
|
||||
enc_key: &[u8; 32],
|
||||
// 当由 `execute_relay_with_failover` 调用时为 false,由外层统一管理 task 状态;
|
||||
// 独立调用时为 true,由本函数管理 task 状态。
|
||||
manage_task_status: bool,
|
||||
) -> SaasResult<RelayResponse> {
|
||||
validate_provider_url(provider_base_url).await?;
|
||||
|
||||
@@ -234,7 +237,7 @@ pub async fn execute_relay(
|
||||
|
||||
for attempt in 0..max_attempts {
|
||||
let is_first = attempt == 0;
|
||||
if is_first {
|
||||
if is_first && manage_task_status {
|
||||
update_task_status(db, task_id, "processing", None, None, None).await?;
|
||||
}
|
||||
|
||||
@@ -254,7 +257,9 @@ pub async fn execute_relay(
|
||||
Err(SaasError::RateLimited(msg)) => {
|
||||
// 所有 Key 均在冷却中
|
||||
let err_msg = format!("Key Pool 耗尽: {}", msg);
|
||||
update_task_status(db, task_id, "failed", None, None, Some(&err_msg)).await?;
|
||||
if manage_task_status {
|
||||
update_task_status(db, task_id, "failed", None, None, Some(&err_msg)).await?;
|
||||
}
|
||||
return Err(SaasError::RateLimited(msg));
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
@@ -377,8 +382,10 @@ pub async fn execute_relay(
|
||||
} else {
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
let (input_tokens, output_tokens) = extract_token_usage(&body);
|
||||
update_task_status(db, task_id, "completed",
|
||||
Some(input_tokens), Some(output_tokens), None).await?;
|
||||
if manage_task_status {
|
||||
update_task_status(db, task_id, "completed",
|
||||
Some(input_tokens), Some(output_tokens), None).await?;
|
||||
}
|
||||
// 记录 Key 使用量(失败仅记录,不阻塞响应)
|
||||
if let Err(e) = super::key_pool::record_key_usage(
|
||||
db, &key_id, Some(input_tokens + output_tokens),
|
||||
@@ -411,7 +418,9 @@ pub async fn execute_relay(
|
||||
"Key Pool 轮转耗尽 ({} attempts),所有 Key 均被限流",
|
||||
max_attempts
|
||||
);
|
||||
update_task_status(db, task_id, "failed", None, None, Some(&err_msg)).await?;
|
||||
if manage_task_status {
|
||||
update_task_status(db, task_id, "failed", None, None, Some(&err_msg)).await?;
|
||||
}
|
||||
return Err(SaasError::RateLimited(err_msg));
|
||||
}
|
||||
|
||||
@@ -425,7 +434,9 @@ pub async fn execute_relay(
|
||||
if !is_retryable_status(status) || attempt + 1 >= max_attempts {
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
let err_msg = format!("上游返回 HTTP {}: {}", status, &body[..body.len().min(500)]);
|
||||
update_task_status(db, task_id, "failed", None, None, Some(&err_msg)).await?;
|
||||
if manage_task_status {
|
||||
update_task_status(db, task_id, "failed", None, None, Some(&err_msg)).await?;
|
||||
}
|
||||
return Err(SaasError::Relay(err_msg));
|
||||
}
|
||||
tracing::warn!(
|
||||
@@ -436,7 +447,9 @@ pub async fn execute_relay(
|
||||
Err(e) => {
|
||||
if !is_retryable_error(&e) || attempt + 1 >= max_attempts {
|
||||
let err_msg = format!("请求上游失败: {}", e);
|
||||
update_task_status(db, task_id, "failed", None, None, Some(&err_msg)).await?;
|
||||
if manage_task_status {
|
||||
update_task_status(db, task_id, "failed", None, None, Some(&err_msg)).await?;
|
||||
}
|
||||
return Err(SaasError::Relay(err_msg));
|
||||
}
|
||||
tracing::warn!(
|
||||
@@ -479,6 +492,9 @@ pub async fn execute_relay_with_failover(
|
||||
let failover_start = std::time::Instant::now();
|
||||
const FAILOVER_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
// C-3: 外层统一管理 task 状态 — 仅设一次 "processing"
|
||||
update_task_status(db, task_id, "processing", None, None, None).await?;
|
||||
|
||||
for (idx, candidate) in candidates.iter().enumerate() {
|
||||
// M-3: 超时预算检查 — 防止级联失败累积过长
|
||||
if failover_start.elapsed() >= FAILOVER_TIMEOUT {
|
||||
@@ -502,6 +518,7 @@ pub async fn execute_relay_with_failover(
|
||||
max_attempts_per_provider,
|
||||
base_delay_ms,
|
||||
enc_key,
|
||||
false, // C-3: 外层管理 task 状态
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -542,9 +559,15 @@ pub async fn execute_relay_with_failover(
|
||||
}
|
||||
}
|
||||
|
||||
Err(last_error.unwrap_or(SaasError::RateLimited(
|
||||
// C-3: 所有候选失败 — 外层统一标记 task 为 "failed"
|
||||
let final_error = last_error.unwrap_or_else(|| SaasError::RateLimited(
|
||||
"所有候选 Provider 均不可用".into(),
|
||||
)))
|
||||
));
|
||||
let err_msg = format!("{}", final_error);
|
||||
if let Err(e) = update_task_status(db, task_id, "failed", None, None, Some(&err_msg)).await {
|
||||
tracing::warn!("Failed to update task {} status after failover exhaustion: {}", task_id, e);
|
||||
}
|
||||
Err(final_error)
|
||||
}
|
||||
|
||||
/// 替换 JSON body 中的 "model" 字段为当前候选的物理模型 ID
|
||||
@@ -627,12 +650,15 @@ pub async fn sort_candidates_by_quota(
|
||||
|
||||
let map: HashMap<String, i64> = quota_rows.into_iter().collect();
|
||||
|
||||
// 更新缓存
|
||||
// 更新缓存 + 清理过期条目
|
||||
{
|
||||
let mut cache_guard = cache.lock().unwrap();
|
||||
for (pid, remaining) in &map {
|
||||
cache_guard.insert(pid.clone(), (*remaining, now));
|
||||
}
|
||||
// M-S3: 清理超过 TTL 5x(25s)的陈旧条目,防止已删除 Provider 的条目永久残留
|
||||
let ttl_5x = QUOTA_CACHE_TTL * 5;
|
||||
cache_guard.retain(|_, (_, ts)| now.saturating_duration_since(*ts) < ttl_5x);
|
||||
}
|
||||
|
||||
map
|
||||
|
||||
Reference in New Issue
Block a user