fix(saas): harden model group failover + relay reliability
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

- cache: insert-then-retain pattern avoids empty-window race during refresh
- relay: manage_task_status flag for proper failover state transitions
- relay: retry_task re-resolves model groups instead of blind provider reuse
- relay: filter empty-member groups from available models list
- relay: quota cache stale entry cleanup (TTL 5x expiry)
- error: from_sqlx_unique helper for 409 vs 500 distinction
- model_config: unique constraint handling, duplicate member check
- model_config: failover_strategy whitelist, model_id vs group name conflict check
- model_config: group-scoped member removal with group_id validation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-04-04 12:26:55 +08:00
parent 894c0d7b15
commit 5c48d62f7e
6 changed files with 221 additions and 64 deletions

View File

@@ -95,7 +95,7 @@ pub async fn create_provider(db: &PgPool, req: &CreateProviderRequest, enc_key:
)
.bind(&id).bind(&req.name).bind(&req.display_name).bind(&encrypted_api_key)
.bind(&req.base_url).bind(&req.api_protocol).bind(&req.rate_limit_rpm).bind(&req.rate_limit_tpm).bind(&now)
.execute(db).await?;
.execute(db).await.map_err(|e| SaasError::from_sqlx_unique(e, &format!("Provider '{}'", req.name)))?;
get_provider(db, &id).await
}
@@ -210,6 +210,15 @@ pub async fn create_model(db: &PgPool, req: &CreateModelRequest) -> SaasResult<M
)));
}
// M-2: 检查 model_id 不与模型组名冲突(避免路由歧义)
let group_conflict: Option<(String,)> = sqlx::query_as("SELECT id FROM model_groups WHERE name = $1")
.bind(&req.model_id).fetch_optional(db).await?;
if group_conflict.is_some() {
return Err(SaasError::InvalidInput(
format!("模型 ID '{}' 与已有模型组名称冲突,请使用不同的 ID", req.model_id)
));
}
let ctx = req.context_window.unwrap_or(8192);
let max_out = req.max_output_tokens.unwrap_or(4096);
let streaming = req.supports_streaming.unwrap_or(true);
@@ -223,7 +232,7 @@ pub async fn create_model(db: &PgPool, req: &CreateModelRequest) -> SaasResult<M
)
.bind(&id).bind(&req.provider_id).bind(&req.model_id).bind(&req.alias)
.bind(ctx).bind(max_out).bind(streaming).bind(vision).bind(pi).bind(po).bind(&now)
.execute(db).await?;
.execute(db).await.map_err(|e| SaasError::from_sqlx_unique(e, &format!("模型 '{}' 在 Provider '{}'", req.model_id, req.provider_id)))?;
get_model(db, &id).await
}
@@ -548,6 +557,14 @@ pub async fn get_model_group(db: &PgPool, group_id: &str) -> SaasResult<ModelGro
}
pub async fn create_model_group(db: &PgPool, req: &CreateModelGroupRequest) -> SaasResult<ModelGroupInfo> {
// M-S1: failover_strategy 白名单校验
const VALID_STRATEGIES: &[&str] = &["quota_aware", "priority", "random"];
if !VALID_STRATEGIES.contains(&req.failover_strategy.as_str()) {
return Err(SaasError::InvalidInput(
format!("failover_strategy 必须是 {:?} 之一", VALID_STRATEGIES)
));
}
let id = uuid::Uuid::new_v4().to_string();
let now = chrono::Utc::now().to_rfc3339();
@@ -573,7 +590,7 @@ pub async fn create_model_group(db: &PgPool, req: &CreateModelGroupRequest) -> S
)
.bind(&id).bind(&req.name).bind(&req.display_name).bind(&req.description)
.bind(&req.failover_strategy).bind(&now)
.execute(db).await?;
.execute(db).await.map_err(|e| SaasError::from_sqlx_unique(e, &format!("模型组 '{}'", req.name)))?;
get_model_group(db, &id).await
}
@@ -630,13 +647,25 @@ pub async fn add_group_member(
.bind(&req.model_id).fetch_optional(db).await?
.ok_or_else(|| SaasError::NotFound(format!("模型 {} 不存在", req.model_id)))?;
// M-S4: 检查重复成员(避免 DB unique violation 返回 500)
let duplicate: Option<(String,)> = sqlx::query_as(
"SELECT id FROM model_group_members WHERE group_id = $1 AND provider_id = $2 AND model_id = $3"
)
.bind(group_id).bind(&req.provider_id).bind(&req.model_id)
.fetch_optional(db).await?;
if duplicate.is_some() {
return Err(SaasError::AlreadyExists(
format!("Provider {} 的模型 {} 已在该模型组中", req.provider_id, req.model_id)
));
}
let id = uuid::Uuid::new_v4().to_string();
sqlx::query(
"INSERT INTO model_group_members (id, group_id, provider_id, model_id, priority, created_at, updated_at)
VALUES ($1, $2, $3, $4, $5, NOW(), NOW())"
)
.bind(&id).bind(group_id).bind(&req.provider_id).bind(&req.model_id).bind(req.priority)
.execute(db).await?;
.execute(db).await.map_err(|e| SaasError::from_sqlx_unique(e, &format!("Provider {} 的模型 {} 在该模型组", req.provider_id, req.model_id)))?;
Ok(ModelGroupMemberInfo {
id,
@@ -647,11 +676,12 @@ pub async fn add_group_member(
})
}
pub async fn remove_group_member(db: &PgPool, member_id: &str) -> SaasResult<()> {
let result = sqlx::query("DELETE FROM model_group_members WHERE id = $1")
.bind(member_id).execute(db).await?;
pub async fn remove_group_member(db: &PgPool, group_id: &str, member_id: &str) -> SaasResult<()> {
// M-5: 验证成员确实属于该组
let result = sqlx::query("DELETE FROM model_group_members WHERE id = $1 AND group_id = $2")
.bind(member_id).bind(group_id).execute(db).await?;
if result.rows_affected() == 0 {
return Err(SaasError::NotFound(format!("成员 {} 不存在", member_id)));
return Err(SaasError::NotFound(format!("成员 {} 不属于该模型组", member_id)));
}
Ok(())
}