feat(saas): add model groups for cross-provider failover
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Model Groups provide logical model names that map to multiple physical models across providers, with automatic failover when one provider's key pool is exhausted.

Backend:
- New model_groups + model_group_members tables with FK constraints
- Full CRUD API (7 endpoints) with admin-only write permissions
- Cache layer: DashMap-backed CachedModelGroup with load_from_db
- Relay integration: ModelResolution enum for Direct/Group routing
- Cross-provider failover: sort_candidates_by_quota + OnceLock cache
- Relay failure path: record failure usage + relay_dequeue (fixes queue counter leak that caused connection pool exhaustion)
- add_group_member: validate model_id exists before insert

Frontend:
- saas-relay-client: accept getModel() callback for dynamic model selection
- connectionStore: prefer conversationStore.currentModel over first available

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -37,15 +37,39 @@ pub struct CachedProvider {
|
||||
pub enabled: bool,
|
||||
}
|
||||
|
||||
// ============ Model Group 缓存(跨 Provider Failover) ============
|
||||
|
||||
/// In-memory snapshot of one model group: a logical model name backed by a
/// list of candidate provider/model pairs.
///
/// The cache loader builds one value per `model_groups` row and attaches the
/// `model_group_members` rows whose `group_id` equals [`CachedModelGroup::id`].
#[derive(Debug, Clone)]
pub struct CachedModelGroup {
    /// `id` column of the `model_groups` row.
    pub id: String,
    /// Logical model name; the group cache is keyed by this value.
    pub name: String,
    /// `display_name` column of the `model_groups` row.
    pub display_name: String,
    /// Description; the loader substitutes an empty string for SQL `NULL`.
    pub description: String,
    /// `enabled` column of the `model_groups` row.
    pub enabled: bool,
    /// Failover strategy name; the loader substitutes `"quota_aware"` for SQL `NULL`.
    pub failover_strategy: String,
    /// Candidate members of this group. The loader reads them with
    /// `ORDER BY priority ASC` and keeps that order. It does not filter on
    /// [`CachedGroupMember::enabled`], so disabled members are present here.
    pub members: Vec<CachedGroupMember>,
}

/// One candidate (provider, model) pair inside a [`CachedModelGroup`].
#[derive(Debug, Clone)]
pub struct CachedGroupMember {
    /// `id` column of the `model_group_members` row.
    pub id: String,
    /// `provider_id` column of the `model_group_members` row.
    pub provider_id: String,
    /// `model_id` column of the `model_group_members` row.
    pub model_id: String,
    /// Ordering key; the loader sorts members by this value in ascending order.
    pub priority: i32,
    /// `enabled` column of the `model_group_members` row.
    pub enabled: bool,
}
|
||||
|
||||
// ============ 聚合缓存结构 ============
|
||||
|
||||
/// 全局缓存,持有 Model / Provider / 队列计数器
|
||||
/// 全局缓存,持有 Model / Provider / Model Groups / 队列计数器
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AppCache {
|
||||
/// model_id → CachedModel (key 是 models.model_id,不是 id)
|
||||
pub models: Arc<DashMap<String, CachedModel>>,
|
||||
/// provider id → CachedProvider
|
||||
pub providers: Arc<DashMap<String, CachedProvider>>,
|
||||
/// model group name → CachedModelGroup(逻辑模型名到候选列表的映射)
|
||||
pub model_groups: Arc<DashMap<String, CachedModelGroup>>,
|
||||
/// account_id → 当前排队/处理中的任务数
|
||||
pub relay_queue_counts: Arc<DashMap<String, Arc<AtomicI64>>>,
|
||||
}
|
||||
@@ -55,6 +79,7 @@ impl AppCache {
|
||||
Self {
|
||||
models: Arc::new(DashMap::new()),
|
||||
providers: Arc::new(DashMap::new()),
|
||||
model_groups: Arc::new(DashMap::new()),
|
||||
relay_queue_counts: Arc::new(DashMap::new()),
|
||||
}
|
||||
}
|
||||
@@ -104,10 +129,44 @@ impl AppCache {
|
||||
});
|
||||
}
|
||||
|
||||
// Load model groups with members
|
||||
let group_rows: Vec<(String, String, String, String, bool, String)> = sqlx::query_as(
|
||||
"SELECT id, name, display_name, COALESCE(description, ''), enabled, COALESCE(failover_strategy, 'quota_aware') FROM model_groups"
|
||||
).fetch_all(db).await?;
|
||||
|
||||
let member_rows: Vec<(String, String, String, String, i32, bool)> = sqlx::query_as(
|
||||
"SELECT id, group_id, provider_id, model_id, priority, enabled \
|
||||
FROM model_group_members ORDER BY priority ASC"
|
||||
).fetch_all(db).await?;
|
||||
|
||||
self.model_groups.clear();
|
||||
for (id, name, display_name, description, enabled, failover_strategy) in &group_rows {
|
||||
let members: Vec<CachedGroupMember> = member_rows.iter()
|
||||
.filter(|(_, gid, _, _, _, _)| gid == id)
|
||||
.map(|(mid, _, pid, mid2, pri, en)| CachedGroupMember {
|
||||
id: mid.clone(),
|
||||
provider_id: pid.clone(),
|
||||
model_id: mid2.clone(),
|
||||
priority: *pri,
|
||||
enabled: *en,
|
||||
})
|
||||
.collect();
|
||||
self.model_groups.insert(name.clone(), CachedModelGroup {
|
||||
id: id.clone(),
|
||||
name: name.clone(),
|
||||
display_name: display_name.clone(),
|
||||
description: description.clone(),
|
||||
enabled: *enabled,
|
||||
failover_strategy: failover_strategy.clone(),
|
||||
members,
|
||||
});
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"Cache loaded: {} providers, {} models",
|
||||
"Cache loaded: {} providers, {} models, {} model groups",
|
||||
self.providers.len(),
|
||||
self.models.len()
|
||||
self.models.len(),
|
||||
self.model_groups.len()
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
@@ -183,6 +242,13 @@ impl AppCache {
|
||||
.map(|r| r.value().clone())
|
||||
}
|
||||
|
||||
/// 按逻辑模型名查找已启用的模型组。O(1) DashMap 查找。
|
||||
pub fn get_model_group(&self, name: &str) -> Option<CachedModelGroup> {
|
||||
self.model_groups.get(name)
|
||||
.filter(|g| g.enabled)
|
||||
.map(|r| r.value().clone())
|
||||
}
|
||||
|
||||
// ============ 缓存失效 ============
|
||||
|
||||
/// 清除 model 缓存中的指定条目(Admin CRUD 后调用)
|
||||
@@ -204,4 +270,9 @@ impl AppCache {
|
||||
pub fn invalidate_all_providers(&self) {
|
||||
self.providers.clear();
|
||||
}
|
||||
|
||||
/// 清除全部 model group 缓存
|
||||
pub fn invalidate_all_model_groups(&self) {
|
||||
self.model_groups.clear();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user