fix(relay,store): Provider Key 自动恢复 + Agent 创建友好错误 + 登录后重连
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

P0-1: key_pool.rs 新增 cooldown 过期 Key 自动恢复逻辑。
当 Provider 下没有任何活跃 Key,但存在 is_active=false 且 cooldown_until 已过期的 Key 时,
自动重新激活这些 Key 并重试选择,避免 relay/models 返回空数组导致聊天失败。

P0-2: agentStore.ts createClone/createFromTemplate 错误信息
从原始 HTTP 错误改为可操作的中文提示(502/503/401 分类处理)。

P1-2: auth.ts login 成功后触发 connectionStore.connect(),
确保 kernel 使用新 JWT 而非旧 token。
This commit is contained in:
iven
2026-04-19 13:16:12 +08:00
parent 8b1b08be82
commit 4ee587d070
3 changed files with 98 additions and 8 deletions

View File

@@ -142,13 +142,13 @@ pub async fn select_best_key(db: &PgPool, provider_id: &str, enc_key: &[u8; 32])
return Ok(selection);
}
// 所有 Key 都超限或无 Key — 先检查是否存在活跃 Key
let has_any_key: Option<(bool,)> = sqlx::query_as(
// 所有活跃 Key 都超限 — 先检查是否存在活跃 Key
let has_any_active: Option<(bool,)> = sqlx::query_as(
"SELECT COUNT(*) > 0 FROM provider_keys WHERE provider_id = $1 AND is_active = TRUE"
).bind(provider_id).fetch_optional(db).await?;
if has_any_key.is_some_and(|(b,)| b) {
// 有 key 但全部 cooldown 或超限 — 检查最快恢复时间
if has_any_active.is_some_and(|(b,)| b) {
// 有活跃 key 但全部 cooldown 或超限 — 检查最快恢复时间
let cooldown_row: Option<(String,)> = sqlx::query_as(
"SELECT cooldown_until::TEXT FROM provider_keys
WHERE provider_id = $1 AND is_active = TRUE AND cooldown_until IS NOT NULL AND cooldown_until::timestamptz > $2
@@ -169,7 +169,64 @@ pub async fn select_best_key(db: &PgPool, provider_id: &str, enc_key: &[u8; 32])
));
}
Err(SaasError::NotFound(format!("Provider {} 没有可用的 API Key", provider_id)))
// 没有活跃 Key — 自动恢复 cooldown 已过期但 is_active=false 的 Key
let reactivated: Option<(i64,)> = sqlx::query_as(
"UPDATE provider_keys SET is_active = TRUE, cooldown_until = NULL, updated_at = NOW()
WHERE provider_id = $1 AND is_active = FALSE
AND (cooldown_until IS NOT NULL AND cooldown_until::timestamptz <= $2)
RETURNING (SELECT COUNT(*) FROM provider_keys WHERE provider_id = $1 AND is_active = TRUE)"
).bind(provider_id).bind(&now).fetch_optional(db).await?;
if let Some((active_count,)) = &reactivated {
if *active_count > 0 {
tracing::info!(
"Provider {} 自动恢复了 {} 个 cooldown 过期的 Key重试选择",
provider_id, active_count
);
invalidate_cache(provider_id);
// 重试查询(不用递归,直接再走一次查询逻辑)
let retry_rows: Vec<(String, String, i32, Option<i64>, Option<i64>, Option<i64>, Option<i64>)> =
sqlx::query_as(
"SELECT pk.id, pk.key_value, pk.priority, pk.max_rpm, pk.max_tpm,
COALESCE(SUM(uw.request_count), 0)::bigint,
COALESCE(SUM(uw.token_count), 0)::bigint
FROM provider_keys pk
LEFT JOIN key_usage_window uw ON pk.id = uw.key_id
AND uw.window_minute >= to_char(NOW() - INTERVAL '1 minute', 'YYYY-MM-DDTHH24:MI')
WHERE pk.provider_id = $1 AND pk.is_active = TRUE
AND (pk.cooldown_until IS NULL OR pk.cooldown_until::timestamptz <= $2)
GROUP BY pk.id, pk.key_value, pk.priority, pk.max_rpm, pk.max_tpm
ORDER BY pk.priority ASC, pk.last_used_at ASC NULLS FIRST"
).bind(provider_id).bind(&now).fetch_all(db).await?;
for (id, key_value, _priority, max_rpm, max_tpm, req_count, token_count) in &retry_rows {
if let Some(rpm_limit) = max_rpm {
if *rpm_limit > 0 && req_count.unwrap_or(0) >= *rpm_limit { continue; }
}
if let Some(tpm_limit) = max_tpm {
if *tpm_limit > 0 && token_count.unwrap_or(0) >= *tpm_limit { continue; }
}
let decrypted_kv = match decrypt_key_value(key_value, enc_key) {
Ok(v) => v,
Err(_) => continue,
};
let selection = KeySelection {
key: PoolKey { id: id.clone(), key_value: decrypted_kv, priority: *_priority, max_rpm: *max_rpm, max_tpm: *max_tpm },
key_id: id.clone(),
};
get_cache().insert(provider_id.to_string(), CachedSelection {
selection: selection.clone(),
cached_at: Instant::now(),
});
return Ok(selection);
}
}
}
Err(SaasError::NotFound(format!(
"Provider {} 没有可用的 API Key所有 Key 已停用,请在管理后台激活)",
provider_id
)))
}
/// 记录 Key 使用量(滑动窗口)