fix(relay): API Key 解密失败自愈 — 启动迁移 + 容错跳过
Some checks are pending
CI / Lint & TypeCheck (push) Waiting to run
CI / Unit Tests (push) Waiting to run
CI / Build Frontend (push) Waiting to run
CI / Rust Check (push) Waiting to run
CI / Security Scan (push) Waiting to run
CI / E2E Tests (push) Blocked by required conditions

根因: select_best_key 遇到解密失败时直接 500 返回,
不会尝试下一个 key。如果 DB 中有旧的加密格式 key,
整个 relay 请求被阻断。

修复:
- key_pool: 解密失败时 warn + skip 到下一个 key,不再 500
- key_pool: 新增 heal_provider_keys() 启动自愈迁移
  - 逐个尝试解密所有加密 key
  - 解密成功 → 用当前密钥重新加密(幂等)
  - 解密失败 → 标记 is_active=false + warn
- main.rs: 启动时调用自愈迁移(在 TOTP 迁移之后)
This commit is contained in:
iven
2026-04-16 02:40:44 +08:00
parent 7dea456fda
commit b69dc6115d
2 changed files with 58 additions and 1 deletions

View File

@@ -99,6 +99,8 @@ async fn main() -> anyhow::Result<()> {
if let Err(e) = zclaw_saas::crypto::migrate_legacy_totp_secrets(&db, &enc_key).await {
tracing::warn!("TOTP legacy migration check failed: {}", e);
}
// Self-heal: re-encrypt provider keys with current key
zclaw_saas::relay::key_pool::heal_provider_keys(&db, &enc_key).await;
} else {
drop(config_for_migration);
}

View File

@@ -117,7 +117,13 @@ pub async fn select_best_key(db: &PgPool, provider_id: &str, enc_key: &[u8; 32])
}
// 此 Key 可用 — 解密 key_value
let decrypted_kv = decrypt_key_value(key_value, enc_key)?;
let decrypted_kv = match decrypt_key_value(key_value, enc_key) {
Ok(v) => v,
Err(e) => {
tracing::warn!("Key {} decryption failed, skipping: {}", id, e);
continue;
}
};
let selection = KeySelection {
key: PoolKey {
id: id.clone(),
@@ -371,3 +377,52 @@ fn parse_cooldown_remaining(cooldown_until: &str, now: &str) -> i64 {
_ => 60, // 默认 60 秒
}
}
/// Startup self-healing: re-encrypt all provider keys with the current encryption key.
///
/// Loads every `provider_keys` row whose `key_value` starts with `enc:` and tries to
/// decrypt it with `enc_key`:
///
/// * Decryption succeeds: the plaintext is encrypted again with `enc_key` and the row's
///   `key_value` is overwritten with the new ciphertext.
/// * Decryption fails: a warning is logged and the row is marked `is_active = FALSE`.
///
/// This is a best-effort routine: it never returns an error. Every database or crypto
/// failure is reported through `tracing::warn!`, and a summary line is logged when at
/// least one key was processed.
///
/// # Returns
///
/// The number of keys that were successfully re-encrypted and written back. Returns `0`
/// when the initial query fails or when no encrypted keys exist.
pub async fn heal_provider_keys(db: &PgPool, enc_key: &[u8; 32]) -> usize {
    // A failed SELECT must not look like "nothing to heal": log it, then bail out.
    let rows: Vec<(String, String)> = match sqlx::query_as(
        "SELECT id, key_value FROM provider_keys WHERE key_value LIKE 'enc:%'",
    )
    .fetch_all(db)
    .await
    {
        Ok(rows) => rows,
        Err(e) => {
            tracing::warn!("[heal] Failed to load provider keys, skipping self-heal: {}", e);
            return 0;
        }
    };

    let mut healed = 0usize;
    let mut failed = 0usize;

    for (id, key_value) in &rows {
        let plaintext = match crypto::decrypt_value(key_value, enc_key) {
            Ok(plaintext) => plaintext,
            Err(e) => {
                tracing::warn!("[heal] Cannot decrypt key {}, marking inactive: {}", id, e);
                // Surface a failed deactivation instead of discarding it: the key
                // would otherwise stay active while being undecryptable.
                if let Err(db_err) =
                    sqlx::query("UPDATE provider_keys SET is_active = FALSE WHERE id = $1")
                        .bind(id)
                        .execute(db)
                        .await
                {
                    tracing::warn!("[heal] Failed to mark key {} inactive: {}", id, db_err);
                }
                failed += 1;
                continue;
            }
        };

        // Re-encrypt with the current key and write the result back in place.
        let new_encrypted = match crypto::encrypt_value(&plaintext, enc_key) {
            Ok(new_encrypted) => new_encrypted,
            Err(e) => {
                tracing::warn!("[heal] Failed to re-encrypt key {}: {}", id, e);
                failed += 1;
                continue;
            }
        };

        match sqlx::query("UPDATE provider_keys SET key_value = $1 WHERE id = $2")
            .bind(&new_encrypted)
            .bind(id)
            .execute(db)
            .await
        {
            Ok(_) => healed += 1,
            Err(e) => {
                tracing::warn!("[heal] Failed to update key {}: {}", id, e);
                // Count the failed write-back so the summary below is accurate.
                failed += 1;
            }
        }
    }

    if healed > 0 || failed > 0 {
        tracing::info!("[heal] Provider keys: {} re-encrypted, {} failed", healed, failed);
    }
    healed
}