fix(relay): API Key 解密失败自愈 — 启动迁移 + 容错跳过
Some checks are pending
CI / Lint & TypeCheck (push) Waiting to run
CI / Unit Tests (push) Waiting to run
CI / Build Frontend (push) Waiting to run
CI / Rust Check (push) Waiting to run
CI / Security Scan (push) Waiting to run
CI / E2E Tests (push) Blocked by required conditions
Some checks are pending
CI / Lint & TypeCheck (push) Waiting to run
CI / Unit Tests (push) Waiting to run
CI / Build Frontend (push) Waiting to run
CI / Rust Check (push) Waiting to run
CI / Security Scan (push) Waiting to run
CI / E2E Tests (push) Blocked by required conditions
根因: select_best_key 遇到解密失败时直接返回 500, 不会尝试下一个 key。如果 DB 中有旧的加密格式 key, 整个 relay 请求被阻断。

修复:
- key_pool: 解密失败时 warn + skip 到下一个 key, 不再返回 500
- key_pool: 新增 heal_provider_keys() 启动自愈迁移
  - 逐个尝试解密所有加密 key
  - 解密成功 → 用当前密钥重新加密(幂等)
  - 解密失败 → 标记 is_active=false + warn
- main.rs: 启动时调用自愈迁移(在 TOTP 迁移之后)
This commit is contained in:
@@ -99,6 +99,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
if let Err(e) = zclaw_saas::crypto::migrate_legacy_totp_secrets(&db, &enc_key).await {
|
||||
tracing::warn!("TOTP legacy migration check failed: {}", e);
|
||||
}
|
||||
// Self-heal: re-encrypt provider keys with current key
|
||||
zclaw_saas::relay::key_pool::heal_provider_keys(&db, &enc_key).await;
|
||||
} else {
|
||||
drop(config_for_migration);
|
||||
}
|
||||
|
||||
@@ -117,7 +117,13 @@ pub async fn select_best_key(db: &PgPool, provider_id: &str, enc_key: &[u8; 32])
|
||||
}
|
||||
|
||||
// 此 Key 可用 — 解密 key_value
|
||||
let decrypted_kv = decrypt_key_value(key_value, enc_key)?;
|
||||
let decrypted_kv = match decrypt_key_value(key_value, enc_key) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
tracing::warn!("Key {} decryption failed, skipping: {}", id, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let selection = KeySelection {
|
||||
key: PoolKey {
|
||||
id: id.clone(),
|
||||
@@ -371,3 +377,52 @@ fn parse_cooldown_remaining(cooldown_until: &str, now: &str) -> i64 {
|
||||
_ => 60, // 默认 60 秒
|
||||
}
|
||||
}
|
||||
|
||||
/// Startup self-healing: re-encrypt all provider keys with current encryption key.
|
||||
///
|
||||
/// For each encrypted key, attempts decryption with the current key.
|
||||
/// If decryption succeeds, re-encrypts and updates in-place (idempotent).
|
||||
/// If decryption fails, logs a warning and marks the key inactive.
|
||||
pub async fn heal_provider_keys(db: &PgPool, enc_key: &[u8; 32]) -> usize {
|
||||
let rows: Vec<(String, String)> = sqlx::query_as(
|
||||
"SELECT id, key_value FROM provider_keys WHERE key_value LIKE 'enc:%'"
|
||||
).fetch_all(db).await.unwrap_or_default();
|
||||
|
||||
let mut healed = 0usize;
|
||||
let mut failed = 0usize;
|
||||
|
||||
for (id, key_value) in &rows {
|
||||
match crypto::decrypt_value(key_value, enc_key) {
|
||||
Ok(plaintext) => {
|
||||
// Re-encrypt with current key (idempotent if same key)
|
||||
match crypto::encrypt_value(&plaintext, enc_key) {
|
||||
Ok(new_encrypted) => {
|
||||
if let Err(e) = sqlx::query(
|
||||
"UPDATE provider_keys SET key_value = $1 WHERE id = $2"
|
||||
).bind(&new_encrypted).bind(id).execute(db).await {
|
||||
tracing::warn!("[heal] Failed to update key {}: {}", id, e);
|
||||
} else {
|
||||
healed += 1;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("[heal] Failed to re-encrypt key {}: {}", id, e);
|
||||
failed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("[heal] Cannot decrypt key {}, marking inactive: {}", id, e);
|
||||
let _ = sqlx::query(
|
||||
"UPDATE provider_keys SET is_active = FALSE WHERE id = $1"
|
||||
).bind(id).execute(db).await;
|
||||
failed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if healed > 0 || failed > 0 {
|
||||
tracing::info!("[heal] Provider keys: {} re-encrypted, {} failed", healed, failed);
|
||||
}
|
||||
healed
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user