fix(relay): API Key 解密失败自愈 — 启动迁移 + 容错跳过
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
根因: select_best_key 遇到解密失败时直接返回 500, 不会尝试下一个 key。如果 DB 中存在旧加密格式的 key, 整个 relay 请求都会被阻断。

修复:
- key_pool: 解密失败时 warn + skip 到下一个 key, 不再返回 500
- key_pool: 新增 heal_provider_keys() 启动自愈迁移
  - 逐个尝试解密所有加密 key
  - 解密成功 → 用当前密钥重新加密(幂等)
  - 解密失败 → 标记 is_active=false + warn
- main.rs: 启动时调用自愈迁移(在 TOTP 迁移之后)
This commit is contained in:
@@ -99,6 +99,8 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
if let Err(e) = zclaw_saas::crypto::migrate_legacy_totp_secrets(&db, &enc_key).await {
|
if let Err(e) = zclaw_saas::crypto::migrate_legacy_totp_secrets(&db, &enc_key).await {
|
||||||
tracing::warn!("TOTP legacy migration check failed: {}", e);
|
tracing::warn!("TOTP legacy migration check failed: {}", e);
|
||||||
}
|
}
|
||||||
|
// Self-heal: re-encrypt provider keys with current key
|
||||||
|
zclaw_saas::relay::key_pool::heal_provider_keys(&db, &enc_key).await;
|
||||||
} else {
|
} else {
|
||||||
drop(config_for_migration);
|
drop(config_for_migration);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -117,7 +117,13 @@ pub async fn select_best_key(db: &PgPool, provider_id: &str, enc_key: &[u8; 32])
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 此 Key 可用 — 解密 key_value
|
// 此 Key 可用 — 解密 key_value
|
||||||
let decrypted_kv = decrypt_key_value(key_value, enc_key)?;
|
let decrypted_kv = match decrypt_key_value(key_value, enc_key) {
|
||||||
|
Ok(v) => v,
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!("Key {} decryption failed, skipping: {}", id, e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
let selection = KeySelection {
|
let selection = KeySelection {
|
||||||
key: PoolKey {
|
key: PoolKey {
|
||||||
id: id.clone(),
|
id: id.clone(),
|
||||||
@@ -371,3 +377,52 @@ fn parse_cooldown_remaining(cooldown_until: &str, now: &str) -> i64 {
|
|||||||
_ => 60, // 默认 60 秒
|
_ => 60, // 默认 60 秒
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Startup self-healing: re-encrypt all provider keys with current encryption key.
|
||||||
|
///
|
||||||
|
/// For each encrypted key, attempts decryption with the current key.
|
||||||
|
/// If decryption succeeds, re-encrypts and updates in-place (idempotent).
|
||||||
|
/// If decryption fails, logs a warning and marks the key inactive.
|
||||||
|
pub async fn heal_provider_keys(db: &PgPool, enc_key: &[u8; 32]) -> usize {
|
||||||
|
let rows: Vec<(String, String)> = sqlx::query_as(
|
||||||
|
"SELECT id, key_value FROM provider_keys WHERE key_value LIKE 'enc:%'"
|
||||||
|
).fetch_all(db).await.unwrap_or_default();
|
||||||
|
|
||||||
|
let mut healed = 0usize;
|
||||||
|
let mut failed = 0usize;
|
||||||
|
|
||||||
|
for (id, key_value) in &rows {
|
||||||
|
match crypto::decrypt_value(key_value, enc_key) {
|
||||||
|
Ok(plaintext) => {
|
||||||
|
// Re-encrypt with current key (idempotent if same key)
|
||||||
|
match crypto::encrypt_value(&plaintext, enc_key) {
|
||||||
|
Ok(new_encrypted) => {
|
||||||
|
if let Err(e) = sqlx::query(
|
||||||
|
"UPDATE provider_keys SET key_value = $1 WHERE id = $2"
|
||||||
|
).bind(&new_encrypted).bind(id).execute(db).await {
|
||||||
|
tracing::warn!("[heal] Failed to update key {}: {}", id, e);
|
||||||
|
} else {
|
||||||
|
healed += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!("[heal] Failed to re-encrypt key {}: {}", id, e);
|
||||||
|
failed += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!("[heal] Cannot decrypt key {}, marking inactive: {}", id, e);
|
||||||
|
let _ = sqlx::query(
|
||||||
|
"UPDATE provider_keys SET is_active = FALSE WHERE id = $1"
|
||||||
|
).bind(id).execute(db).await;
|
||||||
|
failed += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if healed > 0 || failed > 0 {
|
||||||
|
tracing::info!("[heal] Provider keys: {} re-encrypted, {} failed", healed, failed);
|
||||||
|
}
|
||||||
|
healed
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user