From b69dc6115dcd6ef76baff6fd9d919cf4dd021b2e Mon Sep 17 00:00:00 2001
From: iven
Date: Thu, 16 Apr 2026 02:40:44 +0800
Subject: [PATCH] =?UTF-8?q?fix(relay):=20API=20Key=20=E8=A7=A3=E5=AF=86?=
 =?UTF-8?q?=E5=A4=B1=E8=B4=A5=E8=87=AA=E6=84=88=20=E2=80=94=20=E5=90=AF?=
 =?UTF-8?q?=E5=8A=A8=E8=BF=81=E7=A7=BB=20+=20=E5=AE=B9=E9=94=99=E8=B7=B3?=
 =?UTF-8?q?=E8=BF=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

根因: select_best_key 遇到解密失败时直接 500 返回, 不会尝试下一个 key。
如果 DB 中有旧的加密格式 key, 整个 relay 请求被阻断。

修复:
- key_pool: 解密失败时 warn + skip 到下一个 key,不再 500
- key_pool: 新增 heal_provider_keys() 启动自愈迁移
  - 逐个尝试解密所有加密 key
  - 解密成功 → 用当前密钥重新加密(幂等)
  - 解密失败 → 标记 is_active=false + warn
- main.rs: 启动时调用自愈迁移(在 TOTP 迁移之后)
---
 crates/zclaw-saas/src/main.rs           |  2 +
 crates/zclaw-saas/src/relay/key_pool.rs | 91 ++++++++++++++++++++++++-
 2 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/crates/zclaw-saas/src/main.rs b/crates/zclaw-saas/src/main.rs
index e4d8095..92eca44 100644
--- a/crates/zclaw-saas/src/main.rs
+++ b/crates/zclaw-saas/src/main.rs
@@ -99,6 +99,8 @@ async fn main() -> anyhow::Result<()> {
         if let Err(e) = zclaw_saas::crypto::migrate_legacy_totp_secrets(&db, &enc_key).await {
             tracing::warn!("TOTP legacy migration check failed: {}", e);
         }
+        // Self-heal: re-encrypt provider keys with current key
+        zclaw_saas::relay::key_pool::heal_provider_keys(&db, &enc_key).await;
     } else {
         drop(config_for_migration);
     }
diff --git a/crates/zclaw-saas/src/relay/key_pool.rs b/crates/zclaw-saas/src/relay/key_pool.rs
index 4f6267a..3718548 100644
--- a/crates/zclaw-saas/src/relay/key_pool.rs
+++ b/crates/zclaw-saas/src/relay/key_pool.rs
@@ -117,7 +117,13 @@ pub async fn select_best_key(db: &PgPool, provider_id: &str, enc_key: &[u8; 32])
         }
 
         // 此 Key 可用 — 解密 key_value
-        let decrypted_kv = decrypt_key_value(key_value, enc_key)?;
+        let decrypted_kv = match decrypt_key_value(key_value, enc_key) {
+            Ok(v) => v,
+            Err(e) => {
+                tracing::warn!("Key {} decryption failed, skipping: {}", id, e);
+                continue;
+            }
+        };
         let selection = KeySelection {
             key: PoolKey {
                 id: id.clone(),
@@ -371,3 +377,86 @@ fn parse_cooldown_remaining(cooldown_until: &str, now: &str) -> i64 {
         _ => 60, // 默认 60 秒
     }
 }
+
+/// Startup self-healing: re-encrypt all provider keys with the current encryption key.
+///
+/// Loads every `provider_keys` row whose `key_value` starts with `enc:` and tries to
+/// decrypt it with `enc_key`:
+///
+/// - decryption succeeds: the plaintext is encrypted again with `enc_key` and the row
+///   is updated in place;
+/// - decryption fails: a warning is logged and the row is marked `is_active = FALSE`.
+///
+/// Errors are logged, never returned: a failed query or update is reported with
+/// `tracing::warn!` and the affected key is counted as failed.
+///
+/// Returns the number of keys that were re-encrypted and written back.
+pub async fn heal_provider_keys(db: &PgPool, enc_key: &[u8; 32]) -> usize {
+    let rows: Vec<(String, String)> = match sqlx::query_as(
+        "SELECT id, key_value FROM provider_keys WHERE key_value LIKE 'enc:%'",
+    )
+    .fetch_all(db)
+    .await
+    {
+        Ok(rows) => rows,
+        Err(e) => {
+            // A failed query must not look like "nothing to heal".
+            tracing::warn!("[heal] Failed to load provider keys: {}", e);
+            return 0;
+        }
+    };
+
+    let mut healed = 0usize;
+    let mut failed = 0usize;
+
+    for (id, key_value) in &rows {
+        let plaintext = match crypto::decrypt_value(key_value, enc_key) {
+            Ok(plaintext) => plaintext,
+            Err(e) => {
+                // NOTE(review): a wrong `enc_key` would deactivate every encrypted key
+                // here; confirm that is acceptable for startup.
+                tracing::warn!("[heal] Cannot decrypt key {}, marking inactive: {}", id, e);
+                if let Err(e) = sqlx::query(
+                    "UPDATE provider_keys SET is_active = FALSE WHERE id = $1",
+                )
+                .bind(id)
+                .execute(db)
+                .await
+                {
+                    tracing::warn!("[heal] Failed to mark key {} inactive: {}", id, e);
+                }
+                failed += 1;
+                continue;
+            }
+        };
+
+        // Re-encrypt with the current key.
+        let new_encrypted = match crypto::encrypt_value(&plaintext, enc_key) {
+            Ok(value) => value,
+            Err(e) => {
+                tracing::warn!("[heal] Failed to re-encrypt key {}: {}", id, e);
+                failed += 1;
+                continue;
+            }
+        };
+
+        match sqlx::query("UPDATE provider_keys SET key_value = $1 WHERE id = $2")
+            .bind(&new_encrypted)
+            .bind(id)
+            .execute(db)
+            .await
+        {
+            Ok(_) => healed += 1,
+            Err(e) => {
+                // A key that could not be written back was not healed.
+                tracing::warn!("[heal] Failed to update key {}: {}", id, e);
+                failed += 1;
+            }
+        }
+    }
+
+    if healed > 0 || failed > 0 {
+        tracing::info!("[heal] Provider keys: {} re-encrypted, {} failed", healed, failed);
+    }
+    healed
+}