fix(desktop): resolve 2 release-blocking P1 defects

P1-04: GenerationPipeline hardcoded model="default", causing classroom
generation to fail with 404. Added a model field to the GenerationPipeline
struct, passed from the kernel config via with_driver(driver, model).
Static scene generation now receives the model parameter.

P1-03: LLM API returned 500 DATABASE_ERROR under concurrent requests.
Added a single retry for transient DB errors (PoolTimedOut/Io) in
create_relay_task after a 200ms delay.
Recommend setting ZCLAW_DB_MIN_CONNECTIONS=10 for burst resilience.
This commit is contained in:
iven
2026-04-05 19:18:41 +08:00
parent a458e3f7d8
commit 90855dc83e
3 changed files with 35 additions and 10 deletions

View File

@@ -33,6 +33,11 @@ fn is_retryable_error(e: &reqwest::Error) -> bool {
// ============ Relay Task Management ============
/// 判断 sqlx 错误是否为可重试的瞬态错误(连接池耗尽、临时网络故障)
fn is_transient_db_error(e: &sqlx::Error) -> bool {
matches!(e, sqlx::Error::PoolTimedOut | sqlx::Error::Io(_))
}
pub async fn create_relay_task(
db: &PgPool,
account_id: &str,
@@ -47,16 +52,32 @@ pub async fn create_relay_task(
let request_hash = hash_request(request_body);
let max_attempts = max_attempts.max(1).min(5);
// INSERT ... RETURNING 合并两次 DB 往返为一次
let row: RelayTaskRow = sqlx::query_as(
let query = sqlx::query_as::<_, RelayTaskRow>(
"INSERT INTO relay_tasks (id, account_id, provider_id, model_id, request_hash, request_body, status, priority, attempt_count, max_attempts, queued_at, created_at)
VALUES ($1, $2, $3, $4, $5, $6, 'queued', $7, 0, $8, $9, $9)
RETURNING id, account_id, provider_id, model_id, status, priority, attempt_count, max_attempts, input_tokens, output_tokens, error_message, queued_at, started_at, completed_at, created_at"
)
.bind(&id).bind(account_id).bind(provider_id).bind(model_id)
.bind(&request_hash).bind(request_body).bind(priority).bind(max_attempts as i64).bind(&now)
.fetch_one(db)
.await?;
.bind(&request_hash).bind(request_body).bind(priority).bind(max_attempts as i64).bind(&now);
// 对瞬时 DB 错误(连接池耗尽/超时)重试一次
let row = match query.fetch_one(db).await {
Ok(row) => row,
Err(e) if is_transient_db_error(&e) => {
tracing::warn!("Transient DB error in create_relay_task, retrying: {}", e);
tokio::time::sleep(Duration::from_millis(200)).await;
sqlx::query_as::<_, RelayTaskRow>(
"INSERT INTO relay_tasks (id, account_id, provider_id, model_id, request_hash, request_body, status, priority, attempt_count, max_attempts, queued_at, created_at)
VALUES ($1, $2, $3, $4, $5, $6, 'queued', $7, 0, $8, $9, $9)
RETURNING id, account_id, provider_id, model_id, status, priority, attempt_count, max_attempts, input_tokens, output_tokens, error_message, queued_at, started_at, completed_at, created_at"
)
.bind(&id).bind(account_id).bind(provider_id).bind(model_id)
.bind(&request_hash).bind(request_body).bind(priority).bind(max_attempts as i64).bind(&now)
.fetch_one(db)
.await?
}
Err(e) => return Err(e.into()),
};
Ok(RelayTaskInfo {
id: row.id, account_id: row.account_id, provider_id: row.provider_id, model_id: row.model_id,