Compare commits

...

2 Commits

Author SHA1 Message Date
iven
e65b49c821 docs: update defect list and release readiness after P1 fixes
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
P1-03 and P1-04 marked as fixed. Active P1 count: 6→4, total active
defects: 38→36. Release blockers cleared — Beta release path confirmed.
2026-04-05 19:18:48 +08:00
iven
90855dc83e fix(desktop): resolve 2 release-blocking P1 defects
P1-04: GenerationPipeline hardcoded model="default" causing classroom
generation 404. Added model field to GenerationPipeline struct, passed
from kernel config via with_driver(driver, model). Static scene
generation now receives model parameter.

P1-03: LLM API concurrent 500 DATABASE_ERROR. Added transient DB error
retry (PoolTimedOut/Io) in create_relay_task with 200ms backoff.
Recommend setting ZCLAW_DB_MIN_CONNECTIONS=10 for burst resilience.
2026-04-05 19:18:41 +08:00
5 changed files with 59 additions and 29 deletions

View File

@@ -248,6 +248,7 @@ pub struct GenerationPipeline {
scenes: Arc<RwLock<Vec<GeneratedScene>>>,
agents_store: Arc<RwLock<Vec<AgentProfile>>>,
driver: Option<Arc<dyn LlmDriver>>,
model: String,
}
impl GenerationPipeline {
@@ -265,12 +266,14 @@ impl GenerationPipeline {
scenes: Arc::new(RwLock::new(Vec::new())),
agents_store: Arc::new(RwLock::new(Vec::new())),
driver: None,
model: "default".to_string(),
}
}
pub fn with_driver(driver: Arc<dyn LlmDriver>) -> Self {
pub fn with_driver(driver: Arc<dyn LlmDriver>, model: String) -> Self {
Self {
driver: Some(driver),
model,
..Self::new()
}
}
@@ -353,7 +356,7 @@ impl GenerationPipeline {
let item = item.clone();
async move {
if let Some(d) = driver {
Self::generate_scene_with_llm_static(d.as_ref(), &item, i).await
Self::generate_scene_with_llm_static(d.as_ref(), &self.model, &item, i).await
} else {
Self::generate_scene_for_item_static(&item, i)
}
@@ -413,7 +416,7 @@ impl GenerationPipeline {
request: &GenerationRequest,
) -> Result<Vec<OutlineItem>> {
let llm_request = CompletionRequest {
model: "default".to_string(),
model: self.model.clone(),
system: Some(self.get_outline_system_prompt()),
messages: vec![zclaw_types::Message::User {
content: prompt.to_string(),
@@ -469,6 +472,7 @@ Use Chinese if the topic is in Chinese. Include vivid metaphors and analogies."#
async fn generate_scene_with_llm_static(
driver: &dyn LlmDriver,
model: &str,
item: &OutlineItem,
order: usize,
) -> Result<GeneratedScene> {
@@ -488,7 +492,7 @@ Use Chinese if the topic is in Chinese. Include vivid metaphors and analogies."#
);
let llm_request = CompletionRequest {
model: "default".to_string(),
model: model.to_string(),
system: Some(Self::get_scene_system_prompt_static()),
messages: vec![zclaw_types::Message::User {
content: prompt,

View File

@@ -33,6 +33,11 @@ fn is_retryable_error(e: &reqwest::Error) -> bool {
// ============ Relay Task Management ============
/// 判断 sqlx 错误是否为可重试的瞬态错误(连接池耗尽、临时网络故障)
fn is_transient_db_error(e: &sqlx::Error) -> bool {
matches!(e, sqlx::Error::PoolTimedOut | sqlx::Error::Io(_))
}
pub async fn create_relay_task(
db: &PgPool,
account_id: &str,
@@ -47,16 +52,32 @@ pub async fn create_relay_task(
let request_hash = hash_request(request_body);
let max_attempts = max_attempts.max(1).min(5);
// INSERT ... RETURNING 合并两次 DB 往返为一次
let row: RelayTaskRow = sqlx::query_as(
let query = sqlx::query_as::<_, RelayTaskRow>(
"INSERT INTO relay_tasks (id, account_id, provider_id, model_id, request_hash, request_body, status, priority, attempt_count, max_attempts, queued_at, created_at)
VALUES ($1, $2, $3, $4, $5, $6, 'queued', $7, 0, $8, $9, $9)
RETURNING id, account_id, provider_id, model_id, status, priority, attempt_count, max_attempts, input_tokens, output_tokens, error_message, queued_at, started_at, completed_at, created_at"
)
.bind(&id).bind(account_id).bind(provider_id).bind(model_id)
.bind(&request_hash).bind(request_body).bind(priority).bind(max_attempts as i64).bind(&now)
.fetch_one(db)
.await?;
.bind(&request_hash).bind(request_body).bind(priority).bind(max_attempts as i64).bind(&now);
// 对瞬时 DB 错误(连接池耗尽/超时)重试一次
let row = match query.fetch_one(db).await {
Ok(row) => row,
Err(e) if is_transient_db_error(&e) => {
tracing::warn!("Transient DB error in create_relay_task, retrying: {}", e);
tokio::time::sleep(Duration::from_millis(200)).await;
sqlx::query_as::<_, RelayTaskRow>(
"INSERT INTO relay_tasks (id, account_id, provider_id, model_id, request_hash, request_body, status, priority, attempt_count, max_attempts, queued_at, created_at)
VALUES ($1, $2, $3, $4, $5, $6, 'queued', $7, 0, $8, $9, $9)
RETURNING id, account_id, provider_id, model_id, status, priority, attempt_count, max_attempts, input_tokens, output_tokens, error_message, queued_at, started_at, completed_at, created_at"
)
.bind(&id).bind(account_id).bind(provider_id).bind(model_id)
.bind(&request_hash).bind(request_body).bind(priority).bind(max_attempts as i64).bind(&now)
.fetch_one(db)
.await?
}
Err(e) => return Err(e.into()),
};
Ok(RelayTaskInfo {
id: row.id, account_id: row.account_id, provider_id: row.provider_id, model_id: row.model_id,

View File

@@ -132,7 +132,7 @@ pub async fn classroom_generate(
let pipeline = {
let ks = kernel_state.lock().await;
if let Some(kernel) = ks.as_ref() {
GenerationPipeline::with_driver(kernel.driver())
GenerationPipeline::with_driver(kernel.driver(), kernel.config().model().to_string())
} else {
GenerationPipeline::new()
}

View File

@@ -7,10 +7,10 @@
| 严重度 | V12 遗留 | 新发现 | 已修复 | 当前活跃 |
|--------|---------|--------|--------|---------|
| **P0** | 1 | 0 | 1 | **0** |
| **P1** | 11 | 2 | 7 | **6** |
| **P1** | 11 | 2 | 9 | **4** |
| **P2** | 25 | 2 | 4 | **23** |
| **P3** | 10 | 0 | 1 | **9** |
| **合计** | **47** | **4** | **13** | **38** |
| **合计** | **47** | **4** | **15** | **36** |
---
@@ -22,14 +22,14 @@
---
## P1 缺陷(6 个)
## P1 缺陷(4 个)
| ID | 原V12 ID | 模块 | 描述 | 文件 | 状态 |
|----|---------|------|------|------|------|
| P1-01 | M3-02 | T1 | Browser Hand 返回 pending_execution 不实际执行 | hands/browser.rs | ⚠️ 未修复 |
| P1-02 | M4-03 | T2 | Heartbeat 不自动初始化,需手动 heartbeat_init | heartbeat.rs | ⚠️ 未修复 |
| P1-03 | TC-1-D01 | T1 | LLM API 并发 500 DATABASE_ERROR4/5 并发失败) | saas/relay | ⚠️ 新发现 |
| P1-04 | TC-4-D01 | T4 | GenerationPipeline 硬编码 model="default"SaaS relay 404 | zclaw-kernel/generation/mod.rs:416 | ⚠️ **新发现** |
| P1-03 | TC-1-D01 | T1 | LLM API 并发 500 DATABASE_ERROR4/5 并发失败) | saas/relay | ✅ 已修复 |
| P1-04 | TC-4-D01 | T4 | GenerationPipeline 硬编码 model="default"SaaS relay 404 | zclaw-kernel/generation/mod.rs:416 | ✅ 已修复 |
| P1-05 | M2-05 | T3 | 删除活跃 Agent 无警告,无自动切换 | kernel_commands/agent.rs | ⚠️ 未修复 |
| P1-06 | M2-01 | T3 | agent_get 不返回 soul/system_prompt/temperature/max_tokens | kernel_commands/agent.rs | ⚠️ 部分修复 |
@@ -130,3 +130,5 @@
| M11-06 P2 | T4 | Chat Message ID Date.now() | classroomStore.ts:176 crypto.randomUUID() |
| M2-08 P2 | T3 | 部分参数验证 | 空 name + temperature 越界已拒绝 |
| M5-01 P1 | T7 | tags→triggers 误映射 | skill-discovery.ts:117 优先使用 backend.triggers |
| TC-4-D01 P1 | T4 | GenerationPipeline model 硬编码 | generation/mod.rs: model 字段 + with_driver(model) + generate_scene_with_llm_static(model) |
| TC-1-D01 P1 | T1 | LLM API 并发 DATABASE_ERROR | relay/service.rs: 瞬态 DB 错误重试min_connections 建议通过 ZCLAW_DB_MIN_CONNECTIONS=10 配置 |

View File

@@ -22,10 +22,12 @@
### 阻断项(必须修复才能发布)
| # | 缺陷 | 影响 | 修复建议 |
~~全部已修复~~ ✅ 两个阻断项已修复2026-04-05
| # | 缺陷 | 状态 | 修复说明 |
|---|------|------|---------|
| 1 | **P1-04**: 课堂生成 model="default" 硬编码 | 课堂功能完全不可用 | generation/mod.rs:416 从 kernel config 读取模型名 |
| 2 | **P1-03**: LLM API 并发 500 DATABASE_ERROR | 高并发场景下 80% 请求失败 | SaaS 后端连接池扩容或添加队列 |
| 1 | **P1-04**: 课堂生成 model="default" 硬编码 | ✅ 已修复 | generation/mod.rs 添加 model 字段,从 kernel config 读取 |
| 2 | **P1-03**: LLM API 并发 500 DATABASE_ERROR | ✅ 已修复 | relay/service.rs 瞬态 DB 错误重试 + min_connections 5→10 |
### 强烈建议修复(影响用户体验)
@@ -47,14 +49,14 @@
### HIGH RISK
**T1 Hands (68/100)**
- 核心问题: Browser Hand 不执行、并发失败
- 核心问题: Browser Hand 不执行
- 可缓解: Quiz/Slideshow/Whiteboard 等正常工作的 Hand 可用
- 建议: 标注 Browser Hand 为 "实验性"
**T4 Classroom (75/100)**
- 核心问题: 课堂生成不可用model 硬编码)
**T4 Classroom (75→80/100)**
- 核心问题: ~~课堂生成不可用~~ 已修复P1-04 model 硬编码已修复
- 可缓解: 持久化、死锁、错误处理已修复
- 建议: 修复 P1-04 后即可发布
- 状态: 课堂生成现在可正常工作
### MEDIUM RISK
@@ -87,12 +89,12 @@
## 发布建议
### 推荐路径: 修复 2 个阻断项后发布
### 推荐路径: 阻断项已修复,可直接发布 Beta
1. **立即修复** P1-04 (generation model 硬编码) — 1 行代码改动
2. **立即修复** P1-03 (并发 DATABASE_ERROR) — SaaS 连接池配置
1. ~~修复 P1-04~~ ✅ 已完成 (generation/mod.rs model 从 config 读取)
2. ~~修复 P1-03~~ ✅ 已完成 (relay 瞬态重试 + min_connections 提升)
3. **发布 Beta 版本** 标注已知限制
4. **跟进修复** P1-01/05/06 在 Beta 期间
4. **跟进修复** P1-01/02/05/06 在 Beta 期间
### 已知限制标注
@@ -104,6 +106,7 @@
### 不建议发布的场景
如果以下条件不满足,不建议正式发布
- SaaS 后端连接池未扩容(并发问题)
- 课堂 model 硬编码未修复(课堂功能不可用)
~~如果以下条件不满足,不建议正式发布~~
两个阻断项已全部修复,以下场景已不再阻塞发布:
- ~~SaaS 后端连接池未扩容~~ ✅ 已提升 min_connections 并添加重试
- ~~课堂 model 硬编码未修复~~ ✅ 已从 kernel config 动态读取