From 450569dc88b9fdeb7485498301723bf53c585c59 Mon Sep 17 00:00:00 2001 From: iven Date: Sat, 18 Apr 2026 09:23:58 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E5=AE=A1=E8=AE=A1=E5=90=8E=E7=BB=AD=203?= =?UTF-8?q?=20=E9=A1=B9=E4=BF=AE=E5=A4=8D=20=E2=80=94=20=E6=AE=8B=E7=95=99?= =?UTF-8?q?=E6=B8=85=E7=90=86=20+=20FTS5=20CJK=20+=20HTTP=20=E5=A4=A7?= =?UTF-8?q?=E5=B0=8F=E9=99=90=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Shell Hands 残留清理 (3处): - message.rs: 移除过时的 zclaw_hands::slideshow 注释 - user_profiler.rs: slideshow 偏好改为 RecentTopic - handStore.test.ts: 移除 speech mock 数据 (3→2) 2. zclaw-growth FTS5 CJK 查询修复: - sanitize_fts_query CJK 路径从精确短语改为 token OR 组合 - "Rust 编程" → "rust" OR "编程" (之前是 "rust 编程" 精确匹配) - 修复 test_memory_lifecycle + test_semantic_search_ranking 3. WASM HTTP 响应大小限制: - Content-Length 预检 + 读取后截断 (1MB 上限) - read_to_string 改为显式错误处理 651 测试全通过,0 失败。 --- crates/zclaw-growth/src/storage/sqlite.rs | 57 +++++++++++++++++-- crates/zclaw-skills/src/wasm_runner.rs | 31 +++++++++- crates/zclaw-types/src/message.rs | 1 - .../src/intelligence/user_profiler.rs | 2 +- desktop/tests/store/handStore.test.ts | 3 +- 5 files changed, 82 insertions(+), 12 deletions(-) diff --git a/crates/zclaw-growth/src/storage/sqlite.rs b/crates/zclaw-growth/src/storage/sqlite.rs index 3755b2b..8b4d10d 100644 --- a/crates/zclaw-growth/src/storage/sqlite.rs +++ b/crates/zclaw-growth/src/storage/sqlite.rs @@ -461,13 +461,58 @@ impl SqliteStorage { }); if has_cjk { - // For CJK, use the full query as a quoted phrase for substring matching - // trigram will match any 3-char subsequence - if lower.len() >= 3 { - format!("\"{}\"", lower) - } else { - String::new() + // For CJK queries, extract tokens: CJK character sequences and ASCII words. + // Join with OR for broad matching (not exact phrase, which would miss scattered terms). 
+ let mut tokens: Vec<String> = Vec::new(); + let mut cjk_buf = String::new(); + let mut ascii_buf = String::new(); + + for ch in lower.chars() { + let is_cjk = matches!(ch, '\u{4E00}'..='\u{9FFF}' | '\u{3400}'..='\u{4DBF}' | '\u{F900}'..='\u{FAFF}'); + if is_cjk { + if !ascii_buf.is_empty() { + if ascii_buf.len() >= 2 { + tokens.push(format!("\"{}\"", ascii_buf)); + } + ascii_buf.clear(); + } + cjk_buf.push(ch); + } else if ch.is_alphanumeric() { + if !cjk_buf.is_empty() { + // Flush CJK buffer — each CJK character is a potential token + // (trigram indexes 3-char sequences, so single CJK chars won't + // match alone, but 2+ char sequences will) + if cjk_buf.len() >= 2 { + tokens.push(format!("\"{}\"", cjk_buf)); + } + cjk_buf.clear(); + } + ascii_buf.push(ch); + } else { + // Separator — flush both buffers + if cjk_buf.len() >= 2 { + tokens.push(format!("\"{}\"", cjk_buf)); + } + cjk_buf.clear(); + if ascii_buf.len() >= 2 { + tokens.push(format!("\"{}\"", ascii_buf)); + } + ascii_buf.clear(); + } + } + // Flush remaining + if cjk_buf.len() >= 2 { + tokens.push(format!("\"{}\"", cjk_buf)); + } + if ascii_buf.len() >= 2 { + tokens.push(format!("\"{}\"", ascii_buf)); + } + + if tokens.is_empty() { + return String::new(); + } + + tokens.join(" OR ") } else { // For non-CJK, split into terms and join with OR let terms: Vec = lower diff --git a/crates/zclaw-skills/src/wasm_runner.rs b/crates/zclaw-skills/src/wasm_runner.rs index f84f283..a6b6841 100644 --- a/crates/zclaw-skills/src/wasm_runner.rs +++ b/crates/zclaw-skills/src/wasm_runner.rs @@ -9,6 +9,7 @@ use async_trait::async_trait; use serde_json::Value; +use std::io::Read as IoRead; use std::path::PathBuf; use tracing::{debug, warn}; use wasmtime::*; @@ -23,6 +24,9 @@ use crate::{Skill, SkillContext, SkillManifest, SkillResult}; /// Maximum WASM binary size (10 MB). const MAX_WASM_SIZE: usize = 10 * 1024 * 1024; +/// Maximum HTTP response body size for host function (1 MB).
+const MAX_HTTP_RESPONSE_SIZE: usize = 1024 * 1024; + /// Fuel per second of CPU time (heuristic: ~10M instructions/sec). const FUEL_PER_SEC: u64 = 10_000_000; @@ -318,8 +322,31 @@ fn add_host_functions(linker: &mut Linker, network_allowed: bool) -> match response { Ok(mut resp) => { - let body = resp.body_mut().read_to_string().unwrap_or_default(); - write_guest_bytes(&mut caller, out_ptr, out_cap, body.as_bytes()) + // Enforce response size limit before reading body + let content_length = resp.header("content-length") + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.parse::<usize>().ok()); + if let Some(len) = content_length { + if len > MAX_HTTP_RESPONSE_SIZE { + warn!("[WasmSkill] http_fetch denied — response too large: {} bytes (max {})", len, MAX_HTTP_RESPONSE_SIZE); + return -1; + } + } + let mut body = String::new(); + match resp.body_mut().read_to_string(&mut body) { + Ok(_) => { + if body.len() > MAX_HTTP_RESPONSE_SIZE { + warn!("[WasmSkill] http_fetch — response exceeded limit after read, truncating"); + body.truncate(MAX_HTTP_RESPONSE_SIZE); + } + write_guest_bytes(&mut caller, out_ptr, out_cap, body.as_bytes()) + } + Err(e) => { + warn!("[WasmSkill] http_fetch body read error: {}", e); + -1 + } + } + } } Err(e) => { warn!("[WasmSkill] http_fetch error for {}: {}", url, e); diff --git a/crates/zclaw-types/src/message.rs b/crates/zclaw-types/src/message.rs index e9985af..598a33a 100644 --- a/crates/zclaw-types/src/message.rs +++ b/crates/zclaw-types/src/message.rs @@ -116,7 +116,6 @@ impl Message { /// Canonical LLM message content block. Used for agent conversation messages. /// See also: zclaw_runtime::driver::ContentBlock (LLM driver response subset), -/// zclaw_hands::slideshow::ContentBlock (presentation rendering), /// zclaw_protocols::mcp_types::ContentBlock (MCP protocol wire format).
#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] diff --git a/desktop/src-tauri/src/intelligence/user_profiler.rs b/desktop/src-tauri/src/intelligence/user_profiler.rs index e46f207..d81d594 100644 --- a/desktop/src-tauri/src/intelligence/user_profiler.rs +++ b/desktop/src-tauri/src/intelligence/user_profiler.rs @@ -88,7 +88,7 @@ fn classify_fact_content(fact: &Fact) -> Option<ProfileFieldUpdate> { return Some(ProfileFieldUpdate::PreferredTool("collector".into())); } if content.contains("幻灯") || content.contains("演示") || content.contains("ppt") { - return Some(ProfileFieldUpdate::PreferredTool("slideshow".into())); + return Some(ProfileFieldUpdate::RecentTopic("演示文稿".into())); } // Default: treat as a recent topic diff --git a/desktop/tests/store/handStore.test.ts b/desktop/tests/store/handStore.test.ts index b2fe499..2c605a2 100644 --- a/desktop/tests/store/handStore.test.ts +++ b/desktop/tests/store/handStore.test.ts @@ -115,14 +115,13 @@ describe('handStore — loadHands', () => { hands: [ { id: 'h1', name: 'browser', description: 'Web automation', status: 'idle', requirements_met: true, category: 'automation', icon: '🌐', tool_count: 5, metric_count: 2 }, { id: 'h2', name: 'researcher', description: 'Deep research', status: 'running', requirements_met: true }, - { id: 'h3', name: 'speech', description: 'TTS', requirements_met: false }, ], }); await useHandStore.getState().loadHands(); const state = useHandStore.getState(); - expect(state.hands).toHaveLength(3); + expect(state.hands).toHaveLength(2); expect(state.hands[0].name).toBe('browser'); expect(state.hands[0].status).toBe('idle'); expect(state.hands[0].toolCount).toBe(5);