fix: 审计后续 3 项修复 — 残留清理 + FTS5 CJK + HTTP 大小限制
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
1. Shell Hands 残留清理 (3处): - message.rs: 移除过时的 zclaw_hands::slideshow 注释 - user_profiler.rs: slideshow 偏好改为 RecentTopic - handStore.test.ts: 移除 speech mock 数据 (3→2) 2. zclaw-growth FTS5 CJK 查询修复: - sanitize_fts_query CJK 路径从精确短语改为 token OR 组合 - "Rust 编程" → "rust" OR "编程" (之前是 "rust 编程" 精确匹配) - 修复 test_memory_lifecycle + test_semantic_search_ranking 3. WASM HTTP 响应大小限制: - Content-Length 预检 + 读取后截断 (1MB 上限) - read_to_string 改为显式错误处理 651 测试全通过,0 失败。
This commit is contained in:
@@ -461,13 +461,58 @@ impl SqliteStorage {
|
||||
});
|
||||
|
||||
if has_cjk {
|
||||
// For CJK, use the full query as a quoted phrase for substring matching
|
||||
// trigram will match any 3-char subsequence
|
||||
if lower.len() >= 3 {
|
||||
format!("\"{}\"", lower)
|
||||
} else {
|
||||
String::new()
|
||||
// For CJK queries, extract tokens: CJK character sequences and ASCII words.
|
||||
// Join with OR for broad matching (not exact phrase, which would miss scattered terms).
|
||||
let mut tokens: Vec<String> = Vec::new();
|
||||
let mut cjk_buf = String::new();
|
||||
let mut ascii_buf = String::new();
|
||||
|
||||
for ch in lower.chars() {
|
||||
let is_cjk = matches!(ch, '\u{4E00}'..='\u{9FFF}' | '\u{3400}'..='\u{4DBF}' | '\u{F900}'..='\u{FAFF}');
|
||||
if is_cjk {
|
||||
if !ascii_buf.is_empty() {
|
||||
if ascii_buf.len() >= 2 {
|
||||
tokens.push(format!("\"{}\"", ascii_buf));
|
||||
}
|
||||
ascii_buf.clear();
|
||||
}
|
||||
cjk_buf.push(ch);
|
||||
} else if ch.is_alphanumeric() {
|
||||
if !cjk_buf.is_empty() {
|
||||
// Flush CJK buffer — each CJK character is a potential token
|
||||
// (trigram indexes 3-char sequences, so single CJK chars won't
|
||||
// match alone, but 2+ char sequences will)
|
||||
if cjk_buf.len() >= 2 {
|
||||
tokens.push(format!("\"{}\"", cjk_buf));
|
||||
}
|
||||
cjk_buf.clear();
|
||||
}
|
||||
ascii_buf.push(ch);
|
||||
} else {
|
||||
// Separator — flush both buffers
|
||||
if cjk_buf.len() >= 2 {
|
||||
tokens.push(format!("\"{}\"", cjk_buf));
|
||||
}
|
||||
cjk_buf.clear();
|
||||
if ascii_buf.len() >= 2 {
|
||||
tokens.push(format!("\"{}\"", ascii_buf));
|
||||
}
|
||||
ascii_buf.clear();
|
||||
}
|
||||
}
|
||||
// Flush remaining
|
||||
if cjk_buf.len() >= 2 {
|
||||
tokens.push(format!("\"{}\"", cjk_buf));
|
||||
}
|
||||
if ascii_buf.len() >= 2 {
|
||||
tokens.push(format!("\"{}\"", ascii_buf));
|
||||
}
|
||||
|
||||
if tokens.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
tokens.join(" OR ")
|
||||
} else {
|
||||
// For non-CJK, split into terms and join with OR
|
||||
let terms: Vec<String> = lower
|
||||
|
||||
Reference in New Issue
Block a user