fix: 审计后续 3 项修复 — 残留清理 + FTS5 CJK + HTTP 大小限制
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
1. Shell Hands 残留清理 (3处):
   - message.rs: 移除过时的 zclaw_hands::slideshow 注释
   - user_profiler.rs: slideshow 偏好改为 RecentTopic
   - handStore.test.ts: 移除 speech mock 数据 (3→2)

2. zclaw-growth FTS5 CJK 查询修复:
   - sanitize_fts_query CJK 路径从精确短语改为 token OR 组合
   - "Rust 编程" → "rust" OR "编程" (之前是 "rust 编程" 精确匹配)
   - 修复 test_memory_lifecycle + test_semantic_search_ranking

3. WASM HTTP 响应大小限制:
   - Content-Length 预检 + 读取后截断 (1MB 上限)
   - read_to_string 改为显式错误处理

651 测试全通过,0 失败。
This commit is contained in:
@@ -461,13 +461,58 @@ impl SqliteStorage {
|
|||||||
});
|
});
|
||||||
|
|
||||||
if has_cjk {
|
if has_cjk {
|
||||||
// For CJK, use the full query as a quoted phrase for substring matching
|
// For CJK queries, extract tokens: CJK character sequences and ASCII words.
|
||||||
// trigram will match any 3-char subsequence
|
// Join with OR for broad matching (not exact phrase, which would miss scattered terms).
|
||||||
if lower.len() >= 3 {
|
let mut tokens: Vec<String> = Vec::new();
|
||||||
format!("\"{}\"", lower)
|
let mut cjk_buf = String::new();
|
||||||
} else {
|
let mut ascii_buf = String::new();
|
||||||
String::new()
|
|
||||||
|
for ch in lower.chars() {
|
||||||
|
let is_cjk = matches!(ch, '\u{4E00}'..='\u{9FFF}' | '\u{3400}'..='\u{4DBF}' | '\u{F900}'..='\u{FAFF}');
|
||||||
|
if is_cjk {
|
||||||
|
if !ascii_buf.is_empty() {
|
||||||
|
if ascii_buf.len() >= 2 {
|
||||||
|
tokens.push(format!("\"{}\"", ascii_buf));
|
||||||
|
}
|
||||||
|
ascii_buf.clear();
|
||||||
|
}
|
||||||
|
cjk_buf.push(ch);
|
||||||
|
} else if ch.is_alphanumeric() {
|
||||||
|
if !cjk_buf.is_empty() {
|
||||||
|
// Flush CJK buffer — each CJK character is a potential token
|
||||||
|
// (trigram indexes 3-char sequences, so single CJK chars won't
|
||||||
|
// match alone, but 2+ char sequences will)
|
||||||
|
if cjk_buf.len() >= 2 {
|
||||||
|
tokens.push(format!("\"{}\"", cjk_buf));
|
||||||
|
}
|
||||||
|
cjk_buf.clear();
|
||||||
|
}
|
||||||
|
ascii_buf.push(ch);
|
||||||
|
} else {
|
||||||
|
// Separator — flush both buffers
|
||||||
|
if cjk_buf.len() >= 2 {
|
||||||
|
tokens.push(format!("\"{}\"", cjk_buf));
|
||||||
|
}
|
||||||
|
cjk_buf.clear();
|
||||||
|
if ascii_buf.len() >= 2 {
|
||||||
|
tokens.push(format!("\"{}\"", ascii_buf));
|
||||||
|
}
|
||||||
|
ascii_buf.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// Flush remaining
|
||||||
|
if cjk_buf.len() >= 2 {
|
||||||
|
tokens.push(format!("\"{}\"", cjk_buf));
|
||||||
|
}
|
||||||
|
if ascii_buf.len() >= 2 {
|
||||||
|
tokens.push(format!("\"{}\"", ascii_buf));
|
||||||
|
}
|
||||||
|
|
||||||
|
if tokens.is_empty() {
|
||||||
|
return String::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens.join(" OR ")
|
||||||
} else {
|
} else {
|
||||||
// For non-CJK, split into terms and join with OR
|
// For non-CJK, split into terms and join with OR
|
||||||
let terms: Vec<String> = lower
|
let terms: Vec<String> = lower
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
use std::io::Read as IoRead;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use tracing::{debug, warn};
|
use tracing::{debug, warn};
|
||||||
use wasmtime::*;
|
use wasmtime::*;
|
||||||
@@ -23,6 +24,9 @@ use crate::{Skill, SkillContext, SkillManifest, SkillResult};
|
|||||||
/// Maximum WASM binary size (10 MB).
|
/// Maximum WASM binary size (10 MB).
|
||||||
const MAX_WASM_SIZE: usize = 10 * 1024 * 1024;
|
const MAX_WASM_SIZE: usize = 10 * 1024 * 1024;
|
||||||
|
|
||||||
|
/// Maximum HTTP response body size for host function (1 MB).
|
||||||
|
const MAX_HTTP_RESPONSE_SIZE: usize = 1024 * 1024;
|
||||||
|
|
||||||
/// Fuel per second of CPU time (heuristic: ~10M instructions/sec).
|
/// Fuel per second of CPU time (heuristic: ~10M instructions/sec).
|
||||||
const FUEL_PER_SEC: u64 = 10_000_000;
|
const FUEL_PER_SEC: u64 = 10_000_000;
|
||||||
|
|
||||||
@@ -318,8 +322,31 @@ fn add_host_functions(linker: &mut Linker<WasiP1Ctx>, network_allowed: bool) ->
|
|||||||
|
|
||||||
match response {
|
match response {
|
||||||
Ok(mut resp) => {
|
Ok(mut resp) => {
|
||||||
let body = resp.body_mut().read_to_string().unwrap_or_default();
|
// Enforce response size limit before reading body
|
||||||
write_guest_bytes(&mut caller, out_ptr, out_cap, body.as_bytes())
|
let content_length = resp.header("content-length")
|
||||||
|
.and_then(|v| v.to_str().ok())
|
||||||
|
.and_then(|v| v.parse::<usize>().ok());
|
||||||
|
if let Some(len) = content_length {
|
||||||
|
if len > MAX_HTTP_RESPONSE_SIZE {
|
||||||
|
warn!("[WasmSkill] http_fetch denied — response too large: {} bytes (max {})", len, MAX_HTTP_RESPONSE_SIZE);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut body = String::new();
|
||||||
|
match resp.body_mut().read_to_string(&mut body) {
|
||||||
|
Ok(_) => {
|
||||||
|
if body.len() > MAX_HTTP_RESPONSE_SIZE {
|
||||||
|
warn!("[WasmSkill] http_fetch — response exceeded limit after read, truncating");
|
||||||
|
body.truncate(MAX_HTTP_RESPONSE_SIZE);
|
||||||
|
}
|
||||||
|
write_guest_bytes(&mut caller, out_ptr, out_cap, body.as_bytes())
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!("[WasmSkill] http_fetch body read error: {}", e);
|
||||||
|
-1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
warn!("[WasmSkill] http_fetch error for {}: {}", url, e);
|
warn!("[WasmSkill] http_fetch error for {}: {}", url, e);
|
||||||
|
|||||||
@@ -116,7 +116,6 @@ impl Message {
|
|||||||
|
|
||||||
/// Canonical LLM message content block. Used for agent conversation messages.
|
/// Canonical LLM message content block. Used for agent conversation messages.
|
||||||
/// See also: zclaw_runtime::driver::ContentBlock (LLM driver response subset),
|
/// See also: zclaw_runtime::driver::ContentBlock (LLM driver response subset),
|
||||||
/// zclaw_hands::slideshow::ContentBlock (presentation rendering),
|
|
||||||
/// zclaw_protocols::mcp_types::ContentBlock (MCP protocol wire format).
|
/// zclaw_protocols::mcp_types::ContentBlock (MCP protocol wire format).
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
#[serde(tag = "type", rename_all = "snake_case")]
|
#[serde(tag = "type", rename_all = "snake_case")]
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ fn classify_fact_content(fact: &Fact) -> Option<ProfileFieldUpdate> {
|
|||||||
return Some(ProfileFieldUpdate::PreferredTool("collector".into()));
|
return Some(ProfileFieldUpdate::PreferredTool("collector".into()));
|
||||||
}
|
}
|
||||||
if content.contains("幻灯") || content.contains("演示") || content.contains("ppt") {
|
if content.contains("幻灯") || content.contains("演示") || content.contains("ppt") {
|
||||||
return Some(ProfileFieldUpdate::PreferredTool("slideshow".into()));
|
return Some(ProfileFieldUpdate::RecentTopic("演示文稿".into()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Default: treat as a recent topic
|
// Default: treat as a recent topic
|
||||||
|
|||||||
@@ -115,14 +115,13 @@ describe('handStore — loadHands', () => {
|
|||||||
hands: [
|
hands: [
|
||||||
{ id: 'h1', name: 'browser', description: 'Web automation', status: 'idle', requirements_met: true, category: 'automation', icon: '🌐', tool_count: 5, metric_count: 2 },
|
{ id: 'h1', name: 'browser', description: 'Web automation', status: 'idle', requirements_met: true, category: 'automation', icon: '🌐', tool_count: 5, metric_count: 2 },
|
||||||
{ id: 'h2', name: 'researcher', description: 'Deep research', status: 'running', requirements_met: true },
|
{ id: 'h2', name: 'researcher', description: 'Deep research', status: 'running', requirements_met: true },
|
||||||
{ id: 'h3', name: 'speech', description: 'TTS', requirements_met: false },
|
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
await useHandStore.getState().loadHands();
|
await useHandStore.getState().loadHands();
|
||||||
|
|
||||||
const state = useHandStore.getState();
|
const state = useHandStore.getState();
|
||||||
expect(state.hands).toHaveLength(3);
|
expect(state.hands).toHaveLength(2);
|
||||||
expect(state.hands[0].name).toBe('browser');
|
expect(state.hands[0].name).toBe('browser');
|
||||||
expect(state.hands[0].status).toBe('idle');
|
expect(state.hands[0].status).toBe('idle');
|
||||||
expect(state.hands[0].toolCount).toBe(5);
|
expect(state.hands[0].toolCount).toBe(5);
|
||||||
|
|||||||
Reference in New Issue
Block a user