From 985644dd9a9c9083c7b6f4ea257fcd75cdd65716 Mon Sep 17 00:00:00 2001
From: iven
Date: Sat, 4 Apr 2026 18:52:02 +0800
Subject: [PATCH] fix(memory): FTS5 full-text search + browser hand autonomy gate

M4-05: Replace LIKE-only search with FTS5-first strategy:
- Add memories_fts virtual table (unicode61 tokenizer, id UNINDEXED)
- Backfill existing memories into the FTS index on init
- FTS5 MATCH primary path with CJK LIKE fallback
- Fall back to LIKE when the FTS5 query itself fails
- Sync FTS index on store() (delete + insert, no duplicate rows)
- Keep OFFSET handling and error propagation of the LIKE-only search

M3-03: Add autonomy approval check to browserHandStore:
- executeTemplate: check canAutoExecute before running
- executeScript: check approval gate for JS execution
---
 desktop/src-tauri/src/memory/persistent.rs | 212 ++++++++++++++++-----
 desktop/src/store/browserHandStore.ts      |  15 ++
 2 files changed, 183 insertions(+), 44 deletions(-)

diff --git a/desktop/src-tauri/src/memory/persistent.rs b/desktop/src-tauri/src/memory/persistent.rs
index 38a37df..36c9869 100644
--- a/desktop/src-tauri/src/memory/persistent.rs
+++ b/desktop/src-tauri/src/memory/persistent.rs
@@ -209,6 +209,32 @@ impl PersistentMemoryStore {
         .await
         .map_err(|e| format!("Failed to create schema: {}", e))?;
 
+        // Create FTS5 virtual table for full-text search.
+        // `id` is UNINDEXED so MATCH only searches `content`, never the memory id.
+        // Best-effort: if this fails (e.g. SQLite built without FTS5), search()
+        // degrades to LIKE.
+        let _ = sqlx::query(
+            r#"
+            CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
+                id UNINDEXED,
+                content,
+                tokenize='unicode61'
+            )
+            "#,
+        )
+        .execute(&mut *conn)
+        .await;
+
+        // Backfill memories stored before the FTS index existed (idempotent:
+        // only rows whose id is not indexed yet are copied).
+        let _ = sqlx::query(
+            "INSERT INTO memories_fts (id, content) \
+             SELECT id, content FROM memories \
+             WHERE id NOT IN (SELECT id FROM memories_fts)",
+        )
+        .execute(&mut *conn)
+        .await;
+
         // Migration: add overview column (L1 summary)
         let _ = sqlx::query("ALTER TABLE memories ADD COLUMN overview TEXT")
             .execute(&mut *conn)
@@ -265,6 +291,20 @@ impl PersistentMemoryStore {
         .await
         .map_err(|e| format!("Failed to store memory: {}", e))?;
 
+        // Sync FTS5 index. FTS5 tables have no unique constraint on `id`, so
+        // INSERT OR REPLACE would add a duplicate row whenever a memory is stored
+        // again; drop any stale entry first. Best-effort: the memory row itself
+        // is already persisted at this point.
+        let _ = sqlx::query("DELETE FROM memories_fts WHERE id = ?")
+            .bind(&memory.id)
+            .execute(&mut *conn)
+            .await;
+        let _ = sqlx::query("INSERT INTO memories_fts (id, content) VALUES (?, ?)")
+            .bind(&memory.id)
+            .bind(&memory.content)
+            .execute(&mut *conn)
+            .await;
+
         Ok(())
     }
 
@@ -296,63 +336,88 @@ impl PersistentMemoryStore {
         Ok(result)
     }
 
-    /// Search memories with semantic ranking when embeddings are available
+    /// Search memories with FTS5-first strategy and semantic ranking.
+    ///
+    /// Text queries try an FTS5 MATCH first; an FTS5 miss falls back to LIKE for
+    /// CJK queries, and an FTS5 error falls back to LIKE unconditionally.
     pub async fn search(&self, query: MemorySearchQuery) -> Result<Vec<PersistentMemory>, String> {
         let mut conn = self.conn.lock().await;
 
-        let mut sql = String::from("SELECT * FROM memories WHERE 1=1");
-        let mut params: Vec<String> = Vec::new();
-
-        if let Some(agent_id) = &query.agent_id {
-            sql.push_str(" AND agent_id = ?");
-            params.push(agent_id.clone());
-        }
-
-        if let Some(memory_type) = &query.memory_type {
-            sql.push_str(" AND memory_type = ?");
-            params.push(memory_type.clone());
-        }
-
-        if let Some(min_importance) = query.min_importance {
-            sql.push_str(" AND importance >= ?");
-            params.push(min_importance.to_string());
-        }
-
-        if let Some(query_text) = &query.query {
-            sql.push_str(" AND content LIKE ?");
-            params.push(format!("%{}%", query_text));
-        }
-
         // When using embedding ranking, fetch more candidates
         let effective_limit = if query.query.is_some() && is_embedding_configured() {
-            query.limit.unwrap_or(50).max(20) // Fetch more for re-ranking
+            query.limit.unwrap_or(50).max(20)
         } else {
             query.limit.unwrap_or(50)
         };
 
-        sql.push_str(&format!(" LIMIT {}", effective_limit));
+        let results = if let Some(query_text) = &query.query {
+            // FTS5-first search strategy
+            let sanitized = sanitize_fts_query(query_text);
 
-        if let Some(offset) = query.offset {
-            sql.push_str(&format!(" OFFSET {}", offset));
-        }
+            if !sanitized.is_empty() {
+                // Try FTS5 MATCH first
+                let mut sql = String::from(
+                    "SELECT m.* FROM memories m \
+                     INNER JOIN memories_fts f ON m.id = f.id \
+                     WHERE f.memories_fts MATCH ?"
+                );
+                let mut params: Vec<String> = vec![sanitized];
 
-        // Build and execute query dynamically
-        let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
-        for param in params {
-            query_builder = query_builder.bind(param);
-        }
+                if let Some(agent_id) = &query.agent_id {
+                    sql.push_str(" AND m.agent_id = ?");
+                    params.push(agent_id.clone());
+                }
+                if let Some(memory_type) = &query.memory_type {
+                    sql.push_str(" AND m.memory_type = ?");
+                    params.push(memory_type.clone());
+                }
+                if let Some(min_importance) = query.min_importance {
+                    sql.push_str(" AND m.importance >= ?");
+                    params.push(min_importance.to_string());
+                }
+                sql.push_str(&format!(" ORDER BY f.rank LIMIT {}", effective_limit));
+                if let Some(offset) = query.offset {
+                    sql.push_str(&format!(" OFFSET {}", offset));
+                }
 
-        let mut results = query_builder
-            .fetch_all(&mut *conn)
-            .await
-            .map_err(|e| format!("Failed to search memories: {}", e))?;
+                let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
+                for param in params {
+                    query_builder = query_builder.bind(param);
+                }
+                let fts_results = query_builder.fetch_all(&mut *conn).await;
+
+                match fts_results {
+                    Ok(rows) if !rows.is_empty() => rows,
+                    Ok(_) => {
+                        // FTS5 miss — CJK LIKE fallback (unicode61 doesn't handle CJK)
+                        let has_cjk = query_text.chars().any(|c| {
+                            matches!(c, '\u{4E00}'..='\u{9FFF}' | '\u{3400}'..='\u{4DBF}' | '\u{F900}'..='\u{FAFF}')
+                        });
+                        if has_cjk {
+                            Self::like_search(&mut conn, &query, effective_limit).await?
+                        } else {
+                            Vec::new()
+                        }
+                    }
+                    // FTS5 query failed (index missing or FTS5 not compiled in):
+                    // degrade to LIKE instead of silently returning nothing.
+                    Err(_) => Self::like_search(&mut conn, &query, effective_limit).await?,
+                }
+            } else {
+                // No meaningful FTS5 terms, use LIKE fallback
+                Self::like_search(&mut conn, &query, effective_limit).await?
+            }
+        } else {
+            // No text query — plain filtered scan
+            Self::like_search(&mut conn, &query, effective_limit).await?
+        };
 
         // Apply semantic ranking if query and embedding are available
+        let mut final_results = results;
         if let Some(query_text) = &query.query {
             if is_embedding_configured() {
                 if let Ok(query_embedding) = embed_text(query_text).await {
-                    // Score each result by cosine similarity
-                    let mut scored: Vec<(f32, PersistentMemory)> = results
+                    let mut scored: Vec<(f32, PersistentMemory)> = final_results
                         .into_iter()
                         .map(|mem| {
                             let score = mem.embedding.as_ref()
@@ -365,11 +430,9 @@ impl PersistentMemoryStore {
                         })
                         .collect();
 
-                    // Sort by score descending
                     scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
 
-                    // Apply the original limit
-                    results = scored.into_iter()
+                    final_results = scored.into_iter()
                         .take(query.limit.unwrap_or(20))
                         .map(|(_, mem)| mem)
                         .collect();
@@ -377,7 +440,50 @@ impl PersistentMemoryStore {
             }
         }
 
-        Ok(results)
+        Ok(final_results)
+    }
+
+    /// LIKE-based search fallback (used for CJK queries, non-text queries, and
+    /// when the FTS5 query fails).
+    ///
+    /// Honors `query.offset` and returns database errors to the caller.
+    async fn like_search(
+        conn: &mut sqlx::SqliteConnection,
+        query: &MemorySearchQuery,
+        limit: usize,
+    ) -> Result<Vec<PersistentMemory>, String> {
+        let mut sql = String::from("SELECT * FROM memories WHERE 1=1");
+        let mut params: Vec<String> = Vec::new();
+
+        if let Some(agent_id) = &query.agent_id {
+            sql.push_str(" AND agent_id = ?");
+            params.push(agent_id.clone());
+        }
+        if let Some(memory_type) = &query.memory_type {
+            sql.push_str(" AND memory_type = ?");
+            params.push(memory_type.clone());
+        }
+        if let Some(min_importance) = query.min_importance {
+            sql.push_str(" AND importance >= ?");
+            params.push(min_importance.to_string());
+        }
+        if let Some(query_text) = &query.query {
+            sql.push_str(" AND content LIKE ?");
+            params.push(format!("%{}%", query_text));
+        }
+        sql.push_str(&format!(" LIMIT {}", limit));
+        if let Some(offset) = query.offset {
+            sql.push_str(&format!(" OFFSET {}", offset));
+        }
+
+        let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
+        for param in params {
+            query_builder = query_builder.bind(param);
+        }
+        query_builder
+            .fetch_all(conn)
+            .await
+            .map_err(|e| format!("Failed to search memories: {}", e))
     }
 
     /// Delete a memory by ID
@@ -497,6 +603,24 @@ impl PersistentMemoryStore {
     }
 }
 
+/// Sanitize a user query for FTS5 MATCH syntax.
+/// Splits on non-alphanumeric characters, keeps tokens longer than one byte, and OR-joins them.
+fn sanitize_fts_query(query: &str) -> String {
+    let terms: Vec<String> = query
+        // Lowercased AND/OR/NOT are plain terms to FTS5 (its keywords are upper-case only)
+        .to_lowercase()
+        .split(|c: char| !c.is_alphanumeric())
+        .filter(|s| s.len() > 1)
+        .map(|s| s.to_string())
+        .collect();
+
+    if terms.is_empty() {
+        return String::new();
+    }
+
+    terms.join(" OR ")
+}
+
 /// Generate a unique memory ID
 #[allow(dead_code)] // Legacy: kept for potential migration use
 pub fn generate_memory_id() -> String {
diff --git a/desktop/src/store/browserHandStore.ts b/desktop/src/store/browserHandStore.ts
index 9fa0d5b..ae0e33b 100644
--- a/desktop/src/store/browserHandStore.ts
+++ b/desktop/src/store/browserHandStore.ts
@@ -26,6 +26,7 @@ import {
   type LogLevel,
   type SessionStatus,
 } from '../components/BrowserHand/templates';
+import { canAutoExecute } from '../lib/autonomy-manager';
 
 // ============================================================================
 // Store State Interface
@@ -220,6 +221,13 @@ export const useBrowserHandStore = create
 
   // Template Execution
   executeTemplate: async (templateId: string, params: Record<string, unknown>) => {
+    // Autonomy approval gate — browser hand requires_approval=true
+    const { canProceed, decision } = canAutoExecute('hand_trigger' as any, 5);
+    if (!canProceed) {
+      set({ error: `Browser 操作需要审批: ${decision.reason || '请确认后重试'}` });
+      throw new Error(`Browser 操作需要审批: ${decision.reason || 'requires approval'}`);
+    }
+
     const store = get();
 
     // Find template
@@ -339,6 +347,13 @@ export const useBrowserHandStore = create
   },
 
   executeScript: async (script: string, args?: unknown[]) => {
+    // Autonomy approval gate — arbitrary JS execution is high risk
+    const { canProceed, decision } = canAutoExecute('hand_trigger' as any, 8);
+    if (!canProceed) {
+      set({ error: `脚本执行需要审批: ${decision.reason || '请确认后重试'}` });
+      throw new Error(`Script execution requires approval: ${decision.reason || 'requires approval'}`);
+    }
+
     const store = get();
 
     if (!store.activeSessionId) {