fix(memory): FTS5 full-text search + browser hand autonomy gate
Some checks failed
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
M4-05: Replace LIKE-only search with an FTS5-first strategy:
- Add the memories_fts virtual table (unicode61 tokenizer)
- Use FTS5 MATCH as the primary path, with a LIKE fallback for CJK queries
- Sync the FTS index on store()

M3-03: Add an autonomy approval check to browserHandStore:
- executeTemplate: check canAutoExecute before running
- executeScript: check the approval gate before JS execution
This commit is contained in:
@@ -209,6 +209,19 @@ impl PersistentMemoryStore {
|
||||
.await
|
||||
.map_err(|e| format!("Failed to create schema: {}", e))?;
|
||||
|
||||
// Create FTS5 virtual table for full-text search
|
||||
let _ = sqlx::query(
|
||||
r#"
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
|
||||
id,
|
||||
content,
|
||||
tokenize='unicode61'
|
||||
)
|
||||
"#,
|
||||
)
|
||||
.execute(&mut *conn)
|
||||
.await;
|
||||
|
||||
// Migration: add overview column (L1 summary)
|
||||
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN overview TEXT")
|
||||
.execute(&mut *conn)
|
||||
@@ -265,6 +278,15 @@ impl PersistentMemoryStore {
|
||||
.await
|
||||
.map_err(|e| format!("Failed to store memory: {}", e))?;
|
||||
|
||||
// Sync FTS5 index
|
||||
let _ = sqlx::query(
|
||||
"INSERT OR REPLACE INTO memories_fts (id, content) VALUES (?, ?)"
|
||||
)
|
||||
.bind(&memory.id)
|
||||
.bind(&memory.content)
|
||||
.execute(&mut *conn)
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -296,63 +318,81 @@ impl PersistentMemoryStore {
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Search memories with semantic ranking when embeddings are available
|
||||
/// Search memories with FTS5-first strategy and semantic ranking
|
||||
pub async fn search(&self, query: MemorySearchQuery) -> Result<Vec<PersistentMemory>, String> {
|
||||
let mut conn = self.conn.lock().await;
|
||||
|
||||
let mut sql = String::from("SELECT * FROM memories WHERE 1=1");
|
||||
let mut params: Vec<String> = Vec::new();
|
||||
|
||||
if let Some(agent_id) = &query.agent_id {
|
||||
sql.push_str(" AND agent_id = ?");
|
||||
params.push(agent_id.clone());
|
||||
}
|
||||
|
||||
if let Some(memory_type) = &query.memory_type {
|
||||
sql.push_str(" AND memory_type = ?");
|
||||
params.push(memory_type.clone());
|
||||
}
|
||||
|
||||
if let Some(min_importance) = query.min_importance {
|
||||
sql.push_str(" AND importance >= ?");
|
||||
params.push(min_importance.to_string());
|
||||
}
|
||||
|
||||
if let Some(query_text) = &query.query {
|
||||
sql.push_str(" AND content LIKE ?");
|
||||
params.push(format!("%{}%", query_text));
|
||||
}
|
||||
|
||||
// When using embedding ranking, fetch more candidates
|
||||
let effective_limit = if query.query.is_some() && is_embedding_configured() {
|
||||
query.limit.unwrap_or(50).max(20) // Fetch more for re-ranking
|
||||
query.limit.unwrap_or(50).max(20)
|
||||
} else {
|
||||
query.limit.unwrap_or(50)
|
||||
};
|
||||
|
||||
sql.push_str(&format!(" LIMIT {}", effective_limit));
|
||||
let results = if let Some(query_text) = &query.query {
|
||||
// FTS5-first search strategy
|
||||
let sanitized = sanitize_fts_query(query_text);
|
||||
|
||||
if let Some(offset) = query.offset {
|
||||
sql.push_str(&format!(" OFFSET {}", offset));
|
||||
}
|
||||
if !sanitized.is_empty() {
|
||||
// Try FTS5 MATCH first
|
||||
let mut sql = String::from(
|
||||
"SELECT m.* FROM memories m \
|
||||
INNER JOIN memories_fts f ON m.id = f.id \
|
||||
WHERE f.memories_fts MATCH ?"
|
||||
);
|
||||
let mut params: Vec<String> = vec![sanitized];
|
||||
|
||||
// Build and execute query dynamically
|
||||
let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
|
||||
for param in params {
|
||||
query_builder = query_builder.bind(param);
|
||||
}
|
||||
if let Some(agent_id) = &query.agent_id {
|
||||
sql.push_str(" AND m.agent_id = ?");
|
||||
params.push(agent_id.clone());
|
||||
}
|
||||
if let Some(memory_type) = &query.memory_type {
|
||||
sql.push_str(" AND m.memory_type = ?");
|
||||
params.push(memory_type.clone());
|
||||
}
|
||||
if let Some(min_importance) = query.min_importance {
|
||||
sql.push_str(" AND m.importance >= ?");
|
||||
params.push(min_importance.to_string());
|
||||
}
|
||||
sql.push_str(&format!(" ORDER BY f.rank LIMIT {}", effective_limit));
|
||||
|
||||
let mut results = query_builder
|
||||
.fetch_all(&mut *conn)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to search memories: {}", e))?;
|
||||
let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
|
||||
for param in params {
|
||||
query_builder = query_builder.bind(param);
|
||||
}
|
||||
let fts_results = query_builder
|
||||
.fetch_all(&mut *conn)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
if !fts_results.is_empty() {
|
||||
fts_results
|
||||
} else {
|
||||
// FTS5 miss — CJK LIKE fallback (unicode61 doesn't handle CJK)
|
||||
let has_cjk = query_text.chars().any(|c| {
|
||||
matches!(c, '\u{4E00}'..='\u{9FFF}' | '\u{3400}'..='\u{4DBF}' | '\u{F900}'..='\u{FAFF}')
|
||||
});
|
||||
if has_cjk {
|
||||
Self::like_search(&mut conn, &query, effective_limit).await
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No meaningful FTS5 terms, use LIKE fallback
|
||||
Self::like_search(&mut conn, &query, effective_limit).await
|
||||
}
|
||||
} else {
|
||||
// No text query — plain filtered scan
|
||||
Self::like_search(&mut conn, &query, effective_limit).await
|
||||
};
|
||||
|
||||
// Apply semantic ranking if query and embedding are available
|
||||
let mut final_results = results;
|
||||
if let Some(query_text) = &query.query {
|
||||
if is_embedding_configured() {
|
||||
if let Ok(query_embedding) = embed_text(query_text).await {
|
||||
// Score each result by cosine similarity
|
||||
let mut scored: Vec<(f32, PersistentMemory)> = results
|
||||
let mut scored: Vec<(f32, PersistentMemory)> = final_results
|
||||
.into_iter()
|
||||
.map(|mem| {
|
||||
let score = mem.embedding.as_ref()
|
||||
@@ -365,11 +405,9 @@ impl PersistentMemoryStore {
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by score descending
|
||||
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
// Apply the original limit
|
||||
results = scored.into_iter()
|
||||
final_results = scored.into_iter()
|
||||
.take(query.limit.unwrap_or(20))
|
||||
.map(|(_, mem)| mem)
|
||||
.collect();
|
||||
@@ -377,7 +415,41 @@ impl PersistentMemoryStore {
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
Ok(final_results)
|
||||
}
|
||||
|
||||
/// LIKE-based search fallback (used for CJK queries and non-text queries)
|
||||
async fn like_search(
|
||||
conn: &mut sqlx::SqliteConnection,
|
||||
query: &MemorySearchQuery,
|
||||
limit: usize,
|
||||
) -> Vec<PersistentMemory> {
|
||||
let mut sql = String::from("SELECT * FROM memories WHERE 1=1");
|
||||
let mut params: Vec<String> = Vec::new();
|
||||
|
||||
if let Some(agent_id) = &query.agent_id {
|
||||
sql.push_str(" AND agent_id = ?");
|
||||
params.push(agent_id.clone());
|
||||
}
|
||||
if let Some(memory_type) = &query.memory_type {
|
||||
sql.push_str(" AND memory_type = ?");
|
||||
params.push(memory_type.clone());
|
||||
}
|
||||
if let Some(min_importance) = query.min_importance {
|
||||
sql.push_str(" AND importance >= ?");
|
||||
params.push(min_importance.to_string());
|
||||
}
|
||||
if let Some(query_text) = &query.query {
|
||||
sql.push_str(" AND content LIKE ?");
|
||||
params.push(format!("%{}%", query_text));
|
||||
}
|
||||
sql.push_str(&format!(" LIMIT {}", limit));
|
||||
|
||||
let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
|
||||
for param in params {
|
||||
query_builder = query_builder.bind(param);
|
||||
}
|
||||
query_builder.fetch_all(conn).await.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Delete a memory by ID
|
||||
@@ -497,6 +569,23 @@ impl PersistentMemoryStore {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sanitize a user query for FTS5 MATCH syntax.
///
/// The input is lowercased and split on every non-alphanumeric character, which
/// strips punctuation-based FTS5 operators (quotes, `*`, `:`, parentheses, ...).
/// Tokens shorter than two characters are discarded and the remaining tokens are
/// joined with ` OR `.
///
/// Token length is measured in characters rather than bytes, so a lone non-ASCII
/// character (for example a single CJK ideograph) is filtered out exactly like a
/// lone ASCII letter.
///
/// Returns an empty string when no usable token remains.
fn sanitize_fts_query(query: &str) -> String {
    let lowered = query.to_lowercase();

    let terms: Vec<&str> = lowered
        .split(|c: char| !c.is_alphanumeric())
        // Keep tokens with at least two characters; `str::len` would count bytes
        // and let single multi-byte characters through. Empty pieces produced by
        // consecutive separators are dropped by the same check.
        .filter(|term| term.chars().nth(1).is_some())
        .collect();

    // Joining an empty list yields "", which signals "no usable terms".
    terms.join(" OR ")
}
|
||||
|
||||
/// Generate a unique memory ID
|
||||
#[allow(dead_code)] // Legacy: kept for potential migration use
|
||||
pub fn generate_memory_id() -> String {
|
||||
|
||||
@@ -26,6 +26,7 @@ import {
|
||||
type LogLevel,
|
||||
type SessionStatus,
|
||||
} from '../components/BrowserHand/templates';
|
||||
import { canAutoExecute } from '../lib/autonomy-manager';
|
||||
|
||||
// ============================================================================
|
||||
// Store State Interface
|
||||
@@ -220,6 +221,13 @@ export const useBrowserHandStore = create<BrowserHandState & BrowserHandActions>
|
||||
|
||||
// Template Execution
|
||||
executeTemplate: async (templateId: string, params: Record<string, unknown>) => {
|
||||
// Autonomy approval gate — browser hand requires_approval=true
|
||||
const { canProceed, decision } = canAutoExecute('hand_trigger' as any, 5);
|
||||
if (!canProceed) {
|
||||
set({ error: `Browser 操作需要审批: ${decision.reason || '请确认后重试'}` });
|
||||
throw new Error(`Browser 操作需要审批: ${decision.reason || 'requires approval'}`);
|
||||
}
|
||||
|
||||
const store = get();
|
||||
|
||||
// Find template
|
||||
@@ -339,6 +347,13 @@ export const useBrowserHandStore = create<BrowserHandState & BrowserHandActions>
|
||||
},
|
||||
|
||||
executeScript: async (script: string, args?: unknown[]) => {
|
||||
// Autonomy approval gate — arbitrary JS execution is high risk
|
||||
const { canProceed, decision } = canAutoExecute('hand_trigger' as any, 8);
|
||||
if (!canProceed) {
|
||||
set({ error: `脚本执行需要审批: ${decision.reason || '请确认后重试'}` });
|
||||
throw new Error(`Script execution requires approval: ${decision.reason || 'requires approval'}`);
|
||||
}
|
||||
|
||||
const store = get();
|
||||
|
||||
if (!store.activeSessionId) {
|
||||
|
||||
Reference in New Issue
Block a user