fix(memory): FTS5 full-text search + browser hand autonomy gate
Some checks failed
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

M4-05: Replace LIKE-only search with FTS5-first strategy:
- Add memories_fts virtual table (unicode61 tokenizer)
- FTS5 MATCH primary path with CJK LIKE fallback
- Sync FTS index on store()

M3-03: Add autonomy approval check to browserHandStore:
- executeTemplate: check canAutoExecute before running
- executeScript: check approval gate for JS execution
This commit is contained in:
iven
2026-04-04 18:52:02 +08:00
parent 59f660b93b
commit 985644dd9a
2 changed files with 148 additions and 44 deletions

View File

@@ -209,6 +209,19 @@ impl PersistentMemoryStore {
.await
.map_err(|e| format!("Failed to create schema: {}", e))?;
// Create FTS5 virtual table for full-text search
let _ = sqlx::query(
r#"
CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
id,
content,
tokenize='unicode61'
)
"#,
)
.execute(&mut *conn)
.await;
// Migration: add overview column (L1 summary)
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN overview TEXT")
.execute(&mut *conn)
@@ -265,6 +278,15 @@ impl PersistentMemoryStore {
.await
.map_err(|e| format!("Failed to store memory: {}", e))?;
// Sync FTS5 index
let _ = sqlx::query(
"INSERT OR REPLACE INTO memories_fts (id, content) VALUES (?, ?)"
)
.bind(&memory.id)
.bind(&memory.content)
.execute(&mut *conn)
.await;
Ok(())
}
@@ -296,63 +318,81 @@ impl PersistentMemoryStore {
Ok(result)
}
/// Search memories with semantic ranking when embeddings are available
/// Search memories with FTS5-first strategy and semantic ranking
pub async fn search(&self, query: MemorySearchQuery) -> Result<Vec<PersistentMemory>, String> {
let mut conn = self.conn.lock().await;
let mut sql = String::from("SELECT * FROM memories WHERE 1=1");
let mut params: Vec<String> = Vec::new();
if let Some(agent_id) = &query.agent_id {
sql.push_str(" AND agent_id = ?");
params.push(agent_id.clone());
}
if let Some(memory_type) = &query.memory_type {
sql.push_str(" AND memory_type = ?");
params.push(memory_type.clone());
}
if let Some(min_importance) = query.min_importance {
sql.push_str(" AND importance >= ?");
params.push(min_importance.to_string());
}
if let Some(query_text) = &query.query {
sql.push_str(" AND content LIKE ?");
params.push(format!("%{}%", query_text));
}
// When using embedding ranking, fetch more candidates
let effective_limit = if query.query.is_some() && is_embedding_configured() {
query.limit.unwrap_or(50).max(20) // Fetch more for re-ranking
query.limit.unwrap_or(50).max(20)
} else {
query.limit.unwrap_or(50)
};
sql.push_str(&format!(" LIMIT {}", effective_limit));
let results = if let Some(query_text) = &query.query {
// FTS5-first search strategy
let sanitized = sanitize_fts_query(query_text);
if let Some(offset) = query.offset {
sql.push_str(&format!(" OFFSET {}", offset));
}
if !sanitized.is_empty() {
// Try FTS5 MATCH first
let mut sql = String::from(
"SELECT m.* FROM memories m \
INNER JOIN memories_fts f ON m.id = f.id \
WHERE f.memories_fts MATCH ?"
);
let mut params: Vec<String> = vec![sanitized];
// Build and execute query dynamically
let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
for param in params {
query_builder = query_builder.bind(param);
}
if let Some(agent_id) = &query.agent_id {
sql.push_str(" AND m.agent_id = ?");
params.push(agent_id.clone());
}
if let Some(memory_type) = &query.memory_type {
sql.push_str(" AND m.memory_type = ?");
params.push(memory_type.clone());
}
if let Some(min_importance) = query.min_importance {
sql.push_str(" AND m.importance >= ?");
params.push(min_importance.to_string());
}
sql.push_str(&format!(" ORDER BY f.rank LIMIT {}", effective_limit));
let mut results = query_builder
.fetch_all(&mut *conn)
.await
.map_err(|e| format!("Failed to search memories: {}", e))?;
let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
for param in params {
query_builder = query_builder.bind(param);
}
let fts_results = query_builder
.fetch_all(&mut *conn)
.await
.unwrap_or_default();
if !fts_results.is_empty() {
fts_results
} else {
// FTS5 miss — CJK LIKE fallback (unicode61 doesn't handle CJK)
let has_cjk = query_text.chars().any(|c| {
matches!(c, '\u{4E00}'..='\u{9FFF}' | '\u{3400}'..='\u{4DBF}' | '\u{F900}'..='\u{FAFF}')
});
if has_cjk {
Self::like_search(&mut conn, &query, effective_limit).await
} else {
Vec::new()
}
}
} else {
// No meaningful FTS5 terms, use LIKE fallback
Self::like_search(&mut conn, &query, effective_limit).await
}
} else {
// No text query — plain filtered scan
Self::like_search(&mut conn, &query, effective_limit).await
};
// Apply semantic ranking if query and embedding are available
let mut final_results = results;
if let Some(query_text) = &query.query {
if is_embedding_configured() {
if let Ok(query_embedding) = embed_text(query_text).await {
// Score each result by cosine similarity
let mut scored: Vec<(f32, PersistentMemory)> = results
let mut scored: Vec<(f32, PersistentMemory)> = final_results
.into_iter()
.map(|mem| {
let score = mem.embedding.as_ref()
@@ -365,11 +405,9 @@ impl PersistentMemoryStore {
})
.collect();
// Sort by score descending
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
// Apply the original limit
results = scored.into_iter()
final_results = scored.into_iter()
.take(query.limit.unwrap_or(20))
.map(|(_, mem)| mem)
.collect();
@@ -377,7 +415,41 @@ impl PersistentMemoryStore {
}
}
Ok(results)
Ok(final_results)
}
/// LIKE-based search fallback (used for CJK queries and non-text queries).
///
/// Builds a `SELECT * FROM memories` statement, appending one bound filter for
/// each of `agent_id`, `memory_type`, `min_importance` and `query` (as a
/// `content LIKE '%…%'` substring match) that is present on `query`.
///
/// * `conn`  – open SQLite connection the statement is executed on.
/// * `query` – search filters; absent (`None`) fields add no constraint.
/// * `limit` – maximum number of rows to return (emitted as `LIMIT`).
///
/// `query.offset`, when present, is emitted as `OFFSET` so paginated callers
/// keep working on this path.
///
/// This is a best-effort helper: any database error is swallowed and an empty
/// vector is returned instead.
async fn like_search(
    conn: &mut sqlx::SqliteConnection,
    query: &MemorySearchQuery,
    limit: usize,
) -> Vec<PersistentMemory> {
    let mut sql = String::from("SELECT * FROM memories WHERE 1=1");
    let mut params: Vec<String> = Vec::new();

    if let Some(agent_id) = &query.agent_id {
        sql.push_str(" AND agent_id = ?");
        params.push(agent_id.clone());
    }
    if let Some(memory_type) = &query.memory_type {
        sql.push_str(" AND memory_type = ?");
        params.push(memory_type.clone());
    }
    if let Some(min_importance) = query.min_importance {
        sql.push_str(" AND importance >= ?");
        params.push(min_importance.to_string());
    }
    if let Some(query_text) = &query.query {
        // NOTE(review): `%` and `_` inside `query_text` are not escaped and
        // therefore act as LIKE wildcards — confirm this is acceptable.
        sql.push_str(" AND content LIKE ?");
        params.push(format!("%{}%", query_text));
    }

    sql.push_str(&format!(" LIMIT {}", limit));
    // SQLite requires OFFSET to follow LIMIT; skip it entirely when unset.
    if let Some(offset) = &query.offset {
        sql.push_str(&format!(" OFFSET {}", offset));
    }

    // Bind parameters in the same order their placeholders were appended.
    let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
    for param in params {
        query_builder = query_builder.bind(param);
    }

    query_builder.fetch_all(conn).await.unwrap_or_default()
}
/// Delete a memory by ID
@@ -497,6 +569,23 @@ impl PersistentMemoryStore {
}
}
/// Turn free-form user input into a safe FTS5 `MATCH` expression.
///
/// The input is lowercased and cut at every non-alphanumeric character, which
/// removes FTS5 syntax such as quotes, `*`, `:` and parentheses. Pieces whose
/// UTF-8 encoding is a single byte or shorter are dropped, so one-letter ASCII
/// tokens disappear while a single multi-byte character (for example one CJK
/// ideograph) is kept. The surviving terms are joined with ` OR `.
///
/// Returns an empty string when no usable term remains.
fn sanitize_fts_query(query: &str) -> String {
    let lowered = query.to_lowercase();
    let mut expression = String::new();

    for token in lowered.split(|ch: char| !ch.is_alphanumeric()) {
        // Byte length, not character count: filters out "" and 1-byte tokens.
        if token.len() <= 1 {
            continue;
        }
        if !expression.is_empty() {
            expression.push_str(" OR ");
        }
        expression.push_str(token);
    }

    expression
}
/// Generate a unique memory ID
#[allow(dead_code)] // Legacy: kept for potential migration use
pub fn generate_memory_id() -> String {