fix(memory): FTS5 full-text search + browser hand autonomy gate
Some checks failed
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
M4-05: Replace LIKE-only search with an FTS5-first strategy:
- Add memories_fts virtual table (unicode61 tokenizer)
- Use FTS5 MATCH as the primary path, with a LIKE fallback for CJK queries
- Sync the FTS index on store()

M3-03: Add autonomy approval check to browserHandStore:
- executeTemplate: check canAutoExecute before running
- executeScript: check approval gate for JS execution
This commit is contained in:
@@ -209,6 +209,19 @@ impl PersistentMemoryStore {
|
|||||||
.await
|
.await
|
||||||
.map_err(|e| format!("Failed to create schema: {}", e))?;
|
.map_err(|e| format!("Failed to create schema: {}", e))?;
|
||||||
|
|
||||||
|
// Create FTS5 virtual table for full-text search
|
||||||
|
let _ = sqlx::query(
|
||||||
|
r#"
|
||||||
|
CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5(
|
||||||
|
id,
|
||||||
|
content,
|
||||||
|
tokenize='unicode61'
|
||||||
|
)
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.execute(&mut *conn)
|
||||||
|
.await;
|
||||||
|
|
||||||
// Migration: add overview column (L1 summary)
|
// Migration: add overview column (L1 summary)
|
||||||
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN overview TEXT")
|
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN overview TEXT")
|
||||||
.execute(&mut *conn)
|
.execute(&mut *conn)
|
||||||
@@ -265,6 +278,15 @@ impl PersistentMemoryStore {
|
|||||||
.await
|
.await
|
||||||
.map_err(|e| format!("Failed to store memory: {}", e))?;
|
.map_err(|e| format!("Failed to store memory: {}", e))?;
|
||||||
|
|
||||||
|
// Sync FTS5 index
|
||||||
|
let _ = sqlx::query(
|
||||||
|
"INSERT OR REPLACE INTO memories_fts (id, content) VALUES (?, ?)"
|
||||||
|
)
|
||||||
|
.bind(&memory.id)
|
||||||
|
.bind(&memory.content)
|
||||||
|
.execute(&mut *conn)
|
||||||
|
.await;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -296,63 +318,81 @@ impl PersistentMemoryStore {
|
|||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Search memories with semantic ranking when embeddings are available
|
/// Search memories with FTS5-first strategy and semantic ranking
|
||||||
pub async fn search(&self, query: MemorySearchQuery) -> Result<Vec<PersistentMemory>, String> {
|
pub async fn search(&self, query: MemorySearchQuery) -> Result<Vec<PersistentMemory>, String> {
|
||||||
let mut conn = self.conn.lock().await;
|
let mut conn = self.conn.lock().await;
|
||||||
|
|
||||||
let mut sql = String::from("SELECT * FROM memories WHERE 1=1");
|
|
||||||
let mut params: Vec<String> = Vec::new();
|
|
||||||
|
|
||||||
if let Some(agent_id) = &query.agent_id {
|
|
||||||
sql.push_str(" AND agent_id = ?");
|
|
||||||
params.push(agent_id.clone());
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(memory_type) = &query.memory_type {
|
|
||||||
sql.push_str(" AND memory_type = ?");
|
|
||||||
params.push(memory_type.clone());
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(min_importance) = query.min_importance {
|
|
||||||
sql.push_str(" AND importance >= ?");
|
|
||||||
params.push(min_importance.to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(query_text) = &query.query {
|
|
||||||
sql.push_str(" AND content LIKE ?");
|
|
||||||
params.push(format!("%{}%", query_text));
|
|
||||||
}
|
|
||||||
|
|
||||||
// When using embedding ranking, fetch more candidates
|
// When using embedding ranking, fetch more candidates
|
||||||
let effective_limit = if query.query.is_some() && is_embedding_configured() {
|
let effective_limit = if query.query.is_some() && is_embedding_configured() {
|
||||||
query.limit.unwrap_or(50).max(20) // Fetch more for re-ranking
|
query.limit.unwrap_or(50).max(20)
|
||||||
} else {
|
} else {
|
||||||
query.limit.unwrap_or(50)
|
query.limit.unwrap_or(50)
|
||||||
};
|
};
|
||||||
|
|
||||||
sql.push_str(&format!(" LIMIT {}", effective_limit));
|
let results = if let Some(query_text) = &query.query {
|
||||||
|
// FTS5-first search strategy
|
||||||
|
let sanitized = sanitize_fts_query(query_text);
|
||||||
|
|
||||||
if let Some(offset) = query.offset {
|
if !sanitized.is_empty() {
|
||||||
sql.push_str(&format!(" OFFSET {}", offset));
|
// Try FTS5 MATCH first
|
||||||
}
|
let mut sql = String::from(
|
||||||
|
"SELECT m.* FROM memories m \
|
||||||
|
INNER JOIN memories_fts f ON m.id = f.id \
|
||||||
|
WHERE f.memories_fts MATCH ?"
|
||||||
|
);
|
||||||
|
let mut params: Vec<String> = vec![sanitized];
|
||||||
|
|
||||||
// Build and execute query dynamically
|
if let Some(agent_id) = &query.agent_id {
|
||||||
let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
|
sql.push_str(" AND m.agent_id = ?");
|
||||||
for param in params {
|
params.push(agent_id.clone());
|
||||||
query_builder = query_builder.bind(param);
|
}
|
||||||
}
|
if let Some(memory_type) = &query.memory_type {
|
||||||
|
sql.push_str(" AND m.memory_type = ?");
|
||||||
|
params.push(memory_type.clone());
|
||||||
|
}
|
||||||
|
if let Some(min_importance) = query.min_importance {
|
||||||
|
sql.push_str(" AND m.importance >= ?");
|
||||||
|
params.push(min_importance.to_string());
|
||||||
|
}
|
||||||
|
sql.push_str(&format!(" ORDER BY f.rank LIMIT {}", effective_limit));
|
||||||
|
|
||||||
let mut results = query_builder
|
let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
|
||||||
.fetch_all(&mut *conn)
|
for param in params {
|
||||||
.await
|
query_builder = query_builder.bind(param);
|
||||||
.map_err(|e| format!("Failed to search memories: {}", e))?;
|
}
|
||||||
|
let fts_results = query_builder
|
||||||
|
.fetch_all(&mut *conn)
|
||||||
|
.await
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
if !fts_results.is_empty() {
|
||||||
|
fts_results
|
||||||
|
} else {
|
||||||
|
// FTS5 miss — CJK LIKE fallback (unicode61 doesn't handle CJK)
|
||||||
|
let has_cjk = query_text.chars().any(|c| {
|
||||||
|
matches!(c, '\u{4E00}'..='\u{9FFF}' | '\u{3400}'..='\u{4DBF}' | '\u{F900}'..='\u{FAFF}')
|
||||||
|
});
|
||||||
|
if has_cjk {
|
||||||
|
Self::like_search(&mut conn, &query, effective_limit).await
|
||||||
|
} else {
|
||||||
|
Vec::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No meaningful FTS5 terms, use LIKE fallback
|
||||||
|
Self::like_search(&mut conn, &query, effective_limit).await
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No text query — plain filtered scan
|
||||||
|
Self::like_search(&mut conn, &query, effective_limit).await
|
||||||
|
};
|
||||||
|
|
||||||
// Apply semantic ranking if query and embedding are available
|
// Apply semantic ranking if query and embedding are available
|
||||||
|
let mut final_results = results;
|
||||||
if let Some(query_text) = &query.query {
|
if let Some(query_text) = &query.query {
|
||||||
if is_embedding_configured() {
|
if is_embedding_configured() {
|
||||||
if let Ok(query_embedding) = embed_text(query_text).await {
|
if let Ok(query_embedding) = embed_text(query_text).await {
|
||||||
// Score each result by cosine similarity
|
let mut scored: Vec<(f32, PersistentMemory)> = final_results
|
||||||
let mut scored: Vec<(f32, PersistentMemory)> = results
|
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|mem| {
|
.map(|mem| {
|
||||||
let score = mem.embedding.as_ref()
|
let score = mem.embedding.as_ref()
|
||||||
@@ -365,11 +405,9 @@ impl PersistentMemoryStore {
|
|||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
// Sort by score descending
|
|
||||||
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
|
|
||||||
// Apply the original limit
|
final_results = scored.into_iter()
|
||||||
results = scored.into_iter()
|
|
||||||
.take(query.limit.unwrap_or(20))
|
.take(query.limit.unwrap_or(20))
|
||||||
.map(|(_, mem)| mem)
|
.map(|(_, mem)| mem)
|
||||||
.collect();
|
.collect();
|
||||||
@@ -377,7 +415,41 @@ impl PersistentMemoryStore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results)
|
Ok(final_results)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// LIKE-based search fallback (used for CJK queries and non-text queries)
|
||||||
|
async fn like_search(
|
||||||
|
conn: &mut sqlx::SqliteConnection,
|
||||||
|
query: &MemorySearchQuery,
|
||||||
|
limit: usize,
|
||||||
|
) -> Vec<PersistentMemory> {
|
||||||
|
let mut sql = String::from("SELECT * FROM memories WHERE 1=1");
|
||||||
|
let mut params: Vec<String> = Vec::new();
|
||||||
|
|
||||||
|
if let Some(agent_id) = &query.agent_id {
|
||||||
|
sql.push_str(" AND agent_id = ?");
|
||||||
|
params.push(agent_id.clone());
|
||||||
|
}
|
||||||
|
if let Some(memory_type) = &query.memory_type {
|
||||||
|
sql.push_str(" AND memory_type = ?");
|
||||||
|
params.push(memory_type.clone());
|
||||||
|
}
|
||||||
|
if let Some(min_importance) = query.min_importance {
|
||||||
|
sql.push_str(" AND importance >= ?");
|
||||||
|
params.push(min_importance.to_string());
|
||||||
|
}
|
||||||
|
if let Some(query_text) = &query.query {
|
||||||
|
sql.push_str(" AND content LIKE ?");
|
||||||
|
params.push(format!("%{}%", query_text));
|
||||||
|
}
|
||||||
|
sql.push_str(&format!(" LIMIT {}", limit));
|
||||||
|
|
||||||
|
let mut query_builder = sqlx::query_as::<_, PersistentMemory>(&sql);
|
||||||
|
for param in params {
|
||||||
|
query_builder = query_builder.bind(param);
|
||||||
|
}
|
||||||
|
query_builder.fetch_all(conn).await.unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Delete a memory by ID
|
/// Delete a memory by ID
|
||||||
@@ -497,6 +569,23 @@ impl PersistentMemoryStore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Turn free-form user input into an FTS5 `MATCH` expression.
///
/// The input is lowercased and cut at every non-alphanumeric character, so no
/// punctuation from the original text survives. Fragments whose UTF-8 encoding
/// is shorter than two bytes (lone ASCII letters or digits) are discarded; the
/// remaining fragments are combined with ` OR `.
///
/// Returns an empty string when no fragment survives.
fn sanitize_fts_query(query: &str) -> String {
    let lowered = query.to_lowercase();
    let mut expr = String::new();

    for token in lowered.split(|ch: char| !ch.is_alphanumeric()) {
        // Byte length, not character count: a single multi-byte character
        // (for example one CJK ideograph) is kept.
        if token.len() < 2 {
            continue;
        }
        if !expr.is_empty() {
            expr.push_str(" OR ");
        }
        expr.push_str(token);
    }

    expr
}
|
||||||
|
|
||||||
/// Generate a unique memory ID
|
/// Generate a unique memory ID
|
||||||
#[allow(dead_code)] // Legacy: kept for potential migration use
|
#[allow(dead_code)] // Legacy: kept for potential migration use
|
||||||
pub fn generate_memory_id() -> String {
|
pub fn generate_memory_id() -> String {
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ import {
|
|||||||
type LogLevel,
|
type LogLevel,
|
||||||
type SessionStatus,
|
type SessionStatus,
|
||||||
} from '../components/BrowserHand/templates';
|
} from '../components/BrowserHand/templates';
|
||||||
|
import { canAutoExecute } from '../lib/autonomy-manager';
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Store State Interface
|
// Store State Interface
|
||||||
@@ -220,6 +221,13 @@ export const useBrowserHandStore = create<BrowserHandState & BrowserHandActions>
|
|||||||
|
|
||||||
// Template Execution
|
// Template Execution
|
||||||
executeTemplate: async (templateId: string, params: Record<string, unknown>) => {
|
executeTemplate: async (templateId: string, params: Record<string, unknown>) => {
|
||||||
|
// Autonomy approval gate — browser hand requires_approval=true
|
||||||
|
const { canProceed, decision } = canAutoExecute('hand_trigger' as any, 5);
|
||||||
|
if (!canProceed) {
|
||||||
|
set({ error: `Browser 操作需要审批: ${decision.reason || '请确认后重试'}` });
|
||||||
|
throw new Error(`Browser 操作需要审批: ${decision.reason || 'requires approval'}`);
|
||||||
|
}
|
||||||
|
|
||||||
const store = get();
|
const store = get();
|
||||||
|
|
||||||
// Find template
|
// Find template
|
||||||
@@ -339,6 +347,13 @@ export const useBrowserHandStore = create<BrowserHandState & BrowserHandActions>
|
|||||||
},
|
},
|
||||||
|
|
||||||
executeScript: async (script: string, args?: unknown[]) => {
|
executeScript: async (script: string, args?: unknown[]) => {
|
||||||
|
// Autonomy approval gate — arbitrary JS execution is high risk
|
||||||
|
const { canProceed, decision } = canAutoExecute('hand_trigger' as any, 8);
|
||||||
|
if (!canProceed) {
|
||||||
|
set({ error: `脚本执行需要审批: ${decision.reason || '请确认后重试'}` });
|
||||||
|
throw new Error(`Script execution requires approval: ${decision.reason || 'requires approval'}`);
|
||||||
|
}
|
||||||
|
|
||||||
const store = get();
|
const store = get();
|
||||||
|
|
||||||
if (!store.activeSessionId) {
|
if (!store.activeSessionId) {
|
||||||
|
|||||||
Reference in New Issue
Block a user