1. MemoryMiddleware: replace byte-length check (query.len() < 4) with char-count check (query.chars().count() < 2). Single CJK characters are 3 UTF-8 bytes but 1 meaningful character — the old threshold incorrectly skipped 1-2 char Chinese queries like "你好". 2. QueryAnalyzer: add Chinese synonym mappings for 13 common technical terms (错误→bug, 优化→improve, 配置→config, etc.) so CJK queries can find relevant English-keyword memories and vice versa. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
138 lines
5.2 KiB
Rust
138 lines
5.2 KiB
Rust
//! Memory middleware — unified pre/post hooks for memory retrieval and extraction.
//!
//! This middleware unifies the memory lifecycle:
//! - `before_completion`: retrieves relevant memories and injects them into the system prompt
//! - `after_completion`: extracts learnings from the conversation and stores them
//!
//! It replaces both the inline `GrowthIntegration` calls in `AgentLoop` and the
//! `intelligence_hooks` calls in the Tauri desktop layer.
|
|
|
|
use async_trait::async_trait;
|
|
use zclaw_types::Result;
|
|
use crate::growth::GrowthIntegration;
|
|
use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};
|
|
|
|
/// Middleware that handles memory retrieval (pre-completion) and extraction (post-completion).
|
|
///
|
|
/// Wraps `GrowthIntegration` and delegates:
|
|
/// - `before_completion` → `enhance_prompt()` for memory injection
|
|
/// - `after_completion` → `process_conversation()` for memory extraction
|
|
pub struct MemoryMiddleware {
|
|
growth: GrowthIntegration,
|
|
/// Minimum seconds between extractions for the same agent (debounce).
|
|
debounce_secs: u64,
|
|
/// Timestamp of last extraction per agent (for debouncing).
|
|
last_extraction: std::sync::Mutex<std::collections::HashMap<String, std::time::Instant>>,
|
|
}
|
|
|
|
impl MemoryMiddleware {
|
|
pub fn new(growth: GrowthIntegration) -> Self {
|
|
Self {
|
|
growth,
|
|
debounce_secs: 30,
|
|
last_extraction: std::sync::Mutex::new(std::collections::HashMap::new()),
|
|
}
|
|
}
|
|
|
|
/// Set the debounce interval in seconds.
|
|
pub fn with_debounce_secs(mut self, secs: u64) -> Self {
|
|
self.debounce_secs = secs;
|
|
self
|
|
}
|
|
|
|
/// Check if enough time has passed since the last extraction for this agent.
|
|
fn should_extract(&self, agent_id: &str) -> bool {
|
|
let now = std::time::Instant::now();
|
|
let mut map = self.last_extraction.lock().unwrap();
|
|
if let Some(last) = map.get(agent_id) {
|
|
if now.duration_since(*last).as_secs() < self.debounce_secs {
|
|
return false;
|
|
}
|
|
}
|
|
map.insert(agent_id.to_string(), now);
|
|
true
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl AgentMiddleware for MemoryMiddleware {
|
|
fn name(&self) -> &str { "memory" }
|
|
fn priority(&self) -> i32 { 150 }
|
|
|
|
async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
|
|
// Skip memory injection for very short queries.
|
|
// Short queries (e.g., "1+6", "hi", "好") don't benefit from memory context.
|
|
// Worse, the retriever's scope-based fallback may return high-importance but
|
|
// irrelevant old memories, causing the model to think about past conversations
|
|
// instead of answering the current question.
|
|
// Use char count (not byte count) so CJK queries are handled correctly:
|
|
// a single Chinese char is 3 UTF-8 bytes but 1 meaningful character.
|
|
let query = ctx.user_input.trim();
|
|
if query.chars().count() < 2 {
|
|
tracing::debug!(
|
|
"[MemoryMiddleware] Skipping enhancement for short query ({:?}): no memory context needed",
|
|
query
|
|
);
|
|
return Ok(MiddlewareDecision::Continue);
|
|
}
|
|
|
|
match self.growth.enhance_prompt(
|
|
&ctx.agent_id,
|
|
&ctx.system_prompt,
|
|
&ctx.user_input,
|
|
).await {
|
|
Ok(enhanced) => {
|
|
ctx.system_prompt = enhanced;
|
|
Ok(MiddlewareDecision::Continue)
|
|
}
|
|
Err(e) => {
|
|
// Non-fatal: memory retrieval failure should not block the loop
|
|
tracing::warn!("[MemoryMiddleware] Prompt enhancement failed: {}", e);
|
|
Ok(MiddlewareDecision::Continue)
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn after_completion(&self, ctx: &MiddlewareContext) -> Result<()> {
|
|
// Debounce: skip extraction if called too recently for this agent
|
|
let agent_key = ctx.agent_id.to_string();
|
|
if !self.should_extract(&agent_key) {
|
|
tracing::debug!(
|
|
"[MemoryMiddleware] Skipping extraction for agent {} (debounced)",
|
|
agent_key
|
|
);
|
|
return Ok(());
|
|
}
|
|
|
|
if ctx.messages.is_empty() {
|
|
return Ok(());
|
|
}
|
|
|
|
// Combined extraction: single LLM call produces both memories and structured facts.
|
|
// Avoids double LLM extraction ( process_conversation + extract_structured_facts).
|
|
match self.growth.extract_combined(
|
|
&ctx.agent_id,
|
|
&ctx.messages,
|
|
&ctx.session_id,
|
|
).await {
|
|
Ok(Some((mem_count, facts))) => {
|
|
tracing::info!(
|
|
"[MemoryMiddleware] Extracted {} memories + {} structured facts for agent {}",
|
|
mem_count,
|
|
facts.len(),
|
|
agent_key
|
|
);
|
|
}
|
|
Ok(None) => {
|
|
tracing::debug!("[MemoryMiddleware] No memories or facts extracted");
|
|
}
|
|
Err(e) => {
|
|
// Non-fatal: extraction failure should not affect the response
|
|
tracing::warn!("[MemoryMiddleware] Combined extraction failed: {}", e);
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|