Files
zclaw_openfang/crates/zclaw-runtime/src/middleware/memory.rs
iven 1bf0d3a73d fix(memory): CJK-aware short query threshold + Chinese synonym expansion
1. MemoryMiddleware: replace byte-length check (query.len() < 4) with
   char-count check (query.chars().count() < 2). Single CJK characters
   are 3 UTF-8 bytes but 1 meaningful character — the old threshold
   incorrectly skipped 1-2 char Chinese queries like "你好".

2. QueryAnalyzer: add Chinese synonym mappings for 13 common technical
   terms (错误→bug, 优化→improve, 配置→config, etc.) so CJK queries
   can find relevant English-keyword memories and vice versa.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-02 01:21:29 +08:00

138 lines
5.2 KiB
Rust

//! Memory middleware — unified pre/post hooks for memory retrieval and extraction.
//!
//! This middleware unifies the memory lifecycle:
//! - `before_completion`: retrieves relevant memories and injects them into the system prompt
//! - `after_completion`: extracts learnings from the conversation and stores them
//!
//! It replaces both the inline `GrowthIntegration` calls in `AgentLoop` and the
//! `intelligence_hooks` calls in the Tauri desktop layer.
use async_trait::async_trait;
use zclaw_types::Result;
use crate::growth::GrowthIntegration;
use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};
/// Middleware that handles memory retrieval (pre-completion) and extraction (post-completion).
///
/// Wraps `GrowthIntegration` and delegates:
/// - `before_completion` → `enhance_prompt()` for memory injection
/// - `after_completion` → `process_conversation()` for memory extraction
pub struct MemoryMiddleware {
growth: GrowthIntegration,
/// Minimum seconds between extractions for the same agent (debounce).
debounce_secs: u64,
/// Timestamp of last extraction per agent (for debouncing).
last_extraction: std::sync::Mutex<std::collections::HashMap<String, std::time::Instant>>,
}
impl MemoryMiddleware {
pub fn new(growth: GrowthIntegration) -> Self {
Self {
growth,
debounce_secs: 30,
last_extraction: std::sync::Mutex::new(std::collections::HashMap::new()),
}
}
/// Set the debounce interval in seconds.
pub fn with_debounce_secs(mut self, secs: u64) -> Self {
self.debounce_secs = secs;
self
}
/// Check if enough time has passed since the last extraction for this agent.
fn should_extract(&self, agent_id: &str) -> bool {
let now = std::time::Instant::now();
let mut map = self.last_extraction.lock().unwrap();
if let Some(last) = map.get(agent_id) {
if now.duration_since(*last).as_secs() < self.debounce_secs {
return false;
}
}
map.insert(agent_id.to_string(), now);
true
}
}
#[async_trait]
impl AgentMiddleware for MemoryMiddleware {
fn name(&self) -> &str { "memory" }
fn priority(&self) -> i32 { 150 }
async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
// Skip memory injection for very short queries.
// Short queries (e.g., "1+6", "hi", "好") don't benefit from memory context.
// Worse, the retriever's scope-based fallback may return high-importance but
// irrelevant old memories, causing the model to think about past conversations
// instead of answering the current question.
// Use char count (not byte count) so CJK queries are handled correctly:
// a single Chinese char is 3 UTF-8 bytes but 1 meaningful character.
let query = ctx.user_input.trim();
if query.chars().count() < 2 {
tracing::debug!(
"[MemoryMiddleware] Skipping enhancement for short query ({:?}): no memory context needed",
query
);
return Ok(MiddlewareDecision::Continue);
}
match self.growth.enhance_prompt(
&ctx.agent_id,
&ctx.system_prompt,
&ctx.user_input,
).await {
Ok(enhanced) => {
ctx.system_prompt = enhanced;
Ok(MiddlewareDecision::Continue)
}
Err(e) => {
// Non-fatal: memory retrieval failure should not block the loop
tracing::warn!("[MemoryMiddleware] Prompt enhancement failed: {}", e);
Ok(MiddlewareDecision::Continue)
}
}
}
async fn after_completion(&self, ctx: &MiddlewareContext) -> Result<()> {
// Debounce: skip extraction if called too recently for this agent
let agent_key = ctx.agent_id.to_string();
if !self.should_extract(&agent_key) {
tracing::debug!(
"[MemoryMiddleware] Skipping extraction for agent {} (debounced)",
agent_key
);
return Ok(());
}
if ctx.messages.is_empty() {
return Ok(());
}
// Combined extraction: single LLM call produces both memories and structured facts.
// Avoids double LLM extraction ( process_conversation + extract_structured_facts).
match self.growth.extract_combined(
&ctx.agent_id,
&ctx.messages,
&ctx.session_id,
).await {
Ok(Some((mem_count, facts))) => {
tracing::info!(
"[MemoryMiddleware] Extracted {} memories + {} structured facts for agent {}",
mem_count,
facts.len(),
agent_key
);
}
Ok(None) => {
tracing::debug!("[MemoryMiddleware] No memories or facts extracted");
}
Err(e) => {
// Non-fatal: extraction failure should not affect the response
tracing::warn!("[MemoryMiddleware] Combined extraction failed: {}", e);
}
}
Ok(())
}
}