// zclaw_openfang/crates/zclaw-runtime/src/middleware/memory.rs

//! Memory middleware — unified pre/post hooks for memory retrieval and extraction.
//!
//! This middleware unifies the memory lifecycle:
//! - `before_completion`: retrieves relevant memories and injects them into the system prompt
//! - `after_completion`: extracts learnings from the conversation and stores them
//!
//! It replaces both the inline `GrowthIntegration` calls in `AgentLoop` and the
//! `intelligence_hooks` calls in the Tauri desktop layer.

use async_trait::async_trait;
use zclaw_types::Result;
use crate::growth::GrowthIntegration;
use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};

/// Middleware that handles memory retrieval (pre-completion) and extraction (post-completion).
///
/// Wraps `GrowthIntegration` and delegates:
/// - `before_completion` → `enhance_prompt()` for memory injection
/// - `after_completion` → `extract_combined()` for memory and structured-fact extraction
///
/// Extraction is debounced per agent: once an extraction has been admitted for an
/// agent, further `after_completion` calls for that agent are skipped until
/// `debounce_secs` seconds have elapsed.
pub struct MemoryMiddleware {
    /// Backend that performs the actual prompt enhancement and extraction.
    growth: GrowthIntegration,
    /// Minimum seconds between extractions for the same agent (debounce).
    /// Set to 30 by `new()`; override with `with_debounce_secs()`.
    debounce_secs: u64,
    /// Instant at which an extraction was last admitted for each agent, keyed by the
    /// agent id's string form. The entry is written when the extraction is allowed to
    /// start, not when it completes.
    last_extraction: std::sync::Mutex<std::collections::HashMap<String, std::time::Instant>>,
}

impl MemoryMiddleware {
    /// Creates a middleware around `growth` with the default 30-second
    /// per-agent extraction debounce and no recorded extractions.
    pub fn new(growth: GrowthIntegration) -> Self {
        Self {
            growth,
            debounce_secs: 30,
            last_extraction: std::sync::Mutex::new(std::collections::HashMap::new()),
        }
    }

    /// Set the debounce interval in seconds.
    ///
    /// A value of `0` disables debouncing: every call is allowed to extract.
    pub fn with_debounce_secs(mut self, secs: u64) -> Self {
        self.debounce_secs = secs;
        self
    }

    /// Check if enough time has passed since the last extraction for this agent.
    ///
    /// Returns `true` and records the current instant for `agent_id` when the agent
    /// has no recorded extraction or the recorded one is at least `debounce_secs`
    /// old. Returns `false` (leaving the recorded instant untouched) otherwise.
    fn should_extract(&self, agent_id: &str) -> bool {
        let now = std::time::Instant::now();
        let window = std::time::Duration::from_secs(self.debounce_secs);

        // A poisoned lock only means another thread panicked while holding the guard.
        // The map holds plain timestamps, so it is still usable; recover the guard
        // instead of panicking, keeping memory handling non-fatal.
        let mut map = self
            .last_extraction
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);

        if let Some(last) = map.get_mut(agent_id) {
            if now.duration_since(*last) < window {
                return false;
            }
            // Known agent: refresh in place, no need to allocate a new key.
            *last = now;
            return true;
        }

        map.insert(agent_id.to_string(), now);
        true
    }
}

#[async_trait]
impl AgentMiddleware for MemoryMiddleware {
    /// Identifier this middleware reports to the middleware chain.
    fn name(&self) -> &str { "memory" }
    /// Priority value this middleware reports to the middleware chain.
    fn priority(&self) -> i32 { 150 }

    /// Enhances `ctx.system_prompt` with retrieved memories before the completion runs.
    ///
    /// Always resolves to `Ok(MiddlewareDecision::Continue)`: very short queries are
    /// passed through untouched, and a failed enhancement is logged and ignored so
    /// memory retrieval can never block the agent loop.
    async fn before_completion(&self, ctx: &mut MiddlewareContext) -> Result<MiddlewareDecision> {
        // Minimum number of characters (after trimming) a query needs before memory
        // injection is attempted. With the current value only empty and
        // single-character inputs (e.g. "好") are skipped.
        // NOTE(review): the previous comment also listed "1+6" and "hi" as skipped
        // examples, but a threshold of 2 lets both through — raise this constant if
        // those are meant to be skipped as well.
        const MIN_QUERY_CHARS: usize = 2;

        // Rationale (from the original author): very short queries don't benefit from
        // memory context, and the retriever's scope-based fallback may return
        // high-importance but irrelevant old memories that distract the model.
        // Count chars, not bytes, so CJK input is measured correctly: a single
        // Chinese char is 3 UTF-8 bytes but 1 meaningful character. `take` bounds the
        // scan so long prompts are not walked end to end.
        let query = ctx.user_input.trim();
        if query.chars().take(MIN_QUERY_CHARS).count() < MIN_QUERY_CHARS {
            tracing::debug!(
                "[MemoryMiddleware] Skipping enhancement for short query ({:?}): no memory context needed",
                query
            );
            return Ok(MiddlewareDecision::Continue);
        }

        match self.growth.enhance_prompt(
            &ctx.agent_id,
            &ctx.system_prompt,
            &ctx.user_input,
        ).await {
            Ok(enhanced) => ctx.system_prompt = enhanced,
            // Non-fatal: memory retrieval failure should not block the loop
            Err(e) => tracing::warn!("[MemoryMiddleware] Prompt enhancement failed: {}", e),
        }

        Ok(MiddlewareDecision::Continue)
    }

    /// Extracts memories and structured facts from the finished conversation.
    ///
    /// Always resolves to `Ok(())`: an empty conversation, a debounced call, and an
    /// extraction failure are all treated as non-fatal.
    async fn after_completion(&self, ctx: &MiddlewareContext) -> Result<()> {
        // Check for an empty conversation BEFORE consulting the debouncer:
        // `should_extract` records a timestamp as a side effect, so an empty call
        // would otherwise consume the slot and suppress the next real extraction.
        if ctx.messages.is_empty() {
            return Ok(());
        }

        // Debounce: skip extraction if called too recently for this agent
        let agent_key = ctx.agent_id.to_string();
        if !self.should_extract(&agent_key) {
            tracing::debug!(
                "[MemoryMiddleware] Skipping extraction for agent {} (debounced)",
                agent_key
            );
            return Ok(());
        }

        // Combined extraction: one call yields both the memory count and the
        // structured facts, rather than running two separate extraction passes
        // (process_conversation + extract_structured_facts).
        match self.growth.extract_combined(
            &ctx.agent_id,
            &ctx.messages,
            &ctx.session_id,
        ).await {
            Ok(Some((mem_count, facts))) => {
                tracing::info!(
                    "[MemoryMiddleware] Extracted {} memories + {} structured facts for agent {}",
                    mem_count,
                    facts.len(),
                    agent_key
                );
            }
            Ok(None) => {
                tracing::debug!("[MemoryMiddleware] No memories or facts extracted");
            }
            Err(e) => {
                // Non-fatal: extraction failure should not affect the response
                tracing::warn!("[MemoryMiddleware] Combined extraction failed: {}", e);
            }
        }

        Ok(())
    }
}