zclaw_openfang/crates/zclaw-growth/src/summarizer.rs

//! Memory Summarizer - L0/L1 Summary Generation
//!
//! Provides trait and functions for generating layered summaries of memory entries:
//! - L1 Overview: 1-2 sentence summary (~200 tokens)
//! - L0 Abstract: 3-5 keywords (~100 tokens)
//!
//! The trait-based design allows zclaw-growth to remain decoupled from any
//! specific LLM implementation. The Tauri layer provides a concrete implementation.

use crate::types::MemoryEntry;

/// LLM driver for summary generation.
///
/// Implementations call an LLM API to produce concise summaries. The trait is
/// `Send + Sync` and is consumed as `&dyn SummaryLlmDriver` by
/// [`generate_summaries`], so this module never names a concrete LLM client.
///
/// Both methods report failure as a plain `String` message.
/// [`generate_summaries`] logs that message at debug level and treats the
/// corresponding summary as absent instead of propagating the error.
#[async_trait::async_trait]
pub trait SummaryLlmDriver: Send + Sync {
    /// Generate a short summary (1-2 sentences, ~200 tokens) for a memory entry.
    ///
    /// [`overview_prompt`] builds a prompt with the same requirements;
    /// presumably implementations send it to the model, but nothing in this
    /// trait enforces that.
    ///
    /// # Errors
    /// Returns `Err` with a message when the implementation cannot produce an
    /// overview.
    async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String>;

    /// Generate keyword extraction (3-5 keywords, ~100 tokens) for a memory entry.
    ///
    /// [`abstract_prompt`] builds a prompt with the same requirements;
    /// presumably implementations send it to the model, but nothing in this
    /// trait enforces that.
    ///
    /// # Errors
    /// Returns `Err` with a message when the implementation cannot produce an
    /// abstract.
    async fn generate_abstract(&self, entry: &MemoryEntry) -> Result<String, String>;
}

/// Build the prompt that asks the LLM for an L1 overview of `entry`.
///
/// The prompt requests a 1-2 sentence summary written in the same language as
/// the content, and embeds the entry's memory type, category and content.
///
/// The category is the text after the last `/` in `entry.uri`. When that
/// segment is empty (empty URI, or a URI ending in `/`) the placeholder
/// `unknown` is used, so the prompt never carries a blank `Category:` line.
pub fn overview_prompt(entry: &MemoryEntry) -> String {
    // `rsplit` always yields at least one item, so an `unwrap_or` applied
    // directly to `next()` can never fire. Filtering out an empty segment is
    // what makes the "unknown" fallback reachable.
    let category = entry
        .uri
        .rsplit('/')
        .next()
        .filter(|segment| !segment.is_empty())
        .unwrap_or("unknown");

    format!(
        r#"Summarize the following memory entry in 1-2 concise sentences (in the same language as the content).
Focus on the key information. Do not add any preamble or explanation.

Memory type: {}
Category: {}
Content: {}"#,
        entry.memory_type, category, entry.content
    )
}

/// Build the prompt that asks the LLM for an L0 abstract of `entry`.
///
/// The prompt requests 3-5 comma-separated keywords or key phrases in the same
/// language as the content, and embeds the entry's memory type and content.
pub fn abstract_prompt(entry: &MemoryEntry) -> String {
    let memory_type = &entry.memory_type;
    let content = &entry.content;

    format!(
        r#"Extract 3-5 keywords or key phrases from the following memory entry.
Output ONLY the keywords, comma-separated, in the same language as the content.
Do not add any preamble, explanation, or numbering.

Memory type: {}
Content: {}"#,
        memory_type, content
    )
}

/// Produce the L1 overview and the L0 abstract for one memory entry.
///
/// The driver is asked for the overview first and the abstract second; the
/// two requests are awaited one after the other. Each response is passed
/// through `clean_summary`.
///
/// Returns `(overview, abstract_summary)`. A slot is `None` when the driver
/// returned an error (logged at debug level) or when the cleaned text is
/// empty. A failure in one slot does not affect the other.
pub async fn generate_summaries(
    driver: &dyn SummaryLlmDriver,
    entry: &MemoryEntry,
) -> (Option<String>, Option<String>) {
    // L1: short prose overview.
    let overview = match driver.generate_overview(entry).await {
        Err(e) => {
            tracing::debug!("[Summarizer] Failed to generate overview for {}: {}", entry.uri, e);
            None
        }
        Ok(raw) => Some(clean_summary(&raw)).filter(|summary| !summary.is_empty()),
    };

    // L0: keyword abstract.
    let abstract_summary = match driver.generate_abstract(entry).await {
        Err(e) => {
            tracing::debug!("[Summarizer] Failed to generate abstract for {}: {}", entry.uri, e);
            None
        }
        Ok(raw) => Some(clean_summary(&raw)).filter(|summary| !summary.is_empty()),
    };

    (overview, abstract_summary)
}

/// Clean a raw LLM response into bare summary text.
///
/// The following are removed, repeatedly, until nothing more applies:
/// - surrounding whitespace;
/// - *balanced* wrapping quotes (`"…"` or `'…'`);
/// - a leading label such as `摘要：`, `关键词:`, `Overview:`, `Summary:` or
///   `Keywords:`.
///
/// Quotes are only stripped as a matching pair. An unpaired quote or a
/// trailing apostrophe is part of the text (`"Dark mode" is preferred`,
/// `the users'`) and is left alone. Quotes are also re-checked after a label
/// is removed, so `Overview: "text"` yields `text`.
///
/// Returns an empty string when nothing but whitespace, quotes and labels
/// was supplied.
fn clean_summary(text: &str) -> String {
    /// Labels that models tend to prepend to their answer.
    const LABELS: [&str; 10] = [
        "摘要：",
        "摘要:",
        "关键词：",
        "关键词:",
        "Overview:",
        "overview:",
        "Summary:",
        "summary:",
        "Keywords:",
        "keywords:",
    ];

    /// Remove one matching pair of `quote` characters wrapping `s`, if present.
    fn strip_pair(s: &str, quote: char) -> Option<&str> {
        s.strip_prefix(quote)?.strip_suffix(quote)
    }

    /// Trim whitespace and peel off every layer of balanced wrapping quotes.
    fn unquote(mut s: &str) -> &str {
        loop {
            s = s.trim();
            let inner = strip_pair(s, '"').or_else(|| strip_pair(s, '\''));
            match inner {
                Some(unwrapped) => s = unwrapped,
                None => return s,
            }
        }
    }

    /// Drop the first label from `LABELS` that prefixes `s`.
    fn strip_label(s: &str) -> Option<&str> {
        LABELS.iter().find_map(|label| s.strip_prefix(*label))
    }

    let mut cleaned = unquote(text);
    // Every pass removes a non-empty label, so the loop terminates.
    while let Some(rest) = strip_label(cleaned) {
        cleaned = unquote(rest);
    }
    cleaned.to_string()
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::MemoryType;

    /// Driver that always succeeds with deterministic output.
    struct MockSummaryDriver;

    #[async_trait::async_trait]
    impl SummaryLlmDriver for MockSummaryDriver {
        async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String> {
            // Truncate by characters, not bytes: slicing `content[..30]` panics
            // when byte 30 falls inside a multi-byte UTF-8 character.
            let preview: String = entry.content.chars().take(30).collect();
            Ok(format!("Summary of: {}", preview))
        }

        async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
            Ok("keyword1, keyword2, keyword3".to_string())
        }
    }

    /// Build a knowledge entry with fixed agent and category for the tests.
    fn make_entry(content: &str) -> MemoryEntry {
        MemoryEntry::new("test-agent", MemoryType::Knowledge, "test", content.to_string())
    }

    #[tokio::test]
    async fn test_generate_summaries() {
        let driver = MockSummaryDriver;
        let entry = make_entry("This is a test memory entry about Rust programming.");

        let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;

        assert!(overview.is_some());
        assert!(abstract_summary.is_some());
        assert!(overview.unwrap().contains("Summary of"));
        assert!(abstract_summary.unwrap().contains("keyword1"));
    }

    #[tokio::test]
    async fn test_generate_summaries_multibyte_content() {
        // One ASCII byte followed by 3-byte characters puts byte offset 30 in
        // the middle of a character, which a byte slice would panic on.
        let driver = MockSummaryDriver;
        let entry = make_entry("R记忆条目：用户偏好深色模式与紧凑界面布局");

        let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;

        assert!(overview.unwrap().starts_with("Summary of: "));
        assert!(abstract_summary.is_some());
    }

    #[tokio::test]
    async fn test_generate_summaries_handles_error() {
        struct FailingDriver;
        #[async_trait::async_trait]
        impl SummaryLlmDriver for FailingDriver {
            async fn generate_overview(&self, _entry: &MemoryEntry) -> Result<String, String> {
                Err("LLM unavailable".to_string())
            }
            async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
                Err("LLM unavailable".to_string())
            }
        }

        let driver = FailingDriver;
        let entry = make_entry("test content");

        let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;

        assert!(overview.is_none());
        assert!(abstract_summary.is_none());
    }

    #[tokio::test]
    async fn test_generate_summaries_drops_blank_responses() {
        // Responses that clean down to nothing must be reported as absent.
        struct BlankDriver;
        #[async_trait::async_trait]
        impl SummaryLlmDriver for BlankDriver {
            async fn generate_overview(&self, _entry: &MemoryEntry) -> Result<String, String> {
                Ok("   ".to_string())
            }
            async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
                Ok("\"\"".to_string())
            }
        }

        let driver = BlankDriver;
        let entry = make_entry("test content");

        let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;

        assert!(overview.is_none());
        assert!(abstract_summary.is_none());
    }

    #[test]
    fn test_clean_summary() {
        assert_eq!(clean_summary("\"hello world\""), "hello world");
        assert_eq!(clean_summary("摘要：你好"), "你好");
        assert_eq!(clean_summary("  keyword1, keyword2  "), "keyword1, keyword2");
        assert_eq!(clean_summary("Overview: something"), "something");
    }

    #[test]
    fn test_overview_prompt() {
        let entry = make_entry("User prefers dark mode and compact UI");
        let prompt = overview_prompt(&entry);

        assert!(prompt.contains("1-2 concise sentences"));
        assert!(prompt.contains("User prefers dark mode"));
        assert!(prompt.contains("knowledge"));
    }

    #[test]
    fn test_abstract_prompt() {
        let entry = make_entry("Rust is a systems programming language");
        let prompt = abstract_prompt(&entry);

        assert!(prompt.contains("3-5 keywords"));
        assert!(prompt.contains("Rust is a systems"));
    }
}