refactor: 统一项目名称从OpenFang到ZCLAW

重构所有代码和文档中的项目名称，将 OpenFang 统一更新为 ZCLAW，包括：配置文件中的项目名称；代码注释和文档引用；环境变量和路径；类型定义和接口名称；测试用例和模拟数据。同时优化部分代码结构，移除未使用的模块，并更新相关依赖项。
2026-03-27 07:36:03 +08:00
parent 4b08804aa9
commit 0d4fa96b82
226 changed files with 7288 additions and 5788 deletions
--- a/crates/zclaw-growth/Cargo.toml
+++ b/crates/zclaw-growth/Cargo.toml
@@ -32,6 +32,7 @@ uuid = { workspace = true }

 # Database
 sqlx = { workspace = true }
+libsqlite3-sys = { workspace = true }

 # Internal crates
 zclaw-types = { workspace = true }
--- a/crates/zclaw-growth/src/injector.rs
+++ b/crates/zclaw-growth/src/injector.rs
@@ -388,6 +388,8 @@ mod tests {
            access_count: 0,
            created_at: Utc::now(),
            last_accessed: Utc::now(),
+            overview: None,
+            abstract_summary: None,
        }
    }

--- a/crates/zclaw-growth/src/lib.rs
+++ b/crates/zclaw-growth/src/lib.rs
@@ -63,6 +63,7 @@ pub mod tracker;
 pub mod viking_adapter;
 pub mod storage;
 pub mod retrieval;
+pub mod summarizer;

 // Re-export main types for convenience
 pub use types::{
@@ -82,7 +83,8 @@ pub use injector::{InjectionFormat, PromptInjector};
 pub use tracker::{AgentMetadata, GrowthTracker, LearningEvent};
 pub use viking_adapter::{FindOptions, VikingAdapter, VikingLevel, VikingStorage};
 pub use storage::SqliteStorage;
-pub use retrieval::{MemoryCache, QueryAnalyzer, SemanticScorer};
+pub use retrieval::{EmbeddingClient, MemoryCache, QueryAnalyzer, SemanticScorer};
+pub use summarizer::SummaryLlmDriver;

 /// Growth system configuration
 #[derive(Debug, Clone)]
--- a/crates/zclaw-growth/src/retrieval/cache.rs
+++ b/crates/zclaw-growth/src/retrieval/cache.rs
@@ -18,7 +18,8 @@ struct CacheEntry {
    access_count: u32,
 }

-/// Cache key for efficient lookups
+/// Cache key for efficient lookups (reserved for future cache optimization)
+#[allow(dead_code)]
 #[derive(Debug, Clone, Hash, Eq, PartialEq)]
 struct CacheKey {
    agent_id: String,
--- a/crates/zclaw-growth/src/retrieval/mod.rs
+++ b/crates/zclaw-growth/src/retrieval/mod.rs
@@ -9,6 +9,6 @@ pub mod semantic;
 pub mod query;
 pub mod cache;

-pub use semantic::SemanticScorer;
+pub use semantic::{EmbeddingClient, SemanticScorer};
 pub use query::QueryAnalyzer;
 pub use cache::MemoryCache;
--- a/crates/zclaw-growth/src/retrieval/semantic.rs
+++ b/crates/zclaw-growth/src/retrieval/semantic.rs
@@ -253,8 +253,13 @@ impl SemanticScorer {
        }
    }

+    /// Look up the embedding stored under `uri`; yields an owned copy, or `None` when absent.
+    pub fn get_entry_embedding(&self, uri: &str) -> Option<Vec<f32>> {
+        self.entry_embeddings.get(uri).map(|stored| stored.to_vec())
+    }
+
    /// Compute cosine similarity between two embedding vectors
-    fn cosine_similarity_embedding(v1: &[f32], v2: &[f32]) -> f32 {
+    pub fn cosine_similarity_embedding(v1: &[f32], v2: &[f32]) -> f32 {
        if v1.is_empty() || v2.is_empty() || v1.len() != v2.len() {
            return 0.0;
        }
--- a/crates/zclaw-growth/src/storage/sqlite.rs
+++ b/crates/zclaw-growth/src/storage/sqlite.rs
@@ -3,7 +3,7 @@
 //! Persistent storage backend using SQLite for production use.
 //! Provides efficient querying and full-text search capabilities.

-use crate::retrieval::semantic::SemanticScorer;
+use crate::retrieval::semantic::{EmbeddingClient, SemanticScorer};
 use crate::types::MemoryEntry;
 use crate::viking_adapter::{FindOptions, VikingStorage};
 use async_trait::async_trait;
@@ -36,6 +36,8 @@ struct MemoryRow {
    access_count: i32,
    created_at: String,
    last_accessed: String,
+    overview: Option<String>,
+    abstract_summary: Option<String>,
 }

 impl SqliteStorage {
@@ -83,6 +85,26 @@ impl SqliteStorage {
        Self::new(":memory:").await.expect("Failed to create in-memory database")
    }

+    /// Install `client` as the embedding backend by swapping in a fresh scorer, then
+    /// re-index the stored memories with embeddings; returns the warmup result.
+    pub async fn configure_embedding(
+        &self,
+        client: Arc<dyn EmbeddingClient>,
+    ) -> Result<()> {
+        // The write guard must be released before warmup, which takes the same lock again;
+        // as a temporary it is dropped at the end of this statement.
+        *self.scorer.write().await = SemanticScorer::with_embedding(client);
+
+        tracing::info!("[SqliteStorage] Embedding client configured, re-indexing with embeddings...");
+        self.warmup_scorer_with_embedding().await
+    }
+
+    /// Report what the current scorer's `is_embedding_available` returns
+    /// (a read lock on the scorer is taken for the duration of the check).
+    pub async fn is_embedding_available(&self) -> bool {
+        self.scorer.read().await.is_embedding_available()
+    }
+
    /// Initialize database schema with FTS5
    async fn initialize_schema(&self) -> Result<()> {
        // Create main memories table
@@ -131,6 +153,16 @@ impl SqliteStorage {
            .await
            .map_err(|e| ZclawError::StorageError(format!("Failed to create importance index: {}", e)))?;

+        // Migration: add overview column (L1 summary)
+        let _ = sqlx::query("ALTER TABLE memories ADD COLUMN overview TEXT")
+            .execute(&self.pool)
+            .await;
+
+        // Migration: add abstract_summary column (L0 keywords)
+        let _ = sqlx::query("ALTER TABLE memories ADD COLUMN abstract_summary TEXT")
+            .execute(&self.pool)
+            .await;
+
        // Create metadata table
        sqlx::query(
            r#"
@@ -151,7 +183,7 @@ impl SqliteStorage {
    /// Warmup semantic scorer with existing entries
    async fn warmup_scorer(&self) -> Result<()> {
        let rows = sqlx::query_as::<_, MemoryRow>(
-            "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories"
+            "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
        )
        .fetch_all(&self.pool)
        .await
@@ -173,6 +205,32 @@ impl SqliteStorage {
        Ok(())
    }

+    /// Load every stored memory and feed it to the scorer's embedding-aware indexer.
+    async fn warmup_scorer_with_embedding(&self) -> Result<()> {
+        const LOAD_ALL: &str = "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories";
+        let stored = sqlx::query_as::<_, MemoryRow>(LOAD_ALL)
+            .fetch_all(&self.pool)
+            .await
+            .map_err(|err| ZclawError::StorageError(format!("Failed to load memories for warmup: {}", err)))?;
+
+        // The write guard is held across the whole loop, including the awaits inside it.
+        let mut index = self.scorer.write().await;
+        for record in &stored {
+            let memory = self.row_to_entry(record);
+            index.index_entry_with_embedding(&memory).await;
+        }
+
+        let summary = index.stats();
+        tracing::info!(
+            "[SqliteStorage] Warmed up scorer with {} entries ({} with embeddings), {} terms",
+            summary.indexed_entries,
+            summary.embedding_entries,
+            summary.unique_terms
+        );
+
+        Ok(())
+    }
+
    /// Convert database row to MemoryEntry
    fn row_to_entry(&self, row: &MemoryRow) -> MemoryEntry {
        let memory_type = crate::types::MemoryType::parse(&row.memory_type);
@@ -193,6 +251,8 @@ impl SqliteStorage {
            access_count: row.access_count as u32,
            created_at,
            last_accessed,
+            overview: row.overview.clone(),
+            abstract_summary: row.abstract_summary.clone(),
        }
    }

@@ -223,6 +283,8 @@ impl sqlx::FromRow<'_, SqliteRow> for MemoryRow {
            access_count: row.try_get("access_count")?,
            created_at: row.try_get("created_at")?,
            last_accessed: row.try_get("last_accessed")?,
+            overview: row.try_get("overview").ok(),
+            abstract_summary: row.try_get("abstract_summary").ok(),
        })
    }
 }
@@ -241,8 +303,8 @@ impl VikingStorage for SqliteStorage {
        sqlx::query(
            r#"
            INSERT OR REPLACE INTO memories
-            (uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed)
-            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+            (uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            "#,
        )
        .bind(&entry.uri)
@@ -253,6 +315,8 @@ impl VikingStorage for SqliteStorage {
        .bind(entry.access_count as i32)
        .bind(&created_at)
        .bind(&last_accessed)
+        .bind(&entry.overview)
+        .bind(&entry.abstract_summary)
        .execute(&self.pool)
        .await
        .map_err(|e| ZclawError::StorageError(format!("Failed to store memory: {}", e)))?;
@@ -276,9 +340,13 @@ impl VikingStorage for SqliteStorage {
        .execute(&self.pool)
        .await;

-        // Update semantic scorer
+        // Update semantic scorer (use embedding when available)
        let mut scorer = self.scorer.write().await;
-        scorer.index_entry(entry);
+        if scorer.is_embedding_available() {
+            scorer.index_entry_with_embedding(entry).await;
+        } else {
+            scorer.index_entry(entry);
+        }

        tracing::debug!("[SqliteStorage] Stored memory: {}", entry.uri);
        Ok(())
@@ -286,7 +354,7 @@ impl VikingStorage for SqliteStorage {

    async fn get(&self, uri: &str) -> Result<Option<MemoryEntry>> {
        let row = sqlx::query_as::<_, MemoryRow>(
-            "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri = ?"
+            "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri = ?"
        )
        .bind(uri)
        .fetch_optional(&self.pool)
@@ -309,7 +377,7 @@ impl VikingStorage for SqliteStorage {
        // Get all matching entries
        let rows = if let Some(ref scope) = options.scope {
            sqlx::query_as::<_, MemoryRow>(
-                "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri LIKE ?"
+                "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri LIKE ?"
            )
            .bind(format!("{}%", scope))
            .fetch_all(&self.pool)
@@ -317,7 +385,7 @@ impl VikingStorage for SqliteStorage {
            .map_err(|e| ZclawError::StorageError(format!("Failed to find memories: {}", e)))?
        } else {
            sqlx::query_as::<_, MemoryRow>(
-                "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories"
+                "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
            )
            .fetch_all(&self.pool)
            .await
@@ -325,14 +393,49 @@ impl VikingStorage for SqliteStorage {
        };

        // Convert to entries and compute semantic scores
-        let scorer = self.scorer.read().await;
+        let use_embedding = {
+            let scorer = self.scorer.read().await;
+            scorer.is_embedding_available()
+        };
+
        let mut scored_entries: Vec<(f32, MemoryEntry)> = Vec::new();

        for row in rows {
            let entry = self.row_to_entry(&row);

-            // Compute semantic score using TF-IDF
-            let semantic_score = scorer.score_similarity(query, &entry);
+            // Compute semantic score: use embedding when available, fallback to TF-IDF
+            let semantic_score = if use_embedding {
+                let scorer = self.scorer.read().await;
+                let tfidf_score = scorer.score_similarity(query, &entry);
+                let entry_embedding = scorer.get_entry_embedding(&entry.uri);
+                drop(scorer);
+
+                match entry_embedding {
+                    Some(entry_emb) => {
+                        // Try embedding the query for hybrid scoring
+                        let embedding_client = {
+                            let scorer2 = self.scorer.read().await;
+                            scorer2.get_embedding_client()
+                        };
+
+                        match embedding_client.embed(query).await {
+                            Ok(query_emb) => {
+                                let emb_score = SemanticScorer::cosine_similarity_embedding(&query_emb, &entry_emb);
+                                // Hybrid: 70% embedding + 30% TF-IDF
+                                emb_score * 0.7 + tfidf_score * 0.3
+                            }
+                            Err(_) => {
+                                tracing::debug!("[SqliteStorage] Query embedding failed, using TF-IDF only");
+                                tfidf_score
+                            }
+                        }
+                    }
+                    None => tfidf_score,
+                }
+            } else {
+                let scorer = self.scorer.read().await;
+                scorer.score_similarity(query, &entry)
+            };

            // Apply similarity threshold
            if let Some(min_similarity) = options.min_similarity {
@@ -362,7 +465,7 @@ impl VikingStorage for SqliteStorage {

    async fn find_by_prefix(&self, prefix: &str) -> Result<Vec<MemoryEntry>> {
        let rows = sqlx::query_as::<_, MemoryRow>(
-            "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri LIKE ?"
+            "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri LIKE ?"
        )
        .bind(format!("{}%", prefix))
        .fetch_all(&self.pool)
--- a/crates/zclaw-growth/src/summarizer.rs
+++ b/crates/zclaw-growth/src/summarizer.rs
@@ -0,0 +1,192 @@
+//! Memory Summarizer - L0/L1 Summary Generation
+//!
+//! Provides trait and functions for generating layered summaries of memory entries:
+//! - L1 Overview: 1-2 sentence summary (~200 tokens)
+//! - L0 Abstract: 3-5 keywords (~100 tokens)
+//!
+//! The trait-based design allows zclaw-growth to remain decoupled from any
+//! specific LLM implementation. The Tauri layer provides a concrete implementation.
+
+use crate::types::MemoryEntry;
+
+/// Abstraction over the LLM backend used for summary generation.
+/// Both methods are async and report failure as a plain `String` error.
+#[async_trait::async_trait]
+pub trait SummaryLlmDriver: Send + Sync {
+    /// Produce the L1 overview for `entry`: a short summary (1-2 sentences, ~200 tokens).
+    async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String>;
+
+    /// Produce the L0 abstract for `entry`: 3-5 extracted keywords (~100 tokens).
+    async fn generate_abstract(&self, entry: &MemoryEntry) -> Result<String, String>;
+}
+
+/// Build the prompt asking the LLM for an L1 overview of `entry`.
+/// The "Category" field is the last `/`-separated segment of the entry URI.
+pub fn overview_prompt(entry: &MemoryEntry) -> String {
+    let category = entry.uri.rsplit('/').next().unwrap_or("unknown");
+    format!(
+        r#"Summarize the following memory entry in 1-2 concise sentences (in the same language as the content).
+Focus on the key information. Do not add any preamble or explanation.
+
+Memory type: {}
+Category: {}
+Content: {}"#,
+        entry.memory_type, category, entry.content
+    )
+}
+
+/// Build the prompt asking the LLM for an L0 abstract (comma-separated keywords) of `entry`.
+pub fn abstract_prompt(entry: &MemoryEntry) -> String {
+    let (kind, body) = (&entry.memory_type, &entry.content);
+    format!(r#"Extract 3-5 keywords or key phrases from the following memory entry.
+Output ONLY the keywords, comma-separated, in the same language as the content.
+Do not add any preamble, explanation, or numbering.
+
+Memory type: {}
+Content: {}"#,
+        kind, body
+    )
+}
+
+/// Ask `driver` for both summary layers of `entry`, one request after the other.
+/// Returns `(overview, abstract_summary)`; an element is `None` when its request failed
+/// or its cleaned response was empty. Failures are logged at debug level only, and a
+/// failed overview does not stop the abstract from being requested.
+pub async fn generate_summaries(
+    driver: &dyn SummaryLlmDriver,
+    entry: &MemoryEntry,
+) -> (Option<String>, Option<String>) {
+    // Normalise a raw LLM reply; a reply that cleans down to nothing counts as absent.
+    let keep_non_empty = |raw: String| {
+        let cleaned = clean_summary(&raw);
+        if cleaned.is_empty() {
+            None
+        } else {
+            Some(cleaned)
+        }
+    };
+
+    // L1 overview
+    let overview_reply = driver.generate_overview(entry).await;
+    let overview = match overview_reply {
+        Ok(raw) => keep_non_empty(raw),
+        Err(err) => {
+            tracing::debug!("[Summarizer] Failed to generate overview for {}: {}", entry.uri, err);
+            None
+        }
+    };
+
+    // L0 abstract
+    let abstract_reply = driver.generate_abstract(entry).await;
+    let abstract_summary = match abstract_reply {
+        Ok(raw) => keep_non_empty(raw),
+        Err(err) => {
+            tracing::debug!("[Summarizer] Failed to generate abstract for {}: {}", entry.uri, err);
+            None
+        }
+    };
+
+    (overview, abstract_summary)
+}
+
+/// Clean an LLM response: strip surrounding whitespace, wrapping quotes and known label
+/// prefixes. Quotes are removed both before and after the label pass, so a reply such as
+/// `Overview: "text"` does not end up with a dangling quote.
+fn clean_summary(text: &str) -> String {
+    const QUOTES: &[char] = &['"', '\''];
+    const LABELS: &[&str] = &["摘要：", "摘要:", "关键词：", "关键词:", "Overview:", "overview:"];
+
+    // First pass: whitespace and quotes wrapped around the whole reply.
+    let mut rest = text.trim().trim_matches(QUOTES).trim_start();
+    for label in LABELS {
+        rest = rest.trim_start_matches(*label).trim_start();
+    }
+    // Second pass: quotes that only wrapped the payload after a label.
+    let unquoted = rest.trim().trim_matches(QUOTES);
+    unquoted.trim().to_string()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::MemoryType;
+
+    /// Driver stub that answers both requests successfully without calling any LLM.
+    struct MockSummaryDriver;
+
+    #[async_trait::async_trait]
+    impl SummaryLlmDriver for MockSummaryDriver {
+        async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String> {
+            // Truncate by characters, not bytes: a byte slice would panic mid-code-point.
+            Ok(format!("Summary of: {}", entry.content.chars().take(30).collect::<String>()))
+        }
+
+        async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
+            Ok("keyword1, keyword2, keyword3".to_string())
+        }
+    }
+
+    fn make_entry(content: &str) -> MemoryEntry {
+        MemoryEntry::new("test-agent", MemoryType::Knowledge, "test", content.to_string())
+    }
+
+    #[tokio::test]
+    async fn test_generate_summaries() {
+        let entry = make_entry("This is a test memory entry about Rust programming.");
+
+        let (overview, abstract_summary) = generate_summaries(&MockSummaryDriver, &entry).await;
+
+        assert!(overview.is_some());
+        assert!(abstract_summary.is_some());
+        assert!(overview.unwrap().contains("Summary of"));
+        assert!(abstract_summary.unwrap().contains("keyword1"));
+    }
+
+    #[tokio::test]
+    async fn test_generate_summaries_handles_error() {
+        struct FailingDriver;
+        #[async_trait::async_trait]
+        impl SummaryLlmDriver for FailingDriver {
+            async fn generate_overview(&self, _entry: &MemoryEntry) -> Result<String, String> {
+                Err("LLM unavailable".to_string())
+            }
+            async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
+                Err("LLM unavailable".to_string())
+            }
+        }
+
+        let entry = make_entry("test content");
+
+        let (overview, abstract_summary) = generate_summaries(&FailingDriver, &entry).await;
+
+        assert!(overview.is_none());
+        assert!(abstract_summary.is_none());
+    }
+
+    #[test]
+    fn test_clean_summary() {
+        assert_eq!(clean_summary("\"hello world\""), "hello world");
+        assert_eq!(clean_summary("摘要：你好"), "你好");
+        assert_eq!(clean_summary("  keyword1, keyword2  "), "keyword1, keyword2");
+        assert_eq!(clean_summary("Overview: something"), "something");
+    }
+
+    #[test]
+    fn test_overview_prompt() {
+        let entry = make_entry("User prefers dark mode and compact UI");
+        let prompt = overview_prompt(&entry);
+
+        assert!(prompt.contains("1-2 concise sentences"));
+        assert!(prompt.contains("User prefers dark mode"));
+        assert!(prompt.contains("knowledge"));
+    }
+
+    #[test]
+    fn test_abstract_prompt() {
+        let entry = make_entry("Rust is a systems programming language");
+        let prompt = abstract_prompt(&entry);
+
+        assert!(prompt.contains("3-5 keywords"));
+        assert!(prompt.contains("Rust is a systems"));
+    }
+}
--- a/crates/zclaw-growth/src/types.rs
+++ b/crates/zclaw-growth/src/types.rs
@@ -72,6 +72,10 @@ pub struct MemoryEntry {
    pub created_at: DateTime<Utc>,
    /// Last access timestamp
    pub last_accessed: DateTime<Utc>,
+    /// L1 overview: 1-2 sentence summary (~200 tokens)
+    pub overview: Option<String>,
+    /// L0 abstract: 3-5 keywords (~100 tokens)
+    pub abstract_summary: Option<String>,
 }

 impl MemoryEntry {
@@ -92,6 +96,8 @@ impl MemoryEntry {
            access_count: 0,
            created_at: Utc::now(),
            last_accessed: Utc::now(),
+            overview: None,
+            abstract_summary: None,
        }
    }

@@ -107,6 +113,18 @@ impl MemoryEntry {
        self
    }

+    /// Builder-style setter: store `overview` as the L1 overview and hand the
+    /// entry back by value.
+    pub fn with_overview(self, overview: impl Into<String>) -> Self {
+        Self { overview: Some(overview.into()), ..self }
+    }
+
+    /// Builder-style setter: store `abstract_summary` as the L0 abstract and hand the
+    /// entry back by value.
+    pub fn with_abstract_summary(self, abstract_summary: impl Into<String>) -> Self {
+        Self { abstract_summary: Some(abstract_summary.into()), ..self }
+    }
+
    /// Mark as accessed
    pub fn touch(&mut self) {
        self.access_count += 1;