refactor: 统一项目名称从OpenFang到ZCLAW
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

重构所有代码和文档中的项目名称,将OpenFang统一更新为ZCLAW。包括:
- 配置文件中的项目名称
- 代码注释和文档引用
- 环境变量和路径
- 类型定义和接口名称
- 测试用例和模拟数据

同时优化部分代码结构,移除未使用的模块,并更新相关依赖项。
This commit is contained in:
iven
2026-03-27 07:36:03 +08:00
parent 4b08804aa9
commit 0d4fa96b82
226 changed files with 7288 additions and 5788 deletions

View File

@@ -32,6 +32,7 @@ uuid = { workspace = true }
# Database
sqlx = { workspace = true }
libsqlite3-sys = { workspace = true }
# Internal crates
zclaw-types = { workspace = true }

View File

@@ -388,6 +388,8 @@ mod tests {
access_count: 0,
created_at: Utc::now(),
last_accessed: Utc::now(),
overview: None,
abstract_summary: None,
}
}

View File

@@ -63,6 +63,7 @@ pub mod tracker;
pub mod viking_adapter;
pub mod storage;
pub mod retrieval;
pub mod summarizer;
// Re-export main types for convenience
pub use types::{
@@ -82,7 +83,8 @@ pub use injector::{InjectionFormat, PromptInjector};
pub use tracker::{AgentMetadata, GrowthTracker, LearningEvent};
pub use viking_adapter::{FindOptions, VikingAdapter, VikingLevel, VikingStorage};
pub use storage::SqliteStorage;
pub use retrieval::{MemoryCache, QueryAnalyzer, SemanticScorer};
pub use retrieval::{EmbeddingClient, MemoryCache, QueryAnalyzer, SemanticScorer};
pub use summarizer::SummaryLlmDriver;
/// Growth system configuration
#[derive(Debug, Clone)]

View File

@@ -18,7 +18,8 @@ struct CacheEntry {
access_count: u32,
}
/// Cache key for efficient lookups
/// Cache key for efficient lookups (reserved for future cache optimization)
#[allow(dead_code)]
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
struct CacheKey {
agent_id: String,

View File

@@ -9,6 +9,6 @@ pub mod semantic;
pub mod query;
pub mod cache;
pub use semantic::SemanticScorer;
pub use semantic::{EmbeddingClient, SemanticScorer};
pub use query::QueryAnalyzer;
pub use cache::MemoryCache;

View File

@@ -253,8 +253,13 @@ impl SemanticScorer {
}
}
/// Look up the embedding vector stored for `uri`.
///
/// Returns an owned copy of the vector held in `entry_embeddings`, or `None`
/// when nothing is stored under that URI.
pub fn get_entry_embedding(&self, uri: &str) -> Option<Vec<f32>> {
    let stored = self.entry_embeddings.get(uri)?;
    Some(stored.clone())
}
/// Compute cosine similarity between two embedding vectors
fn cosine_similarity_embedding(v1: &[f32], v2: &[f32]) -> f32 {
pub fn cosine_similarity_embedding(v1: &[f32], v2: &[f32]) -> f32 {
if v1.is_empty() || v2.is_empty() || v1.len() != v2.len() {
return 0.0;
}

View File

@@ -3,7 +3,7 @@
//! Persistent storage backend using SQLite for production use.
//! Provides efficient querying and full-text search capabilities.
use crate::retrieval::semantic::SemanticScorer;
use crate::retrieval::semantic::{EmbeddingClient, SemanticScorer};
use crate::types::MemoryEntry;
use crate::viking_adapter::{FindOptions, VikingStorage};
use async_trait::async_trait;
@@ -36,6 +36,8 @@ struct MemoryRow {
access_count: i32,
created_at: String,
last_accessed: String,
overview: Option<String>,
abstract_summary: Option<String>,
}
impl SqliteStorage {
@@ -83,6 +85,26 @@ impl SqliteStorage {
Self::new(":memory:").await.expect("Failed to create in-memory database")
}
/// Configure the embedding client used for semantic search.
///
/// Replaces the current scorer with a new one built from `client`, then
/// re-indexes all stored memories so they get embeddings.
///
/// # Errors
///
/// Returns whatever error `warmup_scorer_with_embedding` reports while
/// re-indexing.
pub async fn configure_embedding(
    &self,
    client: Arc<dyn EmbeddingClient>,
) -> Result<()> {
    let new_scorer = SemanticScorer::with_embedding(client);

    // The write guard is a temporary of this statement, so it is released
    // before the warmup below. `warmup_scorer_with_embedding` takes the same
    // write lock itself; keeping a guard alive across that call would make
    // this task wait on its own lock forever.
    *self.scorer.write().await = new_scorer;

    tracing::info!("[SqliteStorage] Embedding client configured, re-indexing with embeddings...");
    self.warmup_scorer_with_embedding().await
}
/// Report whether the current scorer has embedding support available.
///
/// Takes a read lock on the scorer for the duration of the check.
pub async fn is_embedding_available(&self) -> bool {
    self.scorer.read().await.is_embedding_available()
}
/// Initialize database schema with FTS5
async fn initialize_schema(&self) -> Result<()> {
// Create main memories table
@@ -131,6 +153,16 @@ impl SqliteStorage {
.await
.map_err(|e| ZclawError::StorageError(format!("Failed to create importance index: {}", e)))?;
// Migration: add overview column (L1 summary)
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN overview TEXT")
.execute(&self.pool)
.await;
// Migration: add abstract_summary column (L0 keywords)
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN abstract_summary TEXT")
.execute(&self.pool)
.await;
// Create metadata table
sqlx::query(
r#"
@@ -151,7 +183,7 @@ impl SqliteStorage {
/// Warmup semantic scorer with existing entries
async fn warmup_scorer(&self) -> Result<()> {
let rows = sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories"
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
)
.fetch_all(&self.pool)
.await
@@ -173,6 +205,32 @@ impl SqliteStorage {
Ok(())
}
/// Re-index every stored memory into the scorer, with embeddings.
///
/// Loads all rows from the `memories` table, converts each to a
/// `MemoryEntry`, and feeds it to `index_entry_with_embedding`. The scorer
/// write lock is held for the whole indexing pass. A summary of the
/// resulting index is logged at info level.
///
/// # Errors
///
/// Returns `ZclawError::StorageError` when the rows cannot be loaded.
async fn warmup_scorer_with_embedding(&self) -> Result<()> {
    const LOAD_ALL_SQL: &str = "SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories";

    let stored_rows = sqlx::query_as::<_, MemoryRow>(LOAD_ALL_SQL)
        .fetch_all(&self.pool)
        .await
        .map_err(|e| ZclawError::StorageError(format!("Failed to load memories for warmup: {}", e)))?;

    let mut index = self.scorer.write().await;
    for stored in stored_rows {
        let memory = self.row_to_entry(&stored);
        index.index_entry_with_embedding(&memory).await;
    }

    let summary = index.stats();
    tracing::info!(
        "[SqliteStorage] Warmed up scorer with {} entries ({} with embeddings), {} terms",
        summary.indexed_entries,
        summary.embedding_entries,
        summary.unique_terms
    );

    Ok(())
}
/// Convert database row to MemoryEntry
fn row_to_entry(&self, row: &MemoryRow) -> MemoryEntry {
let memory_type = crate::types::MemoryType::parse(&row.memory_type);
@@ -193,6 +251,8 @@ impl SqliteStorage {
access_count: row.access_count as u32,
created_at,
last_accessed,
overview: row.overview.clone(),
abstract_summary: row.abstract_summary.clone(),
}
}
@@ -223,6 +283,8 @@ impl sqlx::FromRow<'_, SqliteRow> for MemoryRow {
access_count: row.try_get("access_count")?,
created_at: row.try_get("created_at")?,
last_accessed: row.try_get("last_accessed")?,
overview: row.try_get("overview").ok(),
abstract_summary: row.try_get("abstract_summary").ok(),
})
}
}
@@ -241,8 +303,8 @@ impl VikingStorage for SqliteStorage {
sqlx::query(
r#"
INSERT OR REPLACE INTO memories
(uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
(uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"#,
)
.bind(&entry.uri)
@@ -253,6 +315,8 @@ impl VikingStorage for SqliteStorage {
.bind(entry.access_count as i32)
.bind(&created_at)
.bind(&last_accessed)
.bind(&entry.overview)
.bind(&entry.abstract_summary)
.execute(&self.pool)
.await
.map_err(|e| ZclawError::StorageError(format!("Failed to store memory: {}", e)))?;
@@ -276,9 +340,13 @@ impl VikingStorage for SqliteStorage {
.execute(&self.pool)
.await;
// Update semantic scorer
// Update semantic scorer (use embedding when available)
let mut scorer = self.scorer.write().await;
scorer.index_entry(entry);
if scorer.is_embedding_available() {
scorer.index_entry_with_embedding(entry).await;
} else {
scorer.index_entry(entry);
}
tracing::debug!("[SqliteStorage] Stored memory: {}", entry.uri);
Ok(())
@@ -286,7 +354,7 @@ impl VikingStorage for SqliteStorage {
async fn get(&self, uri: &str) -> Result<Option<MemoryEntry>> {
let row = sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri = ?"
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri = ?"
)
.bind(uri)
.fetch_optional(&self.pool)
@@ -309,7 +377,7 @@ impl VikingStorage for SqliteStorage {
// Get all matching entries
let rows = if let Some(ref scope) = options.scope {
sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri LIKE ?"
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri LIKE ?"
)
.bind(format!("{}%", scope))
.fetch_all(&self.pool)
@@ -317,7 +385,7 @@ impl VikingStorage for SqliteStorage {
.map_err(|e| ZclawError::StorageError(format!("Failed to find memories: {}", e)))?
} else {
sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories"
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
)
.fetch_all(&self.pool)
.await
@@ -325,14 +393,49 @@ impl VikingStorage for SqliteStorage {
};
// Convert to entries and compute semantic scores
let scorer = self.scorer.read().await;
let use_embedding = {
let scorer = self.scorer.read().await;
scorer.is_embedding_available()
};
let mut scored_entries: Vec<(f32, MemoryEntry)> = Vec::new();
for row in rows {
let entry = self.row_to_entry(&row);
// Compute semantic score using TF-IDF
let semantic_score = scorer.score_similarity(query, &entry);
// Compute semantic score: use embedding when available, fallback to TF-IDF
let semantic_score = if use_embedding {
let scorer = self.scorer.read().await;
let tfidf_score = scorer.score_similarity(query, &entry);
let entry_embedding = scorer.get_entry_embedding(&entry.uri);
drop(scorer);
match entry_embedding {
Some(entry_emb) => {
// Try embedding the query for hybrid scoring
let embedding_client = {
let scorer2 = self.scorer.read().await;
scorer2.get_embedding_client()
};
match embedding_client.embed(query).await {
Ok(query_emb) => {
let emb_score = SemanticScorer::cosine_similarity_embedding(&query_emb, &entry_emb);
// Hybrid: 70% embedding + 30% TF-IDF
emb_score * 0.7 + tfidf_score * 0.3
}
Err(_) => {
tracing::debug!("[SqliteStorage] Query embedding failed, using TF-IDF only");
tfidf_score
}
}
}
None => tfidf_score,
}
} else {
let scorer = self.scorer.read().await;
scorer.score_similarity(query, &entry)
};
// Apply similarity threshold
if let Some(min_similarity) = options.min_similarity {
@@ -362,7 +465,7 @@ impl VikingStorage for SqliteStorage {
async fn find_by_prefix(&self, prefix: &str) -> Result<Vec<MemoryEntry>> {
let rows = sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri LIKE ?"
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri LIKE ?"
)
.bind(format!("{}%", prefix))
.fetch_all(&self.pool)

View File

@@ -0,0 +1,192 @@
//! Memory Summarizer - L0/L1 Summary Generation
//!
//! Provides trait and functions for generating layered summaries of memory entries:
//! - L1 Overview: 1-2 sentence summary (~200 tokens)
//! - L0 Abstract: 3-5 keywords (~100 tokens)
//!
//! The trait-based design allows zclaw-growth to remain decoupled from any
//! specific LLM implementation. The Tauri layer provides a concrete implementation.
use crate::types::MemoryEntry;
/// LLM driver for summary generation.
///
/// Implementations call an LLM API to produce concise summaries of a
/// [`MemoryEntry`]. Both methods report failure as a plain `String` message.
/// `generate_summaries` in this module logs such an error at debug level and
/// yields `None` for that summary instead of propagating it.
///
/// The `Send + Sync` bound lets a driver be shared across threads.
#[async_trait::async_trait]
pub trait SummaryLlmDriver: Send + Sync {
/// Generate a short summary (1-2 sentences, ~200 tokens) for a memory entry.
///
/// Returns the raw LLM text on success, or an error message on failure.
async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String>;
/// Generate keyword extraction (3-5 keywords, ~100 tokens) for a memory entry.
///
/// Returns the raw LLM text on success, or an error message on failure.
async fn generate_abstract(&self, entry: &MemoryEntry) -> Result<String, String>;
}
/// Build the LLM prompt that asks for an L1 overview of `entry`.
///
/// The prompt embeds the entry's memory type, a category label taken from
/// the last `/`-separated segment of the entry URI, and the entry content.
pub fn overview_prompt(entry: &MemoryEntry) -> String {
    let category = entry.uri.rsplit('/').next().unwrap_or("unknown");
    let memory_type = &entry.memory_type;
    let content = &entry.content;

    format!(
        r#"Summarize the following memory entry in 1-2 concise sentences (in the same language as the content).
Focus on the key information. Do not add any preamble or explanation.
Memory type: {}
Category: {}
Content: {}"#,
        memory_type, category, content
    )
}
/// Build the LLM prompt that asks for L0 keywords of `entry`.
///
/// The prompt embeds the entry's memory type and content and instructs the
/// model to answer with a bare comma-separated keyword list.
pub fn abstract_prompt(entry: &MemoryEntry) -> String {
    let memory_type = &entry.memory_type;
    let content = &entry.content;

    format!(
        r#"Extract 3-5 keywords or key phrases from the following memory entry.
Output ONLY the keywords, comma-separated, in the same language as the content.
Do not add any preamble, explanation, or numbering.
Memory type: {}
Content: {}"#,
        memory_type, content
    )
}
/// Generate both the L1 overview and the L0 abstract for a memory entry.
///
/// The driver is asked for the overview first and the abstract second. Each
/// response is passed through `clean_summary`; a response that is empty
/// after cleaning, or a driver error (logged at debug level), yields `None`
/// for that slot.
///
/// Returns an `(overview, abstract_summary)` tuple.
pub async fn generate_summaries(
    driver: &dyn SummaryLlmDriver,
    entry: &MemoryEntry,
) -> (Option<String>, Option<String>) {
    // Shared post-processing: clean the raw LLM text and discard it if
    // nothing is left.
    let keep_non_empty =
        |raw: String| Some(clean_summary(&raw)).filter(|cleaned| !cleaned.is_empty());

    // L1 overview
    let overview = match driver.generate_overview(entry).await {
        Ok(raw) => keep_non_empty(raw),
        Err(err) => {
            tracing::debug!("[Summarizer] Failed to generate overview for {}: {}", entry.uri, err);
            None
        }
    };

    // L0 abstract
    let abstract_summary = match driver.generate_abstract(entry).await {
        Ok(raw) => keep_non_empty(raw),
        Err(err) => {
            tracing::debug!("[Summarizer] Failed to generate abstract for {}: {}", entry.uri, err);
            None
        }
    };

    (overview, abstract_summary)
}
/// Clean an LLM response: strip surrounding whitespace, quotes, and label
/// prefixes.
///
/// Processing order:
/// 1. Trim whitespace, then any leading/trailing `"` or `'` characters, then
///    whitespace again.
/// 2. Repeatedly remove a leading label (`摘要`, `关键词`, `Overview`,
///    `overview`) when it is followed by a colon, trimming whitespace after
///    each removal.
///
/// Both the ASCII colon `:` and the full-width colon (U+FF1A) are accepted
/// after a label, since Chinese-language responses commonly use the latter.
///
/// Returns the cleaned text as an owned `String`; it may be empty.
fn clean_summary(text: &str) -> String {
    const QUOTES: &[char] = &['"', '\''];
    // ASCII colon and full-width colon (U+FF1A). Written as an escape so the
    // distinction cannot be lost to punctuation normalization.
    const COLONS: &[char] = &[':', '\u{FF1A}'];
    const LABELS: &[&str] = &["摘要", "关键词", "Overview", "overview"];

    let mut rest = text.trim().trim_matches(QUOTES).trim();

    // Each pass removes at least one label plus its colon, so this terminates.
    loop {
        let after_label = LABELS.iter().find_map(|label| {
            rest.strip_prefix(*label)
                .and_then(|tail| tail.strip_prefix(COLONS))
        });
        match after_label {
            Some(tail) => rest = tail.trim_start(),
            None => break,
        }
    }

    rest.to_string()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::MemoryType;

    /// Driver stub that always succeeds with deterministic text.
    struct MockSummaryDriver;

    #[async_trait::async_trait]
    impl SummaryLlmDriver for MockSummaryDriver {
        async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String> {
            // Truncate by characters, not bytes: slicing a `str` at a byte
            // offset panics when the offset falls inside a multi-byte UTF-8
            // character (e.g. Chinese content).
            let preview: String = entry.content.chars().take(30).collect();
            Ok(format!("Summary of: {}", preview))
        }

        async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
            Ok("keyword1, keyword2, keyword3".to_string())
        }
    }

    /// Build a knowledge-type entry for agent `test-agent` with the given content.
    fn make_entry(content: &str) -> MemoryEntry {
        MemoryEntry::new("test-agent", MemoryType::Knowledge, "test", content.to_string())
    }

    /// Both summaries are produced when the driver succeeds.
    #[tokio::test]
    async fn test_generate_summaries() {
        let driver = MockSummaryDriver;
        let entry = make_entry("This is a test memory entry about Rust programming.");

        let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;

        assert!(overview.is_some());
        assert!(abstract_summary.is_some());
        assert!(overview.unwrap().contains("Summary of"));
        assert!(abstract_summary.unwrap().contains("keyword1"));
    }

    /// Driver errors are swallowed and reported as `None` for each summary.
    #[tokio::test]
    async fn test_generate_summaries_handles_error() {
        struct FailingDriver;

        #[async_trait::async_trait]
        impl SummaryLlmDriver for FailingDriver {
            async fn generate_overview(&self, _entry: &MemoryEntry) -> Result<String, String> {
                Err("LLM unavailable".to_string())
            }

            async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
                Err("LLM unavailable".to_string())
            }
        }

        let driver = FailingDriver;
        let entry = make_entry("test content");

        let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;

        assert!(overview.is_none());
        assert!(abstract_summary.is_none());
    }

    /// Quotes, label prefixes and surrounding whitespace are removed.
    #[test]
    fn test_clean_summary() {
        assert_eq!(clean_summary("\"hello world\""), "hello world");
        assert_eq!(clean_summary("摘要:你好"), "你好");
        assert_eq!(clean_summary(" keyword1, keyword2 "), "keyword1, keyword2");
        assert_eq!(clean_summary("Overview: something"), "something");
    }

    /// The overview prompt carries the instruction, the content and the memory type.
    #[test]
    fn test_overview_prompt() {
        let entry = make_entry("User prefers dark mode and compact UI");
        let prompt = overview_prompt(&entry);

        assert!(prompt.contains("1-2 concise sentences"));
        assert!(prompt.contains("User prefers dark mode"));
        assert!(prompt.contains("knowledge"));
    }

    /// The abstract prompt carries the instruction and the content.
    #[test]
    fn test_abstract_prompt() {
        let entry = make_entry("Rust is a systems programming language");
        let prompt = abstract_prompt(&entry);

        assert!(prompt.contains("3-5 keywords"));
        assert!(prompt.contains("Rust is a systems"));
    }
}

View File

@@ -72,6 +72,10 @@ pub struct MemoryEntry {
pub created_at: DateTime<Utc>,
/// Last access timestamp
pub last_accessed: DateTime<Utc>,
/// L1 overview: 1-2 sentence summary (~200 tokens)
pub overview: Option<String>,
/// L0 abstract: 3-5 keywords (~100 tokens)
pub abstract_summary: Option<String>,
}
impl MemoryEntry {
@@ -92,6 +96,8 @@ impl MemoryEntry {
access_count: 0,
created_at: Utc::now(),
last_accessed: Utc::now(),
overview: None,
abstract_summary: None,
}
}
@@ -107,6 +113,18 @@ impl MemoryEntry {
self
}
/// Set L1 overview summary
pub fn with_overview(mut self, overview: impl Into<String>) -> Self {
self.overview = Some(overview.into());
self
}
/// Set L0 abstract summary
pub fn with_abstract_summary(mut self, abstract_summary: impl Into<String>) -> Self {
self.abstract_summary = Some(abstract_summary.into());
self
}
/// Mark as accessed
pub fn touch(&mut self) {
self.access_count += 1;