refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup

- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace `let _ =` with proper `log::warn!` across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
2026-04-03 00:28:03 +08:00
parent 0a04b260a4
commit 52bdafa633
55 changed files with 4130 additions and 1959 deletions
--- a/crates/zclaw-memory/Cargo.toml
+++ b/crates/zclaw-memory/Cargo.toml
@@ -24,3 +24,6 @@ libsqlite3-sys = { workspace = true }

 # Async utilities
 futures = { workspace = true }
+async-trait = { workspace = true }
+
+anyhow = { workspace = true }
--- a/crates/zclaw-memory/src/fact.rs
+++ b/crates/zclaw-memory/src/fact.rs
@@ -0,0 +1,202 @@
+//! Structured fact extraction and storage.
+//!
+//! Inspired by DeerFlow's LLM-driven fact extraction with deduplication
+//! and confidence scoring. Facts are natural language statements extracted
+//! from conversations, categorized and scored for retrieval quality.
+
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::time::{SystemTime, UNIX_EPOCH};
+
+/// Process-wide sequence number mixed into fact IDs so that IDs minted within the same second still differ (avoids a `uuid` dependency).
+static FACT_COUNTER: AtomicU64 = AtomicU64::new(0);
+
+/// Current wall-clock time as whole seconds since the Unix epoch.
+/// Yields 0 when the system clock reads earlier than the epoch.
+fn now_secs() -> u64 {
+    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
+    since_epoch.map_or(0, |elapsed| elapsed.as_secs())
+}
+
+/// Mints a `fact-<unix-seconds>-<sequence>` identifier, advancing the global counter by one.
+fn next_fact_id() -> String {
+    let stamp = now_secs();
+    format!("fact-{}-{}", stamp, FACT_COUNTER.fetch_add(1, Ordering::Relaxed))
+}
+
+/// A structured fact extracted from conversation; serializable and deserializable via serde.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Fact {
+    /// Unique identifier (`Fact::new` generates `fact-<unix-seconds>-<sequence>`)
+    pub id: String,
+    /// The fact content (natural language); batch deduplication compares it trimmed and lowercased
+    pub content: String,
+    /// Category of the fact
+    pub category: FactCategory,
+    /// Confidence score (0.0 - 1.0); only `Fact::new` clamps it, direct construction and deserialization do not
+    pub confidence: f64,
+    /// When this fact was created (unix timestamp in seconds); `Fact::new` stamps the current time
+    pub created_at: u64,
+    /// Source session ID; `None` until set, e.g. via `Fact::with_source`
+    pub source: Option<String>,
+}
+
+/// Categories for structured facts; serde (de)serializes the variants under snake_case names, e.g. `task_context`.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum FactCategory {
+    /// User preference (language, style, format)
+    Preference,
+    /// Domain knowledge or context
+    Knowledge,
+    /// Behavioral pattern or habit
+    Behavior,
+    /// Task-specific context
+    TaskContext,
+    /// General information
+    General,
+}
+
+impl Fact {
+    /// Build a fact around `content` with a freshly minted ID, the current time as `created_at`, no source, and `confidence` clamped into 0.0..=1.0 (`f64::clamp` passes NaN through unchanged).
+    pub fn new(content: impl Into<String>, category: FactCategory, confidence: f64) -> Self {
+        Self {
+            id: next_fact_id(),
+            content: content.into(),
+            category,
+            confidence: confidence.clamp(0.0, 1.0),
+            created_at: now_secs(),
+            source: None,
+        }
+    }
+
+    /// Builder-style setter: consumes the fact and returns it with `source` set to the given session ID.
+    pub fn with_source(self, source: impl Into<String>) -> Self {
+        let source = Some(source.into());
+        Self { source, ..self }
+    }
+}
+
+/// Result of a fact extraction batch: the extracted `facts` plus the agent and session identifiers carried alongside them.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ExtractedFactBatch {
+    pub facts: Vec<Fact>,
+    pub agent_id: String,
+    pub session_id: String,
+}
+
+impl ExtractedFactBatch {
+    /// Deduplicate facts by trimmed, lowercased content comparison.
+    ///
+    /// Within each group of duplicates only the fact with the highest confidence is kept
+    /// (the earliest one on ties; a NaN confidence never displaces an earlier fact).
+    /// Surviving facts keep their original relative order.
+    pub fn deduplicate(mut self) -> Self {
+        // Pass 1: map each normalized content key to the index of its winning fact.
+        let mut best_index: HashMap<String, usize> = HashMap::with_capacity(self.facts.len());
+        for (i, fact) in self.facts.iter().enumerate() {
+            let key = fact.content.trim().to_lowercase();
+            let winner = best_index.entry(key).or_insert(i);
+            if fact.confidence > self.facts[*winner].confidence {
+                *winner = i;
+            }
+        }
+
+        // Pass 2: drop every non-winner in one O(n) sweep. Deciding per original position
+        // (rather than calling `Vec::remove` on a list of indices) keeps the result correct
+        // no matter in which order the losing facts were discovered.
+        let mut keep = vec![false; self.facts.len()];
+        for &i in best_index.values() {
+            keep[i] = true;
+        }
+        let mut keep = keep.into_iter();
+        self.facts.retain(|_| keep.next().unwrap_or(false));
+
+        self
+    }
+
+    /// Drop facts whose confidence is below `min_confidence`; facts exactly at the threshold stay.
+    pub fn filter_by_confidence(mut self, min_confidence: f64) -> Self {
+        self.facts.retain(|f| f.confidence >= min_confidence);
+        self
+    }
+
+    /// Returns true if there are no facts in the batch.
+    pub fn is_empty(&self) -> bool {
+        self.facts.is_empty()
+    }
+
+    /// Returns the number of facts in the batch.
+    pub fn len(&self) -> usize {
+        self.facts.len()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Wraps `facts` in a batch carrying the fixed IDs these tests use.
+    fn batch_of(facts: Vec<Fact>) -> ExtractedFactBatch {
+        ExtractedFactBatch {
+            facts,
+            agent_id: "agent-1".into(),
+            session_id: "sess-1".into(),
+        }
+    }
+
+    /// A confidence above the valid range is pulled back to 1.0.
+    #[test]
+    fn test_fact_new_clamps_confidence() {
+        let fact = Fact::new("hello", FactCategory::General, 1.5);
+        assert!((fact.confidence - 1.0).abs() < f64::EPSILON);
+    }
+
+    /// `with_source` records the session ID it is given.
+    #[test]
+    fn test_fact_with_source() {
+        let fact = Fact::new("prefers dark mode", FactCategory::Preference, 0.9);
+        let fact = fact.with_source("sess-123");
+        assert_eq!(fact.source.as_deref(), Some("sess-123"));
+    }
+
+    /// Of two facts differing only in letter case, the 0.95-confidence one survives.
+    #[test]
+    fn test_deduplicate_keeps_higher_confidence() {
+        let deduped = batch_of(vec![
+            Fact::new("likes Python", FactCategory::Preference, 0.8),
+            Fact::new("Likes Python", FactCategory::Preference, 0.95),
+            Fact::new("uses VSCode", FactCategory::Behavior, 0.7),
+        ])
+        .deduplicate();
+
+        assert_eq!(deduped.facts.len(), 2);
+        let survivor = deduped
+            .facts
+            .iter()
+            .find(|f| f.content.contains("Python"))
+            .unwrap();
+        assert!((survivor.confidence - 0.95).abs() < f64::EPSILON);
+    }
+
+    /// Facts scoring below the 0.7 threshold are dropped; the other two remain.
+    #[test]
+    fn test_filter_by_confidence() {
+        let filtered = batch_of(vec![
+            Fact::new("high", FactCategory::General, 0.9),
+            Fact::new("medium", FactCategory::General, 0.75),
+            Fact::new("low", FactCategory::General, 0.3),
+        ])
+        .filter_by_confidence(0.7);
+        assert_eq!(filtered.facts.len(), 2);
+    }
+
+    /// A batch built from no facts is empty and has length zero.
+    #[test]
+    fn test_is_empty_and_len() {
+        let batch = batch_of(Vec::new());
+        assert!(batch.is_empty());
+        assert_eq!(batch.len(), 0);
+    }
+}
--- a/crates/zclaw-memory/src/lib.rs
+++ b/crates/zclaw-memory/src/lib.rs
@@ -5,7 +5,9 @@
 mod store;
 mod session;
 mod schema;
+pub mod fact;

 pub use store::*;
 pub use session::*;
 pub use schema::*;
+pub use fact::{Fact, FactCategory, ExtractedFactBatch};