refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup
- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace `let _ =` with proper `log::warn!` across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
This commit is contained in:
@@ -24,3 +24,6 @@ libsqlite3-sys = { workspace = true }
|
||||
|
||||
# Async utilities
|
||||
futures = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
|
||||
anyhow = { workspace = true }
|
||||
|
||||
202
crates/zclaw-memory/src/fact.rs
Normal file
202
crates/zclaw-memory/src/fact.rs
Normal file
@@ -0,0 +1,202 @@
|
||||
//! Structured fact extraction and storage.
|
||||
//!
|
||||
//! Inspired by DeerFlow's LLM-driven fact extraction with deduplication
|
||||
//! and confidence scoring. Facts are natural language statements extracted
|
||||
//! from conversations, categorized and scored for retrieval quality.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// Process-wide sequence number that `next_fact_id` appends to each ID.
///
/// It starts at zero and is bumped atomically on every generated ID, which
/// lets IDs be minted without pulling in a `uuid` dependency.
static FACT_COUNTER: AtomicU64 = AtomicU64::new(0);
|
||||
|
||||
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Yields `0` if the system clock reports a time before the epoch.
fn now_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        // Clock is set before 1970: behave like a zero-length duration.
        Err(_) => 0,
    }
}
|
||||
|
||||
fn next_fact_id() -> String {
|
||||
let ts = now_secs();
|
||||
let seq = FACT_COUNTER.fetch_add(1, Ordering::Relaxed);
|
||||
format!("fact-{}-{}", ts, seq)
|
||||
}
|
||||
|
||||
/// A structured fact extracted from conversation.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Fact {
|
||||
/// Unique identifier
|
||||
pub id: String,
|
||||
/// The fact content (natural language)
|
||||
pub content: String,
|
||||
/// Category of the fact
|
||||
pub category: FactCategory,
|
||||
/// Confidence score (0.0 - 1.0)
|
||||
pub confidence: f64,
|
||||
/// When this fact was extracted (unix timestamp in seconds)
|
||||
pub created_at: u64,
|
||||
/// Source session ID
|
||||
pub source: Option<String>,
|
||||
}
|
||||
|
||||
/// Categories for structured facts.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum FactCategory {
|
||||
/// User preference (language, style, format)
|
||||
Preference,
|
||||
/// Domain knowledge or context
|
||||
Knowledge,
|
||||
/// Behavioral pattern or habit
|
||||
Behavior,
|
||||
/// Task-specific context
|
||||
TaskContext,
|
||||
/// General information
|
||||
General,
|
||||
}
|
||||
|
||||
impl Fact {
|
||||
/// Create a new fact with auto-generated ID and timestamp.
|
||||
pub fn new(content: impl Into<String>, category: FactCategory, confidence: f64) -> Self {
|
||||
Self {
|
||||
id: next_fact_id(),
|
||||
content: content.into(),
|
||||
category,
|
||||
confidence: confidence.clamp(0.0, 1.0),
|
||||
created_at: now_secs(),
|
||||
source: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Attach a source session ID (builder pattern).
|
||||
pub fn with_source(mut self, source: impl Into<String>) -> Self {
|
||||
self.source = Some(source.into());
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of a fact extraction batch.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ExtractedFactBatch {
|
||||
pub facts: Vec<Fact>,
|
||||
pub agent_id: String,
|
||||
pub session_id: String,
|
||||
}
|
||||
|
||||
impl ExtractedFactBatch {
|
||||
/// Deduplicate facts by trimmed, lowercased content comparison.
|
||||
/// When duplicates are found, keep the one with higher confidence.
|
||||
pub fn deduplicate(mut self) -> Self {
|
||||
let mut best_index: HashMap<String, usize> = HashMap::new();
|
||||
let mut to_remove: Vec<usize> = Vec::new();
|
||||
|
||||
for (i, fact) in self.facts.iter().enumerate() {
|
||||
let key = fact.content.trim().to_lowercase();
|
||||
if let Some(&prev_idx) = best_index.get(&key) {
|
||||
// Keep the one with higher confidence
|
||||
if self.facts[prev_idx].confidence >= fact.confidence {
|
||||
to_remove.push(i);
|
||||
} else {
|
||||
to_remove.push(prev_idx);
|
||||
best_index.insert(key, i);
|
||||
}
|
||||
} else {
|
||||
best_index.insert(key, i);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove in reverse order to maintain valid indices
|
||||
for idx in to_remove.into_iter().rev() {
|
||||
self.facts.remove(idx);
|
||||
}
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
/// Filter facts below the given confidence threshold.
|
||||
pub fn filter_by_confidence(mut self, min_confidence: f64) -> Self {
|
||||
self.facts.retain(|f| f.confidence >= min_confidence);
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns true if there are no facts in the batch.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.facts.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the number of facts in the batch.
|
||||
pub fn len(&self) -> usize {
|
||||
self.facts.len()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap `facts` in a batch carrying the fixed IDs used throughout these tests.
    fn batch_of(facts: Vec<Fact>) -> ExtractedFactBatch {
        ExtractedFactBatch {
            facts,
            agent_id: "agent-1".into(),
            session_id: "sess-1".into(),
        }
    }

    #[test]
    fn test_fact_new_clamps_confidence() {
        // 1.5 is out of range and must be pulled down to 1.0.
        let fact = Fact::new("hello", FactCategory::General, 1.5);
        let distance_from_one = (fact.confidence - 1.0).abs();
        assert!(distance_from_one < f64::EPSILON);
    }

    #[test]
    fn test_fact_with_source() {
        let base = Fact::new("prefers dark mode", FactCategory::Preference, 0.9);
        let fact = base.with_source("sess-123");
        assert_eq!(fact.source.as_deref(), Some("sess-123"));
    }

    #[test]
    fn test_deduplicate_keeps_higher_confidence() {
        let facts = vec![
            Fact::new("likes Python", FactCategory::Preference, 0.8),
            Fact::new("Likes Python", FactCategory::Preference, 0.95),
            Fact::new("uses VSCode", FactCategory::Behavior, 0.7),
        ];

        let deduped = batch_of(facts).deduplicate();
        assert_eq!(deduped.facts.len(), 2);

        // Of the two "likes Python" spellings, the 0.95 one must be the survivor.
        let python_fact = deduped
            .facts
            .iter()
            .find(|fact| fact.content.contains("Python"))
            .unwrap();
        let distance = (python_fact.confidence - 0.95).abs();
        assert!(distance < f64::EPSILON);
    }

    #[test]
    fn test_filter_by_confidence() {
        let facts = vec![
            Fact::new("high", FactCategory::General, 0.9),
            Fact::new("medium", FactCategory::General, 0.75),
            Fact::new("low", FactCategory::General, 0.3),
        ];

        let filtered = batch_of(facts).filter_by_confidence(0.7);
        assert_eq!(filtered.facts.len(), 2);
    }

    #[test]
    fn test_is_empty_and_len() {
        let batch = batch_of(vec![]);
        assert!(batch.is_empty());
        assert_eq!(batch.len(), 0);
    }
}
|
||||
@@ -5,7 +5,9 @@
|
||||
mod store;
|
||||
mod session;
|
||||
mod schema;
|
||||
pub mod fact;
|
||||
|
||||
pub use store::*;
|
||||
pub use session::*;
|
||||
pub use schema::*;
|
||||
pub use fact::{Fact, FactCategory, ExtractedFactBatch};
|
||||
|
||||
Reference in New Issue
Block a user