Files
zclaw_openfang/crates/zclaw-growth/src/extractor.rs

396 lines
12 KiB
Rust

//! Memory Extractor - Extracts preferences, knowledge, and experience from conversations
//!
//! This module provides the `MemoryExtractor`, which analyzes conversations
//! using an LLM to extract valuable memories for agent growth.
use crate::types::{ExtractedMemory, ExtractionConfig, MemoryType};
use crate::viking_adapter::VikingAdapter;
use async_trait::async_trait;
use std::sync::Arc;
use zclaw_types::{Message, Result, SessionId};
/// Trait for LLM driver abstraction
/// This allows us to use any LLM driver implementation
#[async_trait]
pub trait LlmDriverForExtraction: Send + Sync {
/// Extract memories from conversation using LLM
async fn extract_memories(
&self,
messages: &[Message],
extraction_type: MemoryType,
) -> Result<Vec<ExtractedMemory>>;
/// 单次 LLM 调用提取全部类型(记忆 + 经验 + 画像信号)
/// 默认实现:退化到 3 次独立调用
async fn extract_combined_all(
&self,
messages: &[Message],
) -> Result<crate::types::CombinedExtraction> {
let mut combined = crate::types::CombinedExtraction::default();
for mt in [MemoryType::Preference, MemoryType::Knowledge, MemoryType::Experience] {
if let Ok(mems) = self.extract_memories(messages, mt).await {
combined.memories.extend(mems);
}
}
Ok(combined)
}
}
/// Memory Extractor - extracts memories from conversations
///
/// Bundles an optional LLM driver (used to produce memories), an optional
/// OpenViking adapter (used to persist them), and the extraction configuration.
pub struct MemoryExtractor {
/// LLM driver for extraction (optional).
/// When `None`, `extract` logs a debug message and returns an empty list.
llm_driver: Option<Arc<dyn LlmDriverForExtraction>>,
/// OpenViking adapter for storage (optional).
/// When `None`, `store_memories` logs a warning and stores nothing.
viking: Option<Arc<VikingAdapter>>,
/// Extraction configuration: selects which memory types `extract` requests
/// and the minimum confidence a memory needs to be kept.
config: ExtractionConfig,
}
impl MemoryExtractor {
/// Create a new memory extractor with LLM driver
pub fn new(llm_driver: Arc<dyn LlmDriverForExtraction>) -> Self {
Self {
llm_driver: Some(llm_driver),
viking: None,
config: ExtractionConfig::default(),
}
}
/// Create a new memory extractor without LLM driver
///
/// This is useful for cases where LLM-based extraction is not needed
/// or will be set later using `with_llm_driver`
pub fn new_without_driver() -> Self {
Self {
llm_driver: None,
viking: None,
config: ExtractionConfig::default(),
}
}
/// Set the LLM driver
pub fn with_llm_driver(mut self, driver: Arc<dyn LlmDriverForExtraction>) -> Self {
self.llm_driver = Some(driver);
self
}
/// Create with OpenViking adapter
pub fn with_viking(mut self, viking: Arc<VikingAdapter>) -> Self {
self.viking = Some(viking);
self
}
/// Set extraction configuration
pub fn with_config(mut self, config: ExtractionConfig) -> Self {
self.config = config;
self
}
/// Extract memories from a conversation
///
/// This method analyzes the conversation and extracts:
/// - Preferences: User's communication style, format preferences, language preferences
/// - Knowledge: User-related facts, domain knowledge, lessons learned
/// - Experience: Skill/tool usage patterns and outcomes
///
/// Returns an empty Vec if no LLM driver is configured
pub async fn extract(
&self,
messages: &[Message],
session_id: SessionId,
) -> Result<Vec<ExtractedMemory>> {
// Check if LLM driver is available
let _llm_driver = match &self.llm_driver {
Some(driver) => driver,
None => {
tracing::debug!("[MemoryExtractor] No LLM driver configured, skipping extraction");
return Ok(Vec::new());
}
};
let mut results = Vec::new();
// Extract preferences if enabled
if self.config.extract_preferences {
tracing::debug!("[MemoryExtractor] Extracting preferences...");
let prefs = self.extract_preferences(messages, session_id).await?;
results.extend(prefs);
}
// Extract knowledge if enabled
if self.config.extract_knowledge {
tracing::debug!("[MemoryExtractor] Extracting knowledge...");
let knowledge = self.extract_knowledge(messages, session_id).await?;
results.extend(knowledge);
}
// Extract experience if enabled
if self.config.extract_experience {
tracing::debug!("[MemoryExtractor] Extracting experience...");
let experience = self.extract_experience(messages, session_id).await?;
results.extend(experience);
}
// Filter by confidence threshold
results.retain(|m| m.confidence >= self.config.min_confidence);
tracing::info!(
"[MemoryExtractor] Extracted {} memories (confidence >= {})",
results.len(),
self.config.min_confidence
);
Ok(results)
}
/// Extract user preferences from conversation
async fn extract_preferences(
&self,
messages: &[Message],
session_id: SessionId,
) -> Result<Vec<ExtractedMemory>> {
let llm_driver = match &self.llm_driver {
Some(driver) => driver,
None => return Ok(Vec::new()),
};
let mut results = llm_driver
.extract_memories(messages, MemoryType::Preference)
.await?;
// Set source session
for memory in &mut results {
memory.source_session = session_id;
}
Ok(results)
}
/// Extract knowledge from conversation
async fn extract_knowledge(
&self,
messages: &[Message],
session_id: SessionId,
) -> Result<Vec<ExtractedMemory>> {
let llm_driver = match &self.llm_driver {
Some(driver) => driver,
None => return Ok(Vec::new()),
};
let mut results = llm_driver
.extract_memories(messages, MemoryType::Knowledge)
.await?;
for memory in &mut results {
memory.source_session = session_id;
}
Ok(results)
}
/// Extract experience from conversation
async fn extract_experience(
&self,
messages: &[Message],
session_id: SessionId,
) -> Result<Vec<ExtractedMemory>> {
let llm_driver = match &self.llm_driver {
Some(driver) => driver,
None => return Ok(Vec::new()),
};
let mut results = llm_driver
.extract_memories(messages, MemoryType::Experience)
.await?;
for memory in &mut results {
memory.source_session = session_id;
}
Ok(results)
}
/// Store extracted memories to OpenViking
pub async fn store_memories(
&self,
agent_id: &str,
memories: &[ExtractedMemory],
) -> Result<usize> {
let viking = match &self.viking {
Some(v) => v,
None => {
tracing::warn!("[MemoryExtractor] No VikingAdapter configured, memories not stored");
return Ok(0);
}
};
let mut stored = 0;
for memory in memories {
let entry = memory.to_memory_entry(agent_id);
match viking.store(&entry).await {
Ok(_) => stored += 1,
Err(e) => {
tracing::error!(
"[MemoryExtractor] Failed to store memory {}: {}",
memory.category,
e
);
}
}
}
tracing::info!("[MemoryExtractor] Stored {} memories to OpenViking", stored);
Ok(stored)
}
}
/// Default extraction prompts for LLM
///
/// Each prompt is a static template (written in Chinese) that ends with a
/// "conversation content" header line, and asks the model to answer in JSON.
pub mod prompts {
use crate::types::MemoryType;
/// Get the extraction prompt for a memory type
///
/// The match is exhaustive over `MemoryType`, so every variant maps to
/// exactly one of the prompt constants below.
pub fn get_extraction_prompt(memory_type: MemoryType) -> &'static str {
match memory_type {
MemoryType::Preference => PREFERENCE_EXTRACTION_PROMPT,
MemoryType::Knowledge => KNOWLEDGE_EXTRACTION_PROMPT,
MemoryType::Experience => EXPERIENCE_EXTRACTION_PROMPT,
MemoryType::Session => SESSION_SUMMARY_PROMPT,
}
}
/// Prompt for user preferences (communication style, reply format, language,
/// topic interests). Requests a JSON array of objects with `category`,
/// `content`, `confidence` and `keywords`.
const PREFERENCE_EXTRACTION_PROMPT: &str = r#"
分析以下对话,提取用户的偏好设置。关注:
- 沟通风格偏好(简洁/详细、正式/随意)
- 回复格式偏好(列表/段落、代码块风格)
- 语言偏好
- 主题兴趣
请以 JSON 格式返回,格式如下:
[
{
"category": "communication-style",
"content": "用户偏好简洁的回复",
"confidence": 0.9,
"keywords": ["简洁", "回复风格"]
}
]
对话内容:
"#;
/// Prompt for valuable knowledge (user facts, domain knowledge, lessons
/// learned). Requests a JSON array of objects with `category`, `content`,
/// `confidence` and `keywords`.
const KNOWLEDGE_EXTRACTION_PROMPT: &str = r#"
分析以下对话,提取有价值的知识。关注:
- 用户相关事实(职业、项目、背景)
- 领域知识(技术栈、工具、最佳实践)
- 经验教训(成功/失败案例)
请以 JSON 格式返回,格式如下:
[
{
"category": "user-facts",
"content": "用户是一名 Rust 开发者",
"confidence": 0.85,
"keywords": ["Rust", "开发者"]
}
]
对话内容:
"#;
/// Prompt for skill/tool usage experience (what was used, the outcome,
/// suggested improvements). Requests a JSON array of objects with
/// `category`, `content`, `confidence` and `keywords`.
const EXPERIENCE_EXTRACTION_PROMPT: &str = r#"
分析以下对话,提取技能/工具使用经验。关注:
- 使用的技能或工具
- 执行结果(成功/失败)
- 改进建议
请以 JSON 格式返回,格式如下:
[
{
"category": "skill-browser",
"content": "浏览器技能在搜索技术文档时效果很好",
"confidence": 0.8,
"keywords": ["浏览器", "搜索", "文档"]
}
]
对话内容:
"#;
/// Prompt for a session summary (main topics, key decisions, open issues).
/// Unlike the other prompts it requests a single JSON object with `summary`,
/// `keywords` and `topics` rather than an array.
const SESSION_SUMMARY_PROMPT: &str = r#"
总结以下对话会话。关注:
- 主要话题
- 关键决策
- 未解决问题
请以 JSON 格式返回,格式如下:
{
"summary": "会话摘要内容",
"keywords": ["关键词1", "关键词2"],
"topics": ["主题1", "主题2"]
}
对话内容:
"#;
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Stub driver: answers every request with exactly one fixed memory of
    /// the requested type, ignoring the conversation.
    struct MockLlmDriver;

    #[async_trait]
    impl LlmDriverForExtraction for MockLlmDriver {
        async fn extract_memories(
            &self,
            _messages: &[Message],
            extraction_type: MemoryType,
        ) -> Result<Vec<ExtractedMemory>> {
            let memory = ExtractedMemory::new(
                extraction_type,
                "test-category",
                "test content",
                SessionId::new(),
            );
            Ok(vec![memory])
        }
    }

    /// A freshly built extractor has no storage adapter attached.
    #[tokio::test]
    async fn test_extractor_creation() {
        let extractor = MemoryExtractor::new(Arc::new(MockLlmDriver));
        assert!(extractor.viking.is_none());
    }

    /// With the default configuration, at least one memory must survive
    /// extraction and confidence filtering.
    #[tokio::test]
    async fn test_extract_memories() {
        let extractor = MemoryExtractor::new(Arc::new(MockLlmDriver));
        let conversation = vec![Message::user("Hello")];

        let extracted = extractor
            .extract(&conversation, SessionId::new())
            .await
            .unwrap();

        assert!(!extracted.is_empty());
    }

    /// The default `extract_combined_all` collects one memory from each of
    /// the three per-type calls made against the mock.
    #[tokio::test]
    async fn test_extract_combined_all_default_impl() {
        let mock = MockLlmDriver;
        let conversation = vec![Message::user("Hello")];

        let combined = mock.extract_combined_all(&conversation).await.unwrap();

        assert_eq!(combined.memories.len(), 3); // 3 types
    }

    /// Every memory type resolves to a non-empty prompt.
    #[test]
    fn test_prompts_available() {
        for memory_type in [
            MemoryType::Preference,
            MemoryType::Knowledge,
            MemoryType::Experience,
            MemoryType::Session,
        ] {
            assert!(!prompts::get_extraction_prompt(memory_type).is_empty());
        }
    }
}