Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
根因: growth.rs 构造 CombinedExtraction 时硬编码 experiences: Vec::new() 和 profile_signals: default(),导致 L1 结构化经验不被提取、L2 技能进化没有输入数据、整个进化引擎无法端到端工作。 修复: - extractor.rs: 添加 COMBINED_EXTRACTION_PROMPT 统一 prompt,单次 LLM 调用同时输出 memories + experiences + profile_signals - extractor.rs: 添加 parse_combined_response() 解析 LLM JSON 响应 - LlmDriverForExtraction trait: 添加 extract_with_prompt() 方法(默认不支持,退化到现有 extract() + 启发式推断) - MemoryExtractor: 添加 extract_combined() 方法,优先单次调用,失败则退化 - growth.rs: extract_combined() 使用新的合并提取替代硬编码空值 - TauriExtractionDriver: 实现 extract_with_prompt() - ProfileSignals: 添加 has_any_signal() 方法 - types.rs: ProfileSignals 无结构性变化(字段已存在) 测试: 4 个新测试(parse_combined_response_full/minimal/invalid + extract_combined_fallback),11 个 extractor 测试全部通过
832 lines
27 KiB
Rust
832 lines
27 KiB
Rust
//! Memory Extractor - Extracts preferences, knowledge, and experience from conversations
|
||
//!
|
||
//! This module provides the `MemoryExtractor` which analyzes conversations
|
||
//! using LLM to extract valuable memories for agent growth.
|
||
|
||
use crate::types::{ExtractedMemory, ExtractionConfig, MemoryType};
|
||
use crate::viking_adapter::VikingAdapter;
|
||
use async_trait::async_trait;
|
||
use std::sync::Arc;
|
||
use zclaw_types::{Message, Result, SessionId};
|
||
|
||
/// Trait for LLM driver abstraction
|
||
/// This allows us to use any LLM driver implementation
|
||
#[async_trait]
|
||
pub trait LlmDriverForExtraction: Send + Sync {
|
||
/// Extract memories from conversation using LLM
|
||
async fn extract_memories(
|
||
&self,
|
||
messages: &[Message],
|
||
extraction_type: MemoryType,
|
||
) -> Result<Vec<ExtractedMemory>>;
|
||
|
||
/// 单次 LLM 调用提取全部类型(记忆 + 经验 + 画像信号)
|
||
/// 默认实现:退化到 3 次独立调用(experiences 和 profile_signals 为空)
|
||
async fn extract_combined_all(
|
||
&self,
|
||
messages: &[Message],
|
||
) -> Result<crate::types::CombinedExtraction> {
|
||
let mut combined = crate::types::CombinedExtraction::default();
|
||
for mt in [MemoryType::Preference, MemoryType::Knowledge, MemoryType::Experience] {
|
||
if let Ok(mems) = self.extract_memories(messages, mt).await {
|
||
combined.memories.extend(mems);
|
||
}
|
||
}
|
||
Ok(combined)
|
||
}
|
||
|
||
/// 使用自定义 prompt 进行单次 LLM 调用,返回原始文本响应
|
||
/// 用于统一提取场景,默认返回不支持错误
|
||
async fn extract_with_prompt(
|
||
&self,
|
||
_messages: &[Message],
|
||
_system_prompt: &str,
|
||
_user_prompt: &str,
|
||
) -> Result<String> {
|
||
Err(zclaw_types::ZclawError::Internal(
|
||
"extract_with_prompt not implemented".to_string(),
|
||
))
|
||
}
|
||
}
|
||
|
||
/// Memory Extractor - extracts memories from conversations
|
||
pub struct MemoryExtractor {
|
||
/// LLM driver for extraction (optional)
|
||
llm_driver: Option<Arc<dyn LlmDriverForExtraction>>,
|
||
/// OpenViking adapter for storage
|
||
viking: Option<Arc<VikingAdapter>>,
|
||
/// Extraction configuration
|
||
config: ExtractionConfig,
|
||
}
|
||
|
||
impl MemoryExtractor {
|
||
/// Create a new memory extractor with LLM driver
|
||
pub fn new(llm_driver: Arc<dyn LlmDriverForExtraction>) -> Self {
|
||
Self {
|
||
llm_driver: Some(llm_driver),
|
||
viking: None,
|
||
config: ExtractionConfig::default(),
|
||
}
|
||
}
|
||
|
||
/// Create a new memory extractor without LLM driver
|
||
///
|
||
/// This is useful for cases where LLM-based extraction is not needed
|
||
/// or will be set later using `with_llm_driver`
|
||
pub fn new_without_driver() -> Self {
|
||
Self {
|
||
llm_driver: None,
|
||
viking: None,
|
||
config: ExtractionConfig::default(),
|
||
}
|
||
}
|
||
|
||
/// Set the LLM driver
|
||
pub fn with_llm_driver(mut self, driver: Arc<dyn LlmDriverForExtraction>) -> Self {
|
||
self.llm_driver = Some(driver);
|
||
self
|
||
}
|
||
|
||
/// Create with OpenViking adapter
|
||
pub fn with_viking(mut self, viking: Arc<VikingAdapter>) -> Self {
|
||
self.viking = Some(viking);
|
||
self
|
||
}
|
||
|
||
/// Set extraction configuration
|
||
pub fn with_config(mut self, config: ExtractionConfig) -> Self {
|
||
self.config = config;
|
||
self
|
||
}
|
||
|
||
/// Extract memories from a conversation
|
||
///
|
||
/// This method analyzes the conversation and extracts:
|
||
/// - Preferences: User's communication style, format preferences, language preferences
|
||
/// - Knowledge: User-related facts, domain knowledge, lessons learned
|
||
/// - Experience: Skill/tool usage patterns and outcomes
|
||
///
|
||
/// Returns an empty Vec if no LLM driver is configured
|
||
pub async fn extract(
|
||
&self,
|
||
messages: &[Message],
|
||
session_id: SessionId,
|
||
) -> Result<Vec<ExtractedMemory>> {
|
||
// Check if LLM driver is available
|
||
if self.llm_driver.is_none() {
|
||
tracing::debug!("[MemoryExtractor] No LLM driver configured, skipping extraction");
|
||
return Ok(Vec::new());
|
||
}
|
||
|
||
let mut results = Vec::new();
|
||
|
||
// Extract preferences if enabled
|
||
if self.config.extract_preferences {
|
||
tracing::debug!("[MemoryExtractor] Extracting preferences...");
|
||
let prefs = self.extract_preferences(messages, session_id).await?;
|
||
results.extend(prefs);
|
||
}
|
||
|
||
// Extract knowledge if enabled
|
||
if self.config.extract_knowledge {
|
||
tracing::debug!("[MemoryExtractor] Extracting knowledge...");
|
||
let knowledge = self.extract_knowledge(messages, session_id).await?;
|
||
results.extend(knowledge);
|
||
}
|
||
|
||
// Extract experience if enabled
|
||
if self.config.extract_experience {
|
||
tracing::debug!("[MemoryExtractor] Extracting experience...");
|
||
let experience = self.extract_experience(messages, session_id).await?;
|
||
results.extend(experience);
|
||
}
|
||
|
||
// Filter by confidence threshold
|
||
results.retain(|m| m.confidence >= self.config.min_confidence);
|
||
|
||
tracing::info!(
|
||
"[MemoryExtractor] Extracted {} memories (confidence >= {})",
|
||
results.len(),
|
||
self.config.min_confidence
|
||
);
|
||
|
||
Ok(results)
|
||
}
|
||
|
||
/// Extract user preferences from conversation
|
||
async fn extract_preferences(
|
||
&self,
|
||
messages: &[Message],
|
||
session_id: SessionId,
|
||
) -> Result<Vec<ExtractedMemory>> {
|
||
let llm_driver = match &self.llm_driver {
|
||
Some(driver) => driver,
|
||
None => return Ok(Vec::new()),
|
||
};
|
||
|
||
let mut results = llm_driver
|
||
.extract_memories(messages, MemoryType::Preference)
|
||
.await?;
|
||
|
||
// Set source session
|
||
for memory in &mut results {
|
||
memory.source_session = session_id;
|
||
}
|
||
|
||
Ok(results)
|
||
}
|
||
|
||
/// Extract knowledge from conversation
|
||
async fn extract_knowledge(
|
||
&self,
|
||
messages: &[Message],
|
||
session_id: SessionId,
|
||
) -> Result<Vec<ExtractedMemory>> {
|
||
let llm_driver = match &self.llm_driver {
|
||
Some(driver) => driver,
|
||
None => return Ok(Vec::new()),
|
||
};
|
||
|
||
let mut results = llm_driver
|
||
.extract_memories(messages, MemoryType::Knowledge)
|
||
.await?;
|
||
|
||
for memory in &mut results {
|
||
memory.source_session = session_id;
|
||
}
|
||
|
||
Ok(results)
|
||
}
|
||
|
||
/// Extract experience from conversation
|
||
async fn extract_experience(
|
||
&self,
|
||
messages: &[Message],
|
||
session_id: SessionId,
|
||
) -> Result<Vec<ExtractedMemory>> {
|
||
let llm_driver = match &self.llm_driver {
|
||
Some(driver) => driver,
|
||
None => return Ok(Vec::new()),
|
||
};
|
||
|
||
let mut results = llm_driver
|
||
.extract_memories(messages, MemoryType::Experience)
|
||
.await?;
|
||
|
||
for memory in &mut results {
|
||
memory.source_session = session_id;
|
||
}
|
||
|
||
Ok(results)
|
||
}
|
||
|
||
/// Store extracted memories to OpenViking
|
||
pub async fn store_memories(
|
||
&self,
|
||
agent_id: &str,
|
||
memories: &[ExtractedMemory],
|
||
) -> Result<usize> {
|
||
let viking = match &self.viking {
|
||
Some(v) => v,
|
||
None => {
|
||
tracing::warn!("[MemoryExtractor] No VikingAdapter configured, memories not stored");
|
||
return Ok(0);
|
||
}
|
||
};
|
||
|
||
let mut stored = 0;
|
||
for memory in memories {
|
||
let entry = memory.to_memory_entry(agent_id);
|
||
match viking.store(&entry).await {
|
||
Ok(_) => stored += 1,
|
||
Err(e) => {
|
||
tracing::error!(
|
||
"[MemoryExtractor] Failed to store memory {}: {}",
|
||
memory.category,
|
||
e
|
||
);
|
||
}
|
||
}
|
||
}
|
||
|
||
tracing::info!("[MemoryExtractor] Stored {} memories to OpenViking", stored);
|
||
Ok(stored)
|
||
}
|
||
|
||
/// 统一提取:单次 LLM 调用同时产出 memories + experiences + profile_signals
|
||
///
|
||
/// 优先使用 `extract_with_prompt()` 进行单次调用;若 driver 不支持则
|
||
/// 退化为 `extract()` + 从记忆推断经验/画像。
|
||
pub async fn extract_combined(
|
||
&self,
|
||
messages: &[Message],
|
||
session_id: SessionId,
|
||
) -> Result<crate::types::CombinedExtraction> {
|
||
let llm_driver = match &self.llm_driver {
|
||
Some(driver) => driver,
|
||
None => {
|
||
tracing::debug!(
|
||
"[MemoryExtractor] No LLM driver configured, skipping combined extraction"
|
||
);
|
||
return Ok(crate::types::CombinedExtraction::default());
|
||
}
|
||
};
|
||
|
||
// 尝试单次 LLM 调用路径
|
||
let system_prompt = "You are a memory extraction assistant. Analyze conversations and extract \
|
||
structured memories, experiences, and profile signals in valid JSON format. \
|
||
Always respond with valid JSON only, no additional text or markdown formatting.";
|
||
let user_prompt = format!(
|
||
"{}{}",
|
||
crate::extractor::prompts::COMBINED_EXTRACTION_PROMPT,
|
||
format_conversation_text(messages)
|
||
);
|
||
|
||
match llm_driver
|
||
.extract_with_prompt(messages, system_prompt, &user_prompt)
|
||
.await
|
||
{
|
||
Ok(raw_text) if !raw_text.trim().is_empty() => {
|
||
match parse_combined_response(&raw_text, session_id.clone()) {
|
||
Ok(combined) => {
|
||
tracing::info!(
|
||
"[MemoryExtractor] Combined extraction: {} memories, {} experiences, {} profile signals",
|
||
combined.memories.len(),
|
||
combined.experiences.len(),
|
||
combined.profile_signals.has_any_signal() as usize,
|
||
);
|
||
return Ok(combined);
|
||
}
|
||
Err(e) => {
|
||
tracing::warn!(
|
||
"[MemoryExtractor] Combined response parse failed, falling back: {}",
|
||
e
|
||
);
|
||
}
|
||
}
|
||
}
|
||
Ok(_) => {
|
||
tracing::debug!("[MemoryExtractor] extract_with_prompt returned empty, falling back");
|
||
}
|
||
Err(e) => {
|
||
tracing::debug!(
|
||
"[MemoryExtractor] extract_with_prompt not supported ({}), falling back",
|
||
e
|
||
);
|
||
}
|
||
}
|
||
|
||
// 退化路径:使用已有的 extract() 然后推断 experiences 和 profile_signals
|
||
let memories = self.extract(messages, session_id).await?;
|
||
let experiences = infer_experiences_from_memories(&memories);
|
||
let profile_signals = infer_profile_signals_from_memories(&memories);
|
||
|
||
Ok(crate::types::CombinedExtraction {
|
||
memories,
|
||
experiences,
|
||
profile_signals,
|
||
})
|
||
}
|
||
}
|
||
|
||
/// 格式化对话消息为文本
|
||
fn format_conversation_text(messages: &[Message]) -> String {
|
||
messages
|
||
.iter()
|
||
.filter_map(|msg| match msg {
|
||
Message::User { content } => Some(format!("[User]: {}", content)),
|
||
Message::Assistant { content, .. } => Some(format!("[Assistant]: {}", content)),
|
||
Message::System { content } => Some(format!("[System]: {}", content)),
|
||
Message::ToolUse { .. } | Message::ToolResult { .. } => None,
|
||
})
|
||
.collect::<Vec<_>>()
|
||
.join("\n\n")
|
||
}
|
||
|
||
/// 从 LLM 原始响应解析 CombinedExtraction
|
||
pub fn parse_combined_response(
|
||
raw: &str,
|
||
session_id: SessionId,
|
||
) -> Result<crate::types::CombinedExtraction> {
|
||
use crate::types::CombinedExtraction;
|
||
|
||
let json_str = crate::json_utils::extract_json_block(raw);
|
||
let parsed: serde_json::Value = serde_json::from_str(json_str).map_err(|e| {
|
||
zclaw_types::ZclawError::Internal(format!("Failed to parse combined JSON: {}", e))
|
||
})?;
|
||
|
||
// 解析 memories
|
||
let memories = parsed
|
||
.get("memories")
|
||
.and_then(|v| v.as_array())
|
||
.map(|arr| {
|
||
arr.iter()
|
||
.filter_map(|item| parse_memory_item(item, &session_id))
|
||
.collect::<Vec<_>>()
|
||
})
|
||
.unwrap_or_default();
|
||
|
||
// 解析 experiences
|
||
let experiences = parsed
|
||
.get("experiences")
|
||
.and_then(|v| v.as_array())
|
||
.map(|arr| {
|
||
arr.iter()
|
||
.filter_map(parse_experience_item)
|
||
.collect::<Vec<_>>()
|
||
})
|
||
.unwrap_or_default();
|
||
|
||
// 解析 profile_signals
|
||
let profile_signals = parse_profile_signals(&parsed);
|
||
|
||
Ok(CombinedExtraction {
|
||
memories,
|
||
experiences,
|
||
profile_signals,
|
||
})
|
||
}
|
||
|
||
/// 解析单个 memory 项
|
||
fn parse_memory_item(
|
||
value: &serde_json::Value,
|
||
session_id: &SessionId,
|
||
) -> Option<ExtractedMemory> {
|
||
let content = value.get("content")?.as_str()?.to_string();
|
||
let category = value
|
||
.get("category")
|
||
.and_then(|v| v.as_str())
|
||
.unwrap_or("unknown")
|
||
.to_string();
|
||
let memory_type_str = value
|
||
.get("memory_type")
|
||
.and_then(|v| v.as_str())
|
||
.unwrap_or("knowledge");
|
||
let memory_type = crate::types::MemoryType::parse(memory_type_str);
|
||
let confidence = value
|
||
.get("confidence")
|
||
.and_then(|v| v.as_f64())
|
||
.unwrap_or(0.7) as f32;
|
||
let keywords = crate::json_utils::extract_string_array(value, "keywords");
|
||
|
||
Some(
|
||
ExtractedMemory::new(memory_type, category, content, session_id.clone())
|
||
.with_confidence(confidence)
|
||
.with_keywords(keywords),
|
||
)
|
||
}
|
||
|
||
/// 解析单个 experience 项
|
||
fn parse_experience_item(value: &serde_json::Value) -> Option<crate::types::ExperienceCandidate> {
|
||
use crate::types::Outcome;
|
||
|
||
let pain_pattern = value.get("pain_pattern")?.as_str()?.to_string();
|
||
let context = value
|
||
.get("context")
|
||
.and_then(|v| v.as_str())
|
||
.unwrap_or("")
|
||
.to_string();
|
||
let solution_steps = crate::json_utils::extract_string_array(value, "solution_steps");
|
||
let outcome_str = value
|
||
.get("outcome")
|
||
.and_then(|v| v.as_str())
|
||
.unwrap_or("partial");
|
||
let outcome = match outcome_str {
|
||
"success" => Outcome::Success,
|
||
"failed" => Outcome::Failed,
|
||
_ => Outcome::Partial,
|
||
};
|
||
let confidence = value
|
||
.get("confidence")
|
||
.and_then(|v| v.as_f64())
|
||
.unwrap_or(0.6) as f32;
|
||
let tools_used = crate::json_utils::extract_string_array(value, "tools_used");
|
||
let industry_context = value
|
||
.get("industry_context")
|
||
.and_then(|v| v.as_str())
|
||
.map(String::from);
|
||
|
||
Some(crate::types::ExperienceCandidate {
|
||
pain_pattern,
|
||
context,
|
||
solution_steps,
|
||
outcome,
|
||
confidence,
|
||
tools_used,
|
||
industry_context,
|
||
})
|
||
}
|
||
|
||
/// 解析 profile_signals
|
||
fn parse_profile_signals(obj: &serde_json::Value) -> crate::types::ProfileSignals {
|
||
let signals = obj.get("profile_signals");
|
||
crate::types::ProfileSignals {
|
||
industry: signals
|
||
.and_then(|s| s.get("industry"))
|
||
.and_then(|v| v.as_str())
|
||
.map(String::from),
|
||
recent_topic: signals
|
||
.and_then(|s| s.get("recent_topic"))
|
||
.and_then(|v| v.as_str())
|
||
.map(String::from),
|
||
pain_point: signals
|
||
.and_then(|s| s.get("pain_point"))
|
||
.and_then(|v| v.as_str())
|
||
.map(String::from),
|
||
preferred_tool: signals
|
||
.and_then(|s| s.get("preferred_tool"))
|
||
.and_then(|v| v.as_str())
|
||
.map(String::from),
|
||
communication_style: signals
|
||
.and_then(|s| s.get("communication_style"))
|
||
.and_then(|v| v.as_str())
|
||
.map(String::from),
|
||
}
|
||
}
|
||
|
||
/// 从已有记忆推断结构化经验(退化路径)
|
||
fn infer_experiences_from_memories(
|
||
memories: &[ExtractedMemory],
|
||
) -> Vec<crate::types::ExperienceCandidate> {
|
||
memories
|
||
.iter()
|
||
.filter(|m| m.memory_type == crate::types::MemoryType::Experience)
|
||
.filter_map(|m| {
|
||
// 经验类记忆 → ExperienceCandidate
|
||
let content = &m.content;
|
||
if content.len() < 10 {
|
||
return None;
|
||
}
|
||
Some(crate::types::ExperienceCandidate {
|
||
pain_pattern: m.category.clone(),
|
||
context: content.clone(),
|
||
solution_steps: Vec::new(),
|
||
outcome: crate::types::Outcome::Success,
|
||
confidence: m.confidence * 0.7, // 降低推断置信度
|
||
tools_used: m.keywords.clone(),
|
||
industry_context: None,
|
||
})
|
||
})
|
||
.collect()
|
||
}
|
||
|
||
/// 从已有记忆推断画像信号(退化路径)
|
||
fn infer_profile_signals_from_memories(
|
||
memories: &[ExtractedMemory],
|
||
) -> crate::types::ProfileSignals {
|
||
use crate::types::ProfileSignals;
|
||
|
||
let mut signals = ProfileSignals::default();
|
||
for m in memories {
|
||
match m.memory_type {
|
||
crate::types::MemoryType::Preference => {
|
||
if m.category.contains("style") || m.category.contains("风格") {
|
||
if signals.communication_style.is_none() {
|
||
signals.communication_style = Some(m.content.clone());
|
||
}
|
||
}
|
||
}
|
||
crate::types::MemoryType::Knowledge => {
|
||
if signals.recent_topic.is_none() && !m.keywords.is_empty() {
|
||
signals.recent_topic = Some(m.keywords.first().cloned().unwrap_or_default());
|
||
}
|
||
}
|
||
crate::types::MemoryType::Experience => {
|
||
for kw in &m.keywords {
|
||
if signals.preferred_tool.is_none()
|
||
&& m.content.contains(kw.as_str())
|
||
{
|
||
signals.preferred_tool = Some(kw.clone());
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
signals
|
||
}
|
||
|
||
/// Default extraction prompts for LLM
|
||
pub mod prompts {
|
||
use crate::types::MemoryType;
|
||
|
||
/// Get the extraction prompt for a memory type
|
||
pub fn get_extraction_prompt(memory_type: MemoryType) -> &'static str {
|
||
match memory_type {
|
||
MemoryType::Preference => PREFERENCE_EXTRACTION_PROMPT,
|
||
MemoryType::Knowledge => KNOWLEDGE_EXTRACTION_PROMPT,
|
||
MemoryType::Experience => EXPERIENCE_EXTRACTION_PROMPT,
|
||
MemoryType::Session => SESSION_SUMMARY_PROMPT,
|
||
}
|
||
}
|
||
|
||
/// Unified extraction prompt: a single LLM call extracts memories, structured experiences, and profile signals together.
|
||
pub const COMBINED_EXTRACTION_PROMPT: &str = r#"
|
||
分析以下对话,一次性提取三类信息。严格按 JSON 格式返回。
|
||
|
||
## 输出格式
|
||
|
||
```json
|
||
{
|
||
"memories": [
|
||
{
|
||
"memory_type": "preference|knowledge|experience",
|
||
"category": "分类标签",
|
||
"content": "记忆内容",
|
||
"confidence": 0.0-1.0,
|
||
"keywords": ["关键词"]
|
||
}
|
||
],
|
||
"experiences": [
|
||
{
|
||
"pain_pattern": "痛点模式简述",
|
||
"context": "问题发生的上下文",
|
||
"solution_steps": ["步骤1", "步骤2"],
|
||
"outcome": "success|partial|failed",
|
||
"confidence": 0.0-1.0,
|
||
"tools_used": ["使用的工具/技能"],
|
||
"industry_context": "行业标识(可选)"
|
||
}
|
||
],
|
||
"profile_signals": {
|
||
"industry": "用户所在行业(可选)",
|
||
"recent_topic": "最近讨论的主要话题(可选)",
|
||
"pain_point": "用户当前痛点(可选)",
|
||
"preferred_tool": "用户偏好的工具/技能(可选)",
|
||
"communication_style": "沟通风格: concise|detailed|formal|casual(可选)"
|
||
}
|
||
}
|
||
```
|
||
|
||
## 提取规则
|
||
|
||
1. **memories**: 提取用户偏好(沟通风格/格式/语言)、知识(事实/领域知识/经验教训)、使用经验(技能/工具使用模式和结果)
|
||
2. **experiences**: 仅提取明确的"问题→解决"模式,要求有清晰的痛点和步骤,confidence >= 0.6
|
||
3. **profile_signals**: 从对话中推断用户画像信息,只在有明确信号时填写,留空则不填
|
||
4. 每个字段都要有实际内容,不确定的宁可省略
|
||
5. 只返回 JSON,不要附加其他文本
|
||
|
||
对话内容:
|
||
"#;
|
||
|
||
const PREFERENCE_EXTRACTION_PROMPT: &str = r#"
|
||
分析以下对话,提取用户的偏好设置。关注:
|
||
- 沟通风格偏好(简洁/详细、正式/随意)
|
||
- 回复格式偏好(列表/段落、代码块风格)
|
||
- 语言偏好
|
||
- 主题兴趣
|
||
|
||
请以 JSON 格式返回,格式如下:
|
||
[
|
||
{
|
||
"category": "communication-style",
|
||
"content": "用户偏好简洁的回复",
|
||
"confidence": 0.9,
|
||
"keywords": ["简洁", "回复风格"]
|
||
}
|
||
]
|
||
|
||
对话内容:
|
||
"#;
|
||
|
||
const KNOWLEDGE_EXTRACTION_PROMPT: &str = r#"
|
||
分析以下对话,提取有价值的知识。关注:
|
||
- 用户相关事实(职业、项目、背景)
|
||
- 领域知识(技术栈、工具、最佳实践)
|
||
- 经验教训(成功/失败案例)
|
||
|
||
请以 JSON 格式返回,格式如下:
|
||
[
|
||
{
|
||
"category": "user-facts",
|
||
"content": "用户是一名 Rust 开发者",
|
||
"confidence": 0.85,
|
||
"keywords": ["Rust", "开发者"]
|
||
}
|
||
]
|
||
|
||
对话内容:
|
||
"#;
|
||
|
||
const EXPERIENCE_EXTRACTION_PROMPT: &str = r#"
|
||
分析以下对话,提取技能/工具使用经验。关注:
|
||
- 使用的技能或工具
|
||
- 执行结果(成功/失败)
|
||
- 改进建议
|
||
|
||
请以 JSON 格式返回,格式如下:
|
||
[
|
||
{
|
||
"category": "skill-browser",
|
||
"content": "浏览器技能在搜索技术文档时效果很好",
|
||
"confidence": 0.8,
|
||
"keywords": ["浏览器", "搜索", "文档"]
|
||
}
|
||
]
|
||
|
||
对话内容:
|
||
"#;
|
||
|
||
const SESSION_SUMMARY_PROMPT: &str = r#"
|
||
总结以下对话会话。关注:
|
||
- 主要话题
|
||
- 关键决策
|
||
- 未解决问题
|
||
|
||
请以 JSON 格式返回,格式如下:
|
||
{
|
||
"summary": "会话摘要内容",
|
||
"keywords": ["关键词1", "关键词2"],
|
||
"topics": ["主题1", "主题2"]
|
||
}
|
||
|
||
对话内容:
|
||
"#;
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Driver stub: answers every request with one memory of the requested
    /// type and keeps the trait's default `extract_with_prompt`.
    struct MockLlmDriver;

    #[async_trait]
    impl LlmDriverForExtraction for MockLlmDriver {
        async fn extract_memories(
            &self,
            _messages: &[Message],
            extraction_type: MemoryType,
        ) -> Result<Vec<ExtractedMemory>> {
            let memory = ExtractedMemory::new(
                extraction_type,
                "test-category",
                "test content",
                SessionId::new(),
            );
            Ok(vec![memory])
        }
    }

    /// A freshly built extractor has no storage adapter attached.
    #[tokio::test]
    async fn test_extractor_creation() {
        let driver = Arc::new(MockLlmDriver);
        let extractor = MemoryExtractor::new(driver);
        assert!(extractor.viking.is_none());
    }

    /// `extract` returns memories when a driver is configured.
    #[tokio::test]
    async fn test_extract_memories() {
        let driver = Arc::new(MockLlmDriver);
        let extractor = MemoryExtractor::new(driver);
        let conversation = vec![Message::user("Hello")];

        let extracted = extractor
            .extract(&conversation, SessionId::new())
            .await
            .unwrap();

        // Should extract preferences, knowledge, and experience
        assert!(!extracted.is_empty());
    }

    /// The trait's default combined extraction issues one call per type.
    #[tokio::test]
    async fn test_extract_combined_all_default_impl() {
        let driver = MockLlmDriver;
        let conversation = vec![Message::user("Hello")];
        let combined = driver.extract_combined_all(&conversation).await.unwrap();
        assert_eq!(combined.memories.len(), 3); // 3 types
    }

    /// Every prompt template is non-empty.
    #[test]
    fn test_prompts_available() {
        assert!(!prompts::get_extraction_prompt(MemoryType::Preference).is_empty());
        assert!(!prompts::get_extraction_prompt(MemoryType::Knowledge).is_empty());
        assert!(!prompts::get_extraction_prompt(MemoryType::Experience).is_empty());
        assert!(!prompts::get_extraction_prompt(MemoryType::Session).is_empty());
        assert!(!prompts::COMBINED_EXTRACTION_PROMPT.is_empty());
    }

    /// A fenced response with all three sections is parsed completely.
    #[test]
    fn test_parse_combined_response_full() {
        let raw = r#"```json
{
  "memories": [
    {
      "memory_type": "preference",
      "category": "communication-style",
      "content": "用户偏好简洁回复",
      "confidence": 0.9,
      "keywords": ["简洁", "风格"]
    },
    {
      "memory_type": "knowledge",
      "category": "user-facts",
      "content": "用户是医院行政人员",
      "confidence": 0.85,
      "keywords": ["医院", "行政"]
    }
  ],
  "experiences": [
    {
      "pain_pattern": "报表生成耗时",
      "context": "月度报表需要手动汇总多个Excel",
      "solution_steps": ["使用researcher工具自动抓取", "格式化输出为Excel"],
      "outcome": "success",
      "confidence": 0.85,
      "tools_used": ["researcher"],
      "industry_context": "healthcare"
    }
  ],
  "profile_signals": {
    "industry": "healthcare",
    "recent_topic": "报表自动化",
    "pain_point": "手动汇总Excel太慢",
    "preferred_tool": "researcher",
    "communication_style": "concise"
  }
}
```"#;

        let parsed = super::parse_combined_response(raw, SessionId::new()).unwrap();

        assert_eq!(parsed.memories.len(), 2);

        assert_eq!(parsed.experiences.len(), 1);
        let experience = &parsed.experiences[0];
        assert_eq!(experience.pain_pattern, "报表生成耗时");
        assert_eq!(experience.outcome, crate::types::Outcome::Success);

        let signals = &parsed.profile_signals;
        assert_eq!(signals.industry.as_deref(), Some("healthcare"));
        assert_eq!(signals.pain_point.as_deref(), Some("手动汇总Excel太慢"));
        assert!(signals.has_any_signal());
    }

    /// Empty sections parse to an empty extraction without signals.
    #[test]
    fn test_parse_combined_response_minimal() {
        let raw = r#"{"memories": [], "experiences": [], "profile_signals": {}}"#;
        let parsed = super::parse_combined_response(raw, SessionId::new()).unwrap();
        assert!(parsed.memories.is_empty());
        assert!(parsed.experiences.is_empty());
        assert!(!parsed.profile_signals.has_any_signal());
    }

    /// Text that is not JSON is rejected.
    #[test]
    fn test_parse_combined_response_invalid() {
        let raw = "not json at all";
        let outcome = super::parse_combined_response(raw, SessionId::new());
        assert!(outcome.is_err());
    }

    /// Without `extract_with_prompt` support the extractor falls back to the
    /// per-type path.
    #[tokio::test]
    async fn test_extract_combined_fallback() {
        // MockLlmDriver doesn't implement extract_with_prompt, so it falls back
        let driver = Arc::new(MockLlmDriver);
        let extractor = MemoryExtractor::new(driver);
        let conversation = vec![Message::user("Hello"), Message::assistant("Hi there!")];

        let combined = extractor
            .extract_combined(&conversation, SessionId::new())
            .await
            .unwrap();

        // Fallback: extract() supplies the memories; experiences are inferred from them
        assert!(!combined.memories.is_empty());
    }
}
|