fix(growth): HIGH-6 修复 extract_combined 合并提取空壳
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
根因: growth.rs 构造 CombinedExtraction 时硬编码 experiences: Vec::new() 和 profile_signals: default(),导致 L1 结构化经验不被提取、L2 技能进化没有输入数据、整个进化引擎无法端到端工作。 修复: - extractor.rs: 添加 COMBINED_EXTRACTION_PROMPT 统一 prompt,单次 LLM 调用同时输出 memories + experiences + profile_signals - extractor.rs: 添加 parse_combined_response() 解析 LLM JSON 响应 - LlmDriverForExtraction trait: 添加 extract_with_prompt() 方法(默认不支持,退化到现有 extract() + 启发式推断) - MemoryExtractor: 添加 extract_combined() 方法,优先单次调用,失败则退化 - growth.rs: extract_combined() 使用新的合并提取替代硬编码空值 - TauriExtractionDriver: 实现 extract_with_prompt() - ProfileSignals: 添加 has_any_signal() 方法 - types.rs: ProfileSignals 无结构性变化(字段已存在) 测试: 4 个新测试(parse_combined_response_full/minimal/invalid + extract_combined_fallback),11 个 extractor 测试全部通过
This commit is contained in:
@@ -21,7 +21,7 @@ pub trait LlmDriverForExtraction: Send + Sync {
|
|||||||
) -> Result<Vec<ExtractedMemory>>;
|
) -> Result<Vec<ExtractedMemory>>;
|
||||||
|
|
||||||
/// 单次 LLM 调用提取全部类型(记忆 + 经验 + 画像信号)
|
/// 单次 LLM 调用提取全部类型(记忆 + 经验 + 画像信号)
|
||||||
/// 默认实现:退化到 3 次独立调用
|
/// 默认实现:退化到 3 次独立调用(experiences 和 profile_signals 为空)
|
||||||
async fn extract_combined_all(
|
async fn extract_combined_all(
|
||||||
&self,
|
&self,
|
||||||
messages: &[Message],
|
messages: &[Message],
|
||||||
@@ -34,6 +34,19 @@ pub trait LlmDriverForExtraction: Send + Sync {
|
|||||||
}
|
}
|
||||||
Ok(combined)
|
Ok(combined)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// 使用自定义 prompt 进行单次 LLM 调用,返回原始文本响应
|
||||||
|
/// 用于统一提取场景,默认返回不支持错误
|
||||||
|
async fn extract_with_prompt(
|
||||||
|
&self,
|
||||||
|
_messages: &[Message],
|
||||||
|
_system_prompt: &str,
|
||||||
|
_user_prompt: &str,
|
||||||
|
) -> Result<String> {
|
||||||
|
Err(zclaw_types::ZclawError::Internal(
|
||||||
|
"extract_with_prompt not implemented".to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Memory Extractor - extracts memories from conversations
|
/// Memory Extractor - extracts memories from conversations
|
||||||
@@ -100,13 +113,10 @@ impl MemoryExtractor {
|
|||||||
session_id: SessionId,
|
session_id: SessionId,
|
||||||
) -> Result<Vec<ExtractedMemory>> {
|
) -> Result<Vec<ExtractedMemory>> {
|
||||||
// Check if LLM driver is available
|
// Check if LLM driver is available
|
||||||
let _llm_driver = match &self.llm_driver {
|
if self.llm_driver.is_none() {
|
||||||
Some(driver) => driver,
|
tracing::debug!("[MemoryExtractor] No LLM driver configured, skipping extraction");
|
||||||
None => {
|
return Ok(Vec::new());
|
||||||
tracing::debug!("[MemoryExtractor] No LLM driver configured, skipping extraction");
|
}
|
||||||
return Ok(Vec::new());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
|
|
||||||
@@ -242,6 +252,299 @@ impl MemoryExtractor {
|
|||||||
tracing::info!("[MemoryExtractor] Stored {} memories to OpenViking", stored);
|
tracing::info!("[MemoryExtractor] Stored {} memories to OpenViking", stored);
|
||||||
Ok(stored)
|
Ok(stored)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// 统一提取:单次 LLM 调用同时产出 memories + experiences + profile_signals
|
||||||
|
///
|
||||||
|
/// 优先使用 `extract_with_prompt()` 进行单次调用;若 driver 不支持则
|
||||||
|
/// 退化为 `extract()` + 从记忆推断经验/画像。
|
||||||
|
pub async fn extract_combined(
|
||||||
|
&self,
|
||||||
|
messages: &[Message],
|
||||||
|
session_id: SessionId,
|
||||||
|
) -> Result<crate::types::CombinedExtraction> {
|
||||||
|
let llm_driver = match &self.llm_driver {
|
||||||
|
Some(driver) => driver,
|
||||||
|
None => {
|
||||||
|
tracing::debug!(
|
||||||
|
"[MemoryExtractor] No LLM driver configured, skipping combined extraction"
|
||||||
|
);
|
||||||
|
return Ok(crate::types::CombinedExtraction::default());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// 尝试单次 LLM 调用路径
|
||||||
|
let system_prompt = "You are a memory extraction assistant. Analyze conversations and extract \
|
||||||
|
structured memories, experiences, and profile signals in valid JSON format. \
|
||||||
|
Always respond with valid JSON only, no additional text or markdown formatting.";
|
||||||
|
let user_prompt = format!(
|
||||||
|
"{}{}",
|
||||||
|
crate::extractor::prompts::COMBINED_EXTRACTION_PROMPT,
|
||||||
|
format_conversation_text(messages)
|
||||||
|
);
|
||||||
|
|
||||||
|
match llm_driver
|
||||||
|
.extract_with_prompt(messages, system_prompt, &user_prompt)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(raw_text) if !raw_text.trim().is_empty() => {
|
||||||
|
match parse_combined_response(&raw_text, session_id.clone()) {
|
||||||
|
Ok(combined) => {
|
||||||
|
tracing::info!(
|
||||||
|
"[MemoryExtractor] Combined extraction: {} memories, {} experiences, {} profile signals",
|
||||||
|
combined.memories.len(),
|
||||||
|
combined.experiences.len(),
|
||||||
|
combined.profile_signals.has_any_signal() as usize,
|
||||||
|
);
|
||||||
|
return Ok(combined);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::warn!(
|
||||||
|
"[MemoryExtractor] Combined response parse failed, falling back: {}",
|
||||||
|
e
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(_) => {
|
||||||
|
tracing::debug!("[MemoryExtractor] extract_with_prompt returned empty, falling back");
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
tracing::debug!(
|
||||||
|
"[MemoryExtractor] extract_with_prompt not supported ({}), falling back",
|
||||||
|
e
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 退化路径:使用已有的 extract() 然后推断 experiences 和 profile_signals
|
||||||
|
let memories = self.extract(messages, session_id).await?;
|
||||||
|
let experiences = infer_experiences_from_memories(&memories);
|
||||||
|
let profile_signals = infer_profile_signals_from_memories(&memories);
|
||||||
|
|
||||||
|
Ok(crate::types::CombinedExtraction {
|
||||||
|
memories,
|
||||||
|
experiences,
|
||||||
|
profile_signals,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 格式化对话消息为文本
|
||||||
|
fn format_conversation_text(messages: &[Message]) -> String {
|
||||||
|
messages
|
||||||
|
.iter()
|
||||||
|
.filter_map(|msg| match msg {
|
||||||
|
Message::User { content } => Some(format!("[User]: {}", content)),
|
||||||
|
Message::Assistant { content, .. } => Some(format!("[Assistant]: {}", content)),
|
||||||
|
Message::System { content } => Some(format!("[System]: {}", content)),
|
||||||
|
Message::ToolUse { .. } | Message::ToolResult { .. } => None,
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 从 LLM 原始响应解析 CombinedExtraction
|
||||||
|
pub fn parse_combined_response(
|
||||||
|
raw: &str,
|
||||||
|
session_id: SessionId,
|
||||||
|
) -> Result<crate::types::CombinedExtraction> {
|
||||||
|
use crate::types::CombinedExtraction;
|
||||||
|
|
||||||
|
let json_str = crate::json_utils::extract_json_block(raw);
|
||||||
|
let parsed: serde_json::Value = serde_json::from_str(json_str).map_err(|e| {
|
||||||
|
zclaw_types::ZclawError::Internal(format!("Failed to parse combined JSON: {}", e))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// 解析 memories
|
||||||
|
let memories = parsed
|
||||||
|
.get("memories")
|
||||||
|
.and_then(|v| v.as_array())
|
||||||
|
.map(|arr| {
|
||||||
|
arr.iter()
|
||||||
|
.filter_map(|item| parse_memory_item(item, &session_id))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// 解析 experiences
|
||||||
|
let experiences = parsed
|
||||||
|
.get("experiences")
|
||||||
|
.and_then(|v| v.as_array())
|
||||||
|
.map(|arr| {
|
||||||
|
arr.iter()
|
||||||
|
.filter_map(parse_experience_item)
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
// 解析 profile_signals
|
||||||
|
let profile_signals = parse_profile_signals(&parsed);
|
||||||
|
|
||||||
|
Ok(CombinedExtraction {
|
||||||
|
memories,
|
||||||
|
experiences,
|
||||||
|
profile_signals,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 解析单个 memory 项
|
||||||
|
fn parse_memory_item(
|
||||||
|
value: &serde_json::Value,
|
||||||
|
session_id: &SessionId,
|
||||||
|
) -> Option<ExtractedMemory> {
|
||||||
|
let content = value.get("content")?.as_str()?.to_string();
|
||||||
|
let category = value
|
||||||
|
.get("category")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("unknown")
|
||||||
|
.to_string();
|
||||||
|
let memory_type_str = value
|
||||||
|
.get("memory_type")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("knowledge");
|
||||||
|
let memory_type = crate::types::MemoryType::parse(memory_type_str);
|
||||||
|
let confidence = value
|
||||||
|
.get("confidence")
|
||||||
|
.and_then(|v| v.as_f64())
|
||||||
|
.unwrap_or(0.7) as f32;
|
||||||
|
let keywords = crate::json_utils::extract_string_array(value, "keywords");
|
||||||
|
|
||||||
|
Some(
|
||||||
|
ExtractedMemory::new(memory_type, category, content, session_id.clone())
|
||||||
|
.with_confidence(confidence)
|
||||||
|
.with_keywords(keywords),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 解析单个 experience 项
|
||||||
|
fn parse_experience_item(value: &serde_json::Value) -> Option<crate::types::ExperienceCandidate> {
|
||||||
|
use crate::types::Outcome;
|
||||||
|
|
||||||
|
let pain_pattern = value.get("pain_pattern")?.as_str()?.to_string();
|
||||||
|
let context = value
|
||||||
|
.get("context")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("")
|
||||||
|
.to_string();
|
||||||
|
let solution_steps = crate::json_utils::extract_string_array(value, "solution_steps");
|
||||||
|
let outcome_str = value
|
||||||
|
.get("outcome")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("partial");
|
||||||
|
let outcome = match outcome_str {
|
||||||
|
"success" => Outcome::Success,
|
||||||
|
"failed" => Outcome::Failed,
|
||||||
|
_ => Outcome::Partial,
|
||||||
|
};
|
||||||
|
let confidence = value
|
||||||
|
.get("confidence")
|
||||||
|
.and_then(|v| v.as_f64())
|
||||||
|
.unwrap_or(0.6) as f32;
|
||||||
|
let tools_used = crate::json_utils::extract_string_array(value, "tools_used");
|
||||||
|
let industry_context = value
|
||||||
|
.get("industry_context")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(String::from);
|
||||||
|
|
||||||
|
Some(crate::types::ExperienceCandidate {
|
||||||
|
pain_pattern,
|
||||||
|
context,
|
||||||
|
solution_steps,
|
||||||
|
outcome,
|
||||||
|
confidence,
|
||||||
|
tools_used,
|
||||||
|
industry_context,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 解析 profile_signals
|
||||||
|
fn parse_profile_signals(obj: &serde_json::Value) -> crate::types::ProfileSignals {
|
||||||
|
let signals = obj.get("profile_signals");
|
||||||
|
crate::types::ProfileSignals {
|
||||||
|
industry: signals
|
||||||
|
.and_then(|s| s.get("industry"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(String::from),
|
||||||
|
recent_topic: signals
|
||||||
|
.and_then(|s| s.get("recent_topic"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(String::from),
|
||||||
|
pain_point: signals
|
||||||
|
.and_then(|s| s.get("pain_point"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(String::from),
|
||||||
|
preferred_tool: signals
|
||||||
|
.and_then(|s| s.get("preferred_tool"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(String::from),
|
||||||
|
communication_style: signals
|
||||||
|
.and_then(|s| s.get("communication_style"))
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.map(String::from),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 从已有记忆推断结构化经验(退化路径)
|
||||||
|
fn infer_experiences_from_memories(
|
||||||
|
memories: &[ExtractedMemory],
|
||||||
|
) -> Vec<crate::types::ExperienceCandidate> {
|
||||||
|
memories
|
||||||
|
.iter()
|
||||||
|
.filter(|m| m.memory_type == crate::types::MemoryType::Experience)
|
||||||
|
.filter_map(|m| {
|
||||||
|
// 经验类记忆 → ExperienceCandidate
|
||||||
|
let content = &m.content;
|
||||||
|
if content.len() < 10 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(crate::types::ExperienceCandidate {
|
||||||
|
pain_pattern: m.category.clone(),
|
||||||
|
context: content.clone(),
|
||||||
|
solution_steps: Vec::new(),
|
||||||
|
outcome: crate::types::Outcome::Success,
|
||||||
|
confidence: m.confidence * 0.7, // 降低推断置信度
|
||||||
|
tools_used: m.keywords.clone(),
|
||||||
|
industry_context: None,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 从已有记忆推断画像信号(退化路径)
|
||||||
|
fn infer_profile_signals_from_memories(
|
||||||
|
memories: &[ExtractedMemory],
|
||||||
|
) -> crate::types::ProfileSignals {
|
||||||
|
use crate::types::ProfileSignals;
|
||||||
|
|
||||||
|
let mut signals = ProfileSignals::default();
|
||||||
|
for m in memories {
|
||||||
|
match m.memory_type {
|
||||||
|
crate::types::MemoryType::Preference => {
|
||||||
|
if m.category.contains("style") || m.category.contains("风格") {
|
||||||
|
if signals.communication_style.is_none() {
|
||||||
|
signals.communication_style = Some(m.content.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
crate::types::MemoryType::Knowledge => {
|
||||||
|
if signals.recent_topic.is_none() && !m.keywords.is_empty() {
|
||||||
|
signals.recent_topic = Some(m.keywords.first().cloned().unwrap_or_default());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
crate::types::MemoryType::Experience => {
|
||||||
|
for kw in &m.keywords {
|
||||||
|
if signals.preferred_tool.is_none()
|
||||||
|
&& m.content.contains(kw.as_str())
|
||||||
|
{
|
||||||
|
signals.preferred_tool = Some(kw.clone());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
signals
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Default extraction prompts for LLM
|
/// Default extraction prompts for LLM
|
||||||
@@ -258,6 +561,55 @@ pub mod prompts {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// 统一提取 prompt — 单次 LLM 调用同时提取记忆、结构化经验、画像信号
|
||||||
|
pub const COMBINED_EXTRACTION_PROMPT: &str = r#"
|
||||||
|
分析以下对话,一次性提取三类信息。严格按 JSON 格式返回。
|
||||||
|
|
||||||
|
## 输出格式
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"memories": [
|
||||||
|
{
|
||||||
|
"memory_type": "preference|knowledge|experience",
|
||||||
|
"category": "分类标签",
|
||||||
|
"content": "记忆内容",
|
||||||
|
"confidence": 0.0-1.0,
|
||||||
|
"keywords": ["关键词"]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"experiences": [
|
||||||
|
{
|
||||||
|
"pain_pattern": "痛点模式简述",
|
||||||
|
"context": "问题发生的上下文",
|
||||||
|
"solution_steps": ["步骤1", "步骤2"],
|
||||||
|
"outcome": "success|partial|failed",
|
||||||
|
"confidence": 0.0-1.0,
|
||||||
|
"tools_used": ["使用的工具/技能"],
|
||||||
|
"industry_context": "行业标识(可选)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"profile_signals": {
|
||||||
|
"industry": "用户所在行业(可选)",
|
||||||
|
"recent_topic": "最近讨论的主要话题(可选)",
|
||||||
|
"pain_point": "用户当前痛点(可选)",
|
||||||
|
"preferred_tool": "用户偏好的工具/技能(可选)",
|
||||||
|
"communication_style": "沟通风格: concise|detailed|formal|casual(可选)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 提取规则
|
||||||
|
|
||||||
|
1. **memories**: 提取用户偏好(沟通风格/格式/语言)、知识(事实/领域知识/经验教训)、使用经验(技能/工具使用模式和结果)
|
||||||
|
2. **experiences**: 仅提取明确的"问题→解决"模式,要求有清晰的痛点和步骤,confidence >= 0.6
|
||||||
|
3. **profile_signals**: 从对话中推断用户画像信息,只在有明确信号时填写,留空则不填
|
||||||
|
4. 每个字段都要有实际内容,不确定的宁可省略
|
||||||
|
5. 只返回 JSON,不要附加其他文本
|
||||||
|
|
||||||
|
对话内容:
|
||||||
|
"#;
|
||||||
|
|
||||||
const PREFERENCE_EXTRACTION_PROMPT: &str = r#"
|
const PREFERENCE_EXTRACTION_PROMPT: &str = r#"
|
||||||
分析以下对话,提取用户的偏好设置。关注:
|
分析以下对话,提取用户的偏好设置。关注:
|
||||||
- 沟通风格偏好(简洁/详细、正式/随意)
|
- 沟通风格偏好(简洁/详细、正式/随意)
|
||||||
@@ -391,5 +743,89 @@ mod tests {
|
|||||||
assert!(!prompts::get_extraction_prompt(MemoryType::Knowledge).is_empty());
|
assert!(!prompts::get_extraction_prompt(MemoryType::Knowledge).is_empty());
|
||||||
assert!(!prompts::get_extraction_prompt(MemoryType::Experience).is_empty());
|
assert!(!prompts::get_extraction_prompt(MemoryType::Experience).is_empty());
|
||||||
assert!(!prompts::get_extraction_prompt(MemoryType::Session).is_empty());
|
assert!(!prompts::get_extraction_prompt(MemoryType::Session).is_empty());
|
||||||
|
assert!(!prompts::COMBINED_EXTRACTION_PROMPT.is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A fenced JSON reply carrying all three sections parses into matching counts and fields.
#[test]
fn test_parse_combined_response_full() {
    let llm_reply = r#"```json
{
"memories": [
{
"memory_type": "preference",
"category": "communication-style",
"content": "用户偏好简洁回复",
"confidence": 0.9,
"keywords": ["简洁", "风格"]
},
{
"memory_type": "knowledge",
"category": "user-facts",
"content": "用户是医院行政人员",
"confidence": 0.85,
"keywords": ["医院", "行政"]
}
],
"experiences": [
{
"pain_pattern": "报表生成耗时",
"context": "月度报表需要手动汇总多个Excel",
"solution_steps": ["使用researcher工具自动抓取", "格式化输出为Excel"],
"outcome": "success",
"confidence": 0.85,
"tools_used": ["researcher"],
"industry_context": "healthcare"
}
],
"profile_signals": {
"industry": "healthcare",
"recent_topic": "报表自动化",
"pain_point": "手动汇总Excel太慢",
"preferred_tool": "researcher",
"communication_style": "concise"
}
}
```"#;

    let combined = super::parse_combined_response(llm_reply, SessionId::new()).unwrap();

    // Section sizes.
    assert_eq!(combined.memories.len(), 2);
    assert_eq!(combined.experiences.len(), 1);

    // The single experience entry.
    let experience = &combined.experiences[0];
    assert_eq!(experience.pain_pattern, "报表生成耗时");
    assert_eq!(experience.outcome, crate::types::Outcome::Success);

    // Profile signals.
    let signals = &combined.profile_signals;
    assert_eq!(signals.industry.as_deref(), Some("healthcare"));
    assert_eq!(signals.pain_point.as_deref(), Some("手动汇总Excel太慢"));
    assert!(signals.has_any_signal());
}
|
||||||
|
|
||||||
|
/// All three sections present but empty: parsing succeeds and yields nothing.
#[test]
fn test_parse_combined_response_minimal() {
    let payload = r#"{"memories": [], "experiences": [], "profile_signals": {}}"#;
    let combined = super::parse_combined_response(payload, SessionId::new()).unwrap();

    assert_eq!(combined.memories.len(), 0);
    assert_eq!(combined.experiences.len(), 0);
    assert_eq!(combined.profile_signals.has_any_signal(), false);
}
|
||||||
|
|
||||||
|
/// Text that is not JSON must surface as an error rather than an empty result.
#[test]
fn test_parse_combined_response_invalid() {
    let outcome = super::parse_combined_response("not json at all", SessionId::new());
    assert!(outcome.is_err());
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_extract_combined_fallback() {
|
||||||
|
// MockLlmDriver doesn't implement extract_with_prompt, so it falls back
|
||||||
|
let driver = Arc::new(MockLlmDriver);
|
||||||
|
let extractor = MemoryExtractor::new(driver);
|
||||||
|
let messages = vec![Message::user("Hello"), Message::assistant("Hi there!")];
|
||||||
|
|
||||||
|
let result = extractor
|
||||||
|
.extract_combined(&messages, SessionId::new())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Fallback: extract() produces 3 memories, infer produces experiences from them
|
||||||
|
assert!(!result.memories.is_empty());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -434,6 +434,17 @@ pub struct ProfileSignals {
|
|||||||
pub communication_style: Option<String>,
|
pub communication_style: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl ProfileSignals {
|
||||||
|
/// 是否包含至少一个有效信号
|
||||||
|
pub fn has_any_signal(&self) -> bool {
|
||||||
|
self.industry.is_some()
|
||||||
|
|| self.recent_topic.is_some()
|
||||||
|
|| self.pain_point.is_some()
|
||||||
|
|| self.preferred_tool.is_some()
|
||||||
|
|| self.communication_style.is_some()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// 进化事件
|
/// 进化事件
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct EvolutionEvent {
|
pub struct EvolutionEvent {
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ use zclaw_growth::{
|
|||||||
AggregatedPattern, CombinedExtraction, EvolutionConfig, EvolutionEngine,
|
AggregatedPattern, CombinedExtraction, EvolutionConfig, EvolutionEngine,
|
||||||
ExperienceExtractor, GrowthTracker, InjectionFormat,
|
ExperienceExtractor, GrowthTracker, InjectionFormat,
|
||||||
LlmDriverForExtraction, MemoryExtractor, MemoryRetriever, PromptInjector,
|
LlmDriverForExtraction, MemoryExtractor, MemoryRetriever, PromptInjector,
|
||||||
ProfileSignals, RetrievalResult, UserProfileUpdater, VikingAdapter,
|
RetrievalResult, UserProfileUpdater, VikingAdapter,
|
||||||
};
|
};
|
||||||
use zclaw_memory::{ExtractedFactBatch, Fact, FactCategory, UserProfileStore};
|
use zclaw_memory::{ExtractedFactBatch, Fact, FactCategory, UserProfileStore};
|
||||||
use zclaw_types::{AgentId, Message, Result, SessionId};
|
use zclaw_types::{AgentId, Message, Result, SessionId};
|
||||||
@@ -263,8 +263,8 @@ impl GrowthIntegration {
|
|||||||
Ok(count)
|
Ok(count)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Combined extraction: single LLM call that produces both stored memories
|
/// Combined extraction: single LLM call that produces stored memories,
|
||||||
/// and structured facts, avoiding double extraction overhead.
|
/// structured experiences, and profile signals — all in one pass.
|
||||||
///
|
///
|
||||||
/// Returns `(memory_count, Option<ExtractedFactBatch>)` on success.
|
/// Returns `(memory_count, Option<ExtractedFactBatch>)` on success.
|
||||||
pub async fn extract_combined(
|
pub async fn extract_combined(
|
||||||
@@ -277,25 +277,28 @@ impl GrowthIntegration {
|
|||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Single LLM extraction call
|
// 单次 LLM 提取:memories + experiences + profile_signals
|
||||||
let extracted = self
|
let combined = self
|
||||||
.extractor
|
.extractor
|
||||||
.extract(messages, session_id.clone())
|
.extract_combined(messages, session_id.clone())
|
||||||
.await
|
.await
|
||||||
.unwrap_or_else(|e| {
|
.unwrap_or_else(|e| {
|
||||||
tracing::warn!("[GrowthIntegration] Combined extraction failed: {}", e);
|
tracing::warn!("[GrowthIntegration] Combined extraction failed: {}", e);
|
||||||
Vec::new()
|
CombinedExtraction::default()
|
||||||
});
|
});
|
||||||
|
|
||||||
if extracted.is_empty() {
|
if combined.memories.is_empty()
|
||||||
|
&& combined.experiences.is_empty()
|
||||||
|
&& !combined.profile_signals.has_any_signal()
|
||||||
|
{
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mem_count = extracted.len();
|
let mem_count = combined.memories.len();
|
||||||
|
|
||||||
// Store raw memories
|
// Store raw memories
|
||||||
self.extractor
|
self.extractor
|
||||||
.store_memories(&agent_id.to_string(), &extracted)
|
.store_memories(&agent_id.to_string(), &combined.memories)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
// Track learning event
|
// Track learning event
|
||||||
@@ -304,14 +307,9 @@ impl GrowthIntegration {
|
|||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
// Persist structured experiences (L1 enhancement)
|
// Persist structured experiences (L1 enhancement)
|
||||||
let combined_extraction = CombinedExtraction {
|
|
||||||
memories: extracted.clone(),
|
|
||||||
experiences: Vec::new(), // LLM-driven extraction fills this later
|
|
||||||
profile_signals: ProfileSignals::default(),
|
|
||||||
};
|
|
||||||
if let Ok(exp_count) = self
|
if let Ok(exp_count) = self
|
||||||
.experience_extractor
|
.experience_extractor
|
||||||
.persist_experiences(&agent_id.to_string(), &combined_extraction)
|
.persist_experiences(&agent_id.to_string(), &combined)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
if exp_count > 0 {
|
if exp_count > 0 {
|
||||||
@@ -324,9 +322,7 @@ impl GrowthIntegration {
|
|||||||
|
|
||||||
// Update user profile from extraction signals (L1 enhancement)
|
// Update user profile from extraction signals (L1 enhancement)
|
||||||
if let Some(profile_store) = &self.profile_store {
|
if let Some(profile_store) = &self.profile_store {
|
||||||
let updates = self
|
let updates = self.profile_updater.collect_updates(&combined);
|
||||||
.profile_updater
|
|
||||||
.collect_updates(&combined_extraction);
|
|
||||||
let user_id = agent_id.to_string();
|
let user_id = agent_id.to_string();
|
||||||
for update in updates {
|
for update in updates {
|
||||||
if let Err(e) = profile_store
|
if let Err(e) = profile_store
|
||||||
@@ -342,8 +338,9 @@ impl GrowthIntegration {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert same extracted memories to structured facts (no extra LLM call)
|
// Convert extracted memories to structured facts
|
||||||
let facts: Vec<Fact> = extracted
|
let facts: Vec<Fact> = combined
|
||||||
|
.memories
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|m| {
|
.map(|m| {
|
||||||
let category = match m.memory_type {
|
let category = match m.memory_type {
|
||||||
|
|||||||
@@ -225,6 +225,69 @@ impl LlmDriverForExtraction for TauriExtractionDriver {
|
|||||||
|
|
||||||
Ok(memories)
|
Ok(memories)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn extract_with_prompt(
|
||||||
|
&self,
|
||||||
|
messages: &[Message],
|
||||||
|
system_prompt: &str,
|
||||||
|
user_prompt: &str,
|
||||||
|
) -> Result<String> {
|
||||||
|
if messages.len() < 2 {
|
||||||
|
return Err(zclaw_types::Error::msg(
|
||||||
|
"Too few messages for combined extraction",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
tracing::debug!(
|
||||||
|
"[TauriExtractionDriver] Combined extraction from {} messages",
|
||||||
|
messages.len()
|
||||||
|
);
|
||||||
|
|
||||||
|
let request = CompletionRequest {
|
||||||
|
model: self.model.clone(),
|
||||||
|
system: Some(system_prompt.to_string()),
|
||||||
|
messages: vec![Message::user(user_prompt.to_string())],
|
||||||
|
tools: Vec::new(),
|
||||||
|
max_tokens: Some(3000),
|
||||||
|
temperature: Some(0.3),
|
||||||
|
stop: Vec::new(),
|
||||||
|
stream: false,
|
||||||
|
thinking_enabled: false,
|
||||||
|
reasoning_effort: None,
|
||||||
|
plan_mode: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
let response = self.driver.complete(request).await.map_err(|e| {
|
||||||
|
tracing::error!(
|
||||||
|
"[TauriExtractionDriver] Combined extraction LLM call failed: {}",
|
||||||
|
e
|
||||||
|
);
|
||||||
|
e
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let response_text: String = response
|
||||||
|
.content
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|block| match block {
|
||||||
|
ContentBlock::Text { text } => Some(text),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("");
|
||||||
|
|
||||||
|
if response_text.is_empty() {
|
||||||
|
return Err(zclaw_types::Error::msg(
|
||||||
|
"Empty response from LLM for combined extraction",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
"[TauriExtractionDriver] Combined extraction response: {} chars",
|
||||||
|
response_text.len()
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(response_text)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Global extraction driver instance (legacy path, kept for compatibility).
|
/// Global extraction driver instance (legacy path, kept for compatibility).
|
||||||
|
|||||||
Reference in New Issue
Block a user