fix(growth): HIGH-6 修复 extract_combined 合并提取空壳
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
根因: growth.rs 构造 CombinedExtraction 时硬编码 experiences: Vec::new() 和 profile_signals: default(),导致 L1 结构化经验不被提取、L2 技能进化没有输入数据、整个进化引擎无法端到端工作。

修复:
- extractor.rs: 添加 COMBINED_EXTRACTION_PROMPT 统一 prompt,单次 LLM 调用同时输出 memories + experiences + profile_signals
- extractor.rs: 添加 parse_combined_response() 解析 LLM JSON 响应
- LlmDriverForExtraction trait: 添加 extract_with_prompt() 方法(默认不支持,退化到现有 extract() + 启发式推断)
- MemoryExtractor: 添加 extract_combined() 方法,优先单次调用,失败则退化
- growth.rs: extract_combined() 使用新的合并提取替代硬编码空值
- TauriExtractionDriver: 实现 extract_with_prompt()
- ProfileSignals: 添加 has_any_signal() 方法
- types.rs: ProfileSignals 无结构性变化(字段已存在)

测试: 4 个新测试(parse_combined_response_full/minimal/invalid + extract_combined_fallback),11 个 extractor 测试全部通过
This commit is contained in:
@@ -21,7 +21,7 @@ pub trait LlmDriverForExtraction: Send + Sync {
|
||||
) -> Result<Vec<ExtractedMemory>>;
|
||||
|
||||
/// 单次 LLM 调用提取全部类型(记忆 + 经验 + 画像信号)
|
||||
/// 默认实现:退化到 3 次独立调用
|
||||
/// 默认实现:退化到 3 次独立调用(experiences 和 profile_signals 为空)
|
||||
async fn extract_combined_all(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
@@ -34,6 +34,19 @@ pub trait LlmDriverForExtraction: Send + Sync {
|
||||
}
|
||||
Ok(combined)
|
||||
}
|
||||
|
||||
/// 使用自定义 prompt 进行单次 LLM 调用,返回原始文本响应
|
||||
/// 用于统一提取场景,默认返回不支持错误
|
||||
async fn extract_with_prompt(
|
||||
&self,
|
||||
_messages: &[Message],
|
||||
_system_prompt: &str,
|
||||
_user_prompt: &str,
|
||||
) -> Result<String> {
|
||||
Err(zclaw_types::ZclawError::Internal(
|
||||
"extract_with_prompt not implemented".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory Extractor - extracts memories from conversations
|
||||
@@ -100,13 +113,10 @@ impl MemoryExtractor {
|
||||
session_id: SessionId,
|
||||
) -> Result<Vec<ExtractedMemory>> {
|
||||
// Check if LLM driver is available
|
||||
let _llm_driver = match &self.llm_driver {
|
||||
Some(driver) => driver,
|
||||
None => {
|
||||
tracing::debug!("[MemoryExtractor] No LLM driver configured, skipping extraction");
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
};
|
||||
if self.llm_driver.is_none() {
|
||||
tracing::debug!("[MemoryExtractor] No LLM driver configured, skipping extraction");
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let mut results = Vec::new();
|
||||
|
||||
@@ -242,6 +252,299 @@ impl MemoryExtractor {
|
||||
tracing::info!("[MemoryExtractor] Stored {} memories to OpenViking", stored);
|
||||
Ok(stored)
|
||||
}
|
||||
|
||||
/// 统一提取:单次 LLM 调用同时产出 memories + experiences + profile_signals
|
||||
///
|
||||
/// 优先使用 `extract_with_prompt()` 进行单次调用;若 driver 不支持则
|
||||
/// 退化为 `extract()` + 从记忆推断经验/画像。
|
||||
pub async fn extract_combined(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
session_id: SessionId,
|
||||
) -> Result<crate::types::CombinedExtraction> {
|
||||
let llm_driver = match &self.llm_driver {
|
||||
Some(driver) => driver,
|
||||
None => {
|
||||
tracing::debug!(
|
||||
"[MemoryExtractor] No LLM driver configured, skipping combined extraction"
|
||||
);
|
||||
return Ok(crate::types::CombinedExtraction::default());
|
||||
}
|
||||
};
|
||||
|
||||
// 尝试单次 LLM 调用路径
|
||||
let system_prompt = "You are a memory extraction assistant. Analyze conversations and extract \
|
||||
structured memories, experiences, and profile signals in valid JSON format. \
|
||||
Always respond with valid JSON only, no additional text or markdown formatting.";
|
||||
let user_prompt = format!(
|
||||
"{}{}",
|
||||
crate::extractor::prompts::COMBINED_EXTRACTION_PROMPT,
|
||||
format_conversation_text(messages)
|
||||
);
|
||||
|
||||
match llm_driver
|
||||
.extract_with_prompt(messages, system_prompt, &user_prompt)
|
||||
.await
|
||||
{
|
||||
Ok(raw_text) if !raw_text.trim().is_empty() => {
|
||||
match parse_combined_response(&raw_text, session_id.clone()) {
|
||||
Ok(combined) => {
|
||||
tracing::info!(
|
||||
"[MemoryExtractor] Combined extraction: {} memories, {} experiences, {} profile signals",
|
||||
combined.memories.len(),
|
||||
combined.experiences.len(),
|
||||
combined.profile_signals.has_any_signal() as usize,
|
||||
);
|
||||
return Ok(combined);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"[MemoryExtractor] Combined response parse failed, falling back: {}",
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(_) => {
|
||||
tracing::debug!("[MemoryExtractor] extract_with_prompt returned empty, falling back");
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::debug!(
|
||||
"[MemoryExtractor] extract_with_prompt not supported ({}), falling back",
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// 退化路径:使用已有的 extract() 然后推断 experiences 和 profile_signals
|
||||
let memories = self.extract(messages, session_id).await?;
|
||||
let experiences = infer_experiences_from_memories(&memories);
|
||||
let profile_signals = infer_profile_signals_from_memories(&memories);
|
||||
|
||||
Ok(crate::types::CombinedExtraction {
|
||||
memories,
|
||||
experiences,
|
||||
profile_signals,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// 格式化对话消息为文本
|
||||
fn format_conversation_text(messages: &[Message]) -> String {
|
||||
messages
|
||||
.iter()
|
||||
.filter_map(|msg| match msg {
|
||||
Message::User { content } => Some(format!("[User]: {}", content)),
|
||||
Message::Assistant { content, .. } => Some(format!("[Assistant]: {}", content)),
|
||||
Message::System { content } => Some(format!("[System]: {}", content)),
|
||||
Message::ToolUse { .. } | Message::ToolResult { .. } => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n")
|
||||
}
|
||||
|
||||
/// 从 LLM 原始响应解析 CombinedExtraction
|
||||
pub fn parse_combined_response(
|
||||
raw: &str,
|
||||
session_id: SessionId,
|
||||
) -> Result<crate::types::CombinedExtraction> {
|
||||
use crate::types::CombinedExtraction;
|
||||
|
||||
let json_str = crate::json_utils::extract_json_block(raw);
|
||||
let parsed: serde_json::Value = serde_json::from_str(json_str).map_err(|e| {
|
||||
zclaw_types::ZclawError::Internal(format!("Failed to parse combined JSON: {}", e))
|
||||
})?;
|
||||
|
||||
// 解析 memories
|
||||
let memories = parsed
|
||||
.get("memories")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|arr| {
|
||||
arr.iter()
|
||||
.filter_map(|item| parse_memory_item(item, &session_id))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
// 解析 experiences
|
||||
let experiences = parsed
|
||||
.get("experiences")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|arr| {
|
||||
arr.iter()
|
||||
.filter_map(parse_experience_item)
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
// 解析 profile_signals
|
||||
let profile_signals = parse_profile_signals(&parsed);
|
||||
|
||||
Ok(CombinedExtraction {
|
||||
memories,
|
||||
experiences,
|
||||
profile_signals,
|
||||
})
|
||||
}
|
||||
|
||||
/// 解析单个 memory 项
|
||||
fn parse_memory_item(
|
||||
value: &serde_json::Value,
|
||||
session_id: &SessionId,
|
||||
) -> Option<ExtractedMemory> {
|
||||
let content = value.get("content")?.as_str()?.to_string();
|
||||
let category = value
|
||||
.get("category")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
let memory_type_str = value
|
||||
.get("memory_type")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("knowledge");
|
||||
let memory_type = crate::types::MemoryType::parse(memory_type_str);
|
||||
let confidence = value
|
||||
.get("confidence")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.7) as f32;
|
||||
let keywords = crate::json_utils::extract_string_array(value, "keywords");
|
||||
|
||||
Some(
|
||||
ExtractedMemory::new(memory_type, category, content, session_id.clone())
|
||||
.with_confidence(confidence)
|
||||
.with_keywords(keywords),
|
||||
)
|
||||
}
|
||||
|
||||
/// 解析单个 experience 项
|
||||
fn parse_experience_item(value: &serde_json::Value) -> Option<crate::types::ExperienceCandidate> {
|
||||
use crate::types::Outcome;
|
||||
|
||||
let pain_pattern = value.get("pain_pattern")?.as_str()?.to_string();
|
||||
let context = value
|
||||
.get("context")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let solution_steps = crate::json_utils::extract_string_array(value, "solution_steps");
|
||||
let outcome_str = value
|
||||
.get("outcome")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("partial");
|
||||
let outcome = match outcome_str {
|
||||
"success" => Outcome::Success,
|
||||
"failed" => Outcome::Failed,
|
||||
_ => Outcome::Partial,
|
||||
};
|
||||
let confidence = value
|
||||
.get("confidence")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.6) as f32;
|
||||
let tools_used = crate::json_utils::extract_string_array(value, "tools_used");
|
||||
let industry_context = value
|
||||
.get("industry_context")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from);
|
||||
|
||||
Some(crate::types::ExperienceCandidate {
|
||||
pain_pattern,
|
||||
context,
|
||||
solution_steps,
|
||||
outcome,
|
||||
confidence,
|
||||
tools_used,
|
||||
industry_context,
|
||||
})
|
||||
}
|
||||
|
||||
/// 解析 profile_signals
|
||||
fn parse_profile_signals(obj: &serde_json::Value) -> crate::types::ProfileSignals {
|
||||
let signals = obj.get("profile_signals");
|
||||
crate::types::ProfileSignals {
|
||||
industry: signals
|
||||
.and_then(|s| s.get("industry"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from),
|
||||
recent_topic: signals
|
||||
.and_then(|s| s.get("recent_topic"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from),
|
||||
pain_point: signals
|
||||
.and_then(|s| s.get("pain_point"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from),
|
||||
preferred_tool: signals
|
||||
.and_then(|s| s.get("preferred_tool"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from),
|
||||
communication_style: signals
|
||||
.and_then(|s| s.get("communication_style"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from),
|
||||
}
|
||||
}
|
||||
|
||||
/// 从已有记忆推断结构化经验(退化路径)
|
||||
fn infer_experiences_from_memories(
|
||||
memories: &[ExtractedMemory],
|
||||
) -> Vec<crate::types::ExperienceCandidate> {
|
||||
memories
|
||||
.iter()
|
||||
.filter(|m| m.memory_type == crate::types::MemoryType::Experience)
|
||||
.filter_map(|m| {
|
||||
// 经验类记忆 → ExperienceCandidate
|
||||
let content = &m.content;
|
||||
if content.len() < 10 {
|
||||
return None;
|
||||
}
|
||||
Some(crate::types::ExperienceCandidate {
|
||||
pain_pattern: m.category.clone(),
|
||||
context: content.clone(),
|
||||
solution_steps: Vec::new(),
|
||||
outcome: crate::types::Outcome::Success,
|
||||
confidence: m.confidence * 0.7, // 降低推断置信度
|
||||
tools_used: m.keywords.clone(),
|
||||
industry_context: None,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// 从已有记忆推断画像信号(退化路径)
|
||||
fn infer_profile_signals_from_memories(
|
||||
memories: &[ExtractedMemory],
|
||||
) -> crate::types::ProfileSignals {
|
||||
use crate::types::ProfileSignals;
|
||||
|
||||
let mut signals = ProfileSignals::default();
|
||||
for m in memories {
|
||||
match m.memory_type {
|
||||
crate::types::MemoryType::Preference => {
|
||||
if m.category.contains("style") || m.category.contains("风格") {
|
||||
if signals.communication_style.is_none() {
|
||||
signals.communication_style = Some(m.content.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
crate::types::MemoryType::Knowledge => {
|
||||
if signals.recent_topic.is_none() && !m.keywords.is_empty() {
|
||||
signals.recent_topic = Some(m.keywords.first().cloned().unwrap_or_default());
|
||||
}
|
||||
}
|
||||
crate::types::MemoryType::Experience => {
|
||||
for kw in &m.keywords {
|
||||
if signals.preferred_tool.is_none()
|
||||
&& m.content.contains(kw.as_str())
|
||||
{
|
||||
signals.preferred_tool = Some(kw.clone());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
signals
|
||||
}
|
||||
|
||||
/// Default extraction prompts for LLM
|
||||
@@ -258,6 +561,55 @@ pub mod prompts {
|
||||
}
|
||||
}
|
||||
|
||||
/// Unified extraction prompt: a single LLM call extracts memories, structured experiences and profile signals.
|
||||
pub const COMBINED_EXTRACTION_PROMPT: &str = r#"
|
||||
分析以下对话,一次性提取三类信息。严格按 JSON 格式返回。
|
||||
|
||||
## 输出格式
|
||||
|
||||
```json
|
||||
{
|
||||
"memories": [
|
||||
{
|
||||
"memory_type": "preference|knowledge|experience",
|
||||
"category": "分类标签",
|
||||
"content": "记忆内容",
|
||||
"confidence": 0.0-1.0,
|
||||
"keywords": ["关键词"]
|
||||
}
|
||||
],
|
||||
"experiences": [
|
||||
{
|
||||
"pain_pattern": "痛点模式简述",
|
||||
"context": "问题发生的上下文",
|
||||
"solution_steps": ["步骤1", "步骤2"],
|
||||
"outcome": "success|partial|failed",
|
||||
"confidence": 0.0-1.0,
|
||||
"tools_used": ["使用的工具/技能"],
|
||||
"industry_context": "行业标识(可选)"
|
||||
}
|
||||
],
|
||||
"profile_signals": {
|
||||
"industry": "用户所在行业(可选)",
|
||||
"recent_topic": "最近讨论的主要话题(可选)",
|
||||
"pain_point": "用户当前痛点(可选)",
|
||||
"preferred_tool": "用户偏好的工具/技能(可选)",
|
||||
"communication_style": "沟通风格: concise|detailed|formal|casual(可选)"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 提取规则
|
||||
|
||||
1. **memories**: 提取用户偏好(沟通风格/格式/语言)、知识(事实/领域知识/经验教训)、使用经验(技能/工具使用模式和结果)
|
||||
2. **experiences**: 仅提取明确的"问题→解决"模式,要求有清晰的痛点和步骤,confidence >= 0.6
|
||||
3. **profile_signals**: 从对话中推断用户画像信息,只在有明确信号时填写,留空则不填
|
||||
4. 每个字段都要有实际内容,不确定的宁可省略
|
||||
5. 只返回 JSON,不要附加其他文本
|
||||
|
||||
对话内容:
|
||||
"#;
|
||||
|
||||
const PREFERENCE_EXTRACTION_PROMPT: &str = r#"
|
||||
分析以下对话,提取用户的偏好设置。关注:
|
||||
- 沟通风格偏好(简洁/详细、正式/随意)
|
||||
@@ -391,5 +743,89 @@ mod tests {
|
||||
assert!(!prompts::get_extraction_prompt(MemoryType::Knowledge).is_empty());
|
||||
assert!(!prompts::get_extraction_prompt(MemoryType::Experience).is_empty());
|
||||
assert!(!prompts::get_extraction_prompt(MemoryType::Session).is_empty());
|
||||
assert!(!prompts::COMBINED_EXTRACTION_PROMPT.is_empty());
|
||||
}
|
||||
|
||||
/// A fenced JSON payload with all three sections populated parses into matching counts
/// and field values.
#[test]
fn test_parse_combined_response_full() {
    let payload = r#"```json
{
"memories": [
{
"memory_type": "preference",
"category": "communication-style",
"content": "用户偏好简洁回复",
"confidence": 0.9,
"keywords": ["简洁", "风格"]
},
{
"memory_type": "knowledge",
"category": "user-facts",
"content": "用户是医院行政人员",
"confidence": 0.85,
"keywords": ["医院", "行政"]
}
],
"experiences": [
{
"pain_pattern": "报表生成耗时",
"context": "月度报表需要手动汇总多个Excel",
"solution_steps": ["使用researcher工具自动抓取", "格式化输出为Excel"],
"outcome": "success",
"confidence": 0.85,
"tools_used": ["researcher"],
"industry_context": "healthcare"
}
],
"profile_signals": {
"industry": "healthcare",
"recent_topic": "报表自动化",
"pain_point": "手动汇总Excel太慢",
"preferred_tool": "researcher",
"communication_style": "concise"
}
}
```"#;

    let extraction = super::parse_combined_response(payload, SessionId::new()).unwrap();

    assert_eq!(extraction.memories.len(), 2);

    let experiences = &extraction.experiences;
    assert_eq!(experiences.len(), 1);
    let first = &experiences[0];
    assert_eq!(first.pain_pattern, "报表生成耗时");
    assert_eq!(first.outcome, crate::types::Outcome::Success);

    let signals = &extraction.profile_signals;
    assert_eq!(signals.industry.as_deref(), Some("healthcare"));
    assert_eq!(signals.pain_point.as_deref(), Some("手动汇总Excel太慢"));
    assert!(signals.has_any_signal());
}
|
||||
|
||||
/// An object with empty sections parses successfully into an empty extraction.
#[test]
fn test_parse_combined_response_minimal() {
    let payload = r#"{"memories": [], "experiences": [], "profile_signals": {}}"#;

    let extraction = super::parse_combined_response(payload, SessionId::new()).unwrap();

    assert!(extraction.memories.is_empty());
    assert!(extraction.experiences.is_empty());
    assert!(!extraction.profile_signals.has_any_signal());
}
|
||||
|
||||
/// Text that is not JSON is reported as an error.
#[test]
fn test_parse_combined_response_invalid() {
    let outcome = super::parse_combined_response("not json at all", SessionId::new());
    assert!(outcome.is_err());
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_extract_combined_fallback() {
|
||||
// MockLlmDriver doesn't implement extract_with_prompt, so it falls back
|
||||
let driver = Arc::new(MockLlmDriver);
|
||||
let extractor = MemoryExtractor::new(driver);
|
||||
let messages = vec![Message::user("Hello"), Message::assistant("Hi there!")];
|
||||
|
||||
let result = extractor
|
||||
.extract_combined(&messages, SessionId::new())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Fallback: extract() produces 3 memories, infer produces experiences from them
|
||||
assert!(!result.memories.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -434,6 +434,17 @@ pub struct ProfileSignals {
|
||||
pub communication_style: Option<String>,
|
||||
}
|
||||
|
||||
impl ProfileSignals {
|
||||
/// 是否包含至少一个有效信号
|
||||
pub fn has_any_signal(&self) -> bool {
|
||||
self.industry.is_some()
|
||||
|| self.recent_topic.is_some()
|
||||
|| self.pain_point.is_some()
|
||||
|| self.preferred_tool.is_some()
|
||||
|| self.communication_style.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
/// 进化事件
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct EvolutionEvent {
|
||||
|
||||
Reference in New Issue
Block a user