feat(hermes): implement intelligence pipeline — 4 chunks, 684 tests passing

Hermes Intelligence Pipeline closes breakpoints in ZCLAW's existing
intelligence components with 4 self-contained modules:

Chunk 1 — Self-improvement Loop:
- ExperienceStore (zclaw-growth): FTS5+TF-IDF wrapper with scope prefix
- ExperienceExtractor (desktop/intelligence): template-based extraction
  from successful proposals with implicit keyword detection

Chunk 2 — User Modeling:
- UserProfileStore (zclaw-memory): SQLite-backed structured profiles
  with industry/role/expertise/comm_style/recent_topics/pain_points
- UserProfiler (desktop/intelligence): fact classification by category
  (Preference/Knowledge/Behavior) with profile summary formatting

Chunk 3 — NL Cron Chinese Time Parser:
- NlScheduleParser (zclaw-runtime): 6 pattern matchers for Chinese time
  expressions (每天/每周/工作日/间隔/每月/一次性) producing cron expressions
- Period-aware hour adjustment (下午3点→15, 晚上8点→20)
- Schedule intent detection + task description extraction

Chunk 4 — Trajectory Compression:
- TrajectoryStore (zclaw-memory): trajectory_events + compressed_trajectories
- TrajectoryRecorderMiddleware (zclaw-runtime/middleware): priority 650,
  async non-blocking event recording via tokio::spawn
- TrajectoryCompressor (desktop/intelligence): dedup, request classification,
  satisfaction detection, execution chain JSON

Schema migrations: v2→v3 (user_profiles), v3→v4 (trajectory tables)
This commit is contained in:
iven
2026-04-09 17:47:43 +08:00
parent 0883bb28ff
commit 4b15ead8e7
15 changed files with 4225 additions and 0 deletions

View File

@@ -0,0 +1,394 @@
//! Experience Extractor — transforms successful proposals into reusable experiences.
//!
//! Closes Breakpoint 3 (successful solution → structured experience) and
//! Breakpoint 4 (experience reuse injection) of the self-improvement loop.
//!
//! When a user confirms a proposal was helpful (explicitly or via implicit
//! keyword detection), the extractor creates an [`Experience`] record and
//! stores it through [`ExperienceStore`] for future retrieval.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use tracing::{debug, warn};
use uuid::Uuid;
use zclaw_growth::ExperienceStore;
use zclaw_types::Result;
use super::pain_aggregator::PainPoint;
use super::solution_generator::{Proposal, ProposalStatus};
// ---------------------------------------------------------------------------
// Shared completion status
// ---------------------------------------------------------------------------
/// Completion outcome attached to proposal feedback.
///
/// Serialized through serde using the lowercase variant name
/// (`rename_all = "lowercase"`).
// NOTE(review): the trajectory compressor imports a `CompletionStatus` from
// `zclaw_memory::trajectory_store`; confirm whether that type and this one are
// really meant to be shared.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CompletionStatus {
/// The proposal worked; `extract_from_proposal` stores an experience for it.
Success,
/// The proposal helped only partly; `extract_from_proposal` still stores it.
Partial,
/// The proposal did not work; `extract_from_proposal` skips it.
Failed,
/// Also skipped by `extract_from_proposal`.
// NOTE(review): presumably "the user gave up before an outcome was known" — confirm.
Abandoned,
}
// ---------------------------------------------------------------------------
// Feedback & event types
// ---------------------------------------------------------------------------
/// User feedback on a proposal's effectiveness.
///
/// Passed to `ExperienceExtractor::extract_from_proposal`, which only reads
/// `outcome` to decide whether an experience is stored.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProposalFeedback {
/// ID of the proposal this feedback refers to.
pub proposal_id: String,
/// Outcome reported for the proposal.
pub outcome: CompletionStatus,
/// Optional free-text remark from the user.
pub user_comment: Option<String>,
/// UTC timestamp attached to the feedback.
// NOTE(review): presumably the moment the feedback was detected — confirm with producers.
pub detected_at: DateTime<Utc>,
}
/// Event emitted when a pain point reaches high confidence.
///
/// Nothing in this file constructs or consumes this event; it is a plain
/// serializable payload.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PainConfirmedEvent {
/// ID of the confirmed pain point.
pub pain_point_id: String,
/// Pattern text describing the pain point.
pub pattern: String,
/// Confidence score for the pain point.
// NOTE(review): assumed to lie in 0.0..=1.0 — confirm against the emitter.
pub confidence: f64,
}
// ---------------------------------------------------------------------------
// Implicit feedback detection
// ---------------------------------------------------------------------------
/// Phrases taken as a sign that the user considers the problem solved.
///
/// Entries are matched as substrings of the lower-cased message, so every
/// entry MUST be non-empty: `str::contains("")` is true for any input and an
/// empty entry would classify every message as positive.
// NOTE(review): an empty entry used to sit between "很好" and "不错"; it looks like
// a character (possibly an emoji such as a thumbs-up) lost to an encoding
// problem. Restore the intended symbol here if that is the case.
// "OK" can never match a lower-cased message; "ok" covers it.
const POSITIVE_KEYWORDS: &[&str] = &[
    "好了", "解决了", "可以了", "对了", "完美",
    "谢谢", "很好", "不错", "成功了",
    "行了", "搞定了", "OK", "ok", "搞定",
];

/// Phrases taken as a sign that the proposal did not help.
///
/// Same matching rules as [`POSITIVE_KEYWORDS`]; entries must be non-empty.
const NEGATIVE_KEYWORDS: &[&str] = &[
    "没用", "不对", "还是不行", "错了", "差太远",
    "不好使", "不管用", "没效果", "失败", "不行",
];
/// Detect implicit feedback in a user message.
///
/// The message is lower-cased and scanned for the phrases in
/// `NEGATIVE_KEYWORDS` and `POSITIVE_KEYWORDS` (substring match).
///
/// Returns:
/// - `Some(CompletionStatus::Failed)` if any negative phrase occurs,
/// - `Some(CompletionStatus::Success)` if any positive phrase occurs,
/// - `None` if neither list matches.
///
/// Negative phrases are checked first because several of them contain a
/// positive phrase as a substring once a particle is appended (e.g. "不行了"
/// contains "行了", "不对了" contains "对了"); checking positives first would
/// report those complaints as success.
pub fn detect_implicit_feedback(message: &str) -> Option<CompletionStatus> {
    let lower = message.to_lowercase();

    // An empty pattern is contained in every string, so it must never count
    // as a hit even if one slips into a keyword table.
    let mentions = |keywords: &[&str]| {
        keywords
            .iter()
            .any(|kw| !kw.is_empty() && lower.contains(*kw))
    };

    if mentions(NEGATIVE_KEYWORDS) {
        Some(CompletionStatus::Failed)
    } else if mentions(POSITIVE_KEYWORDS) {
        Some(CompletionStatus::Success)
    } else {
        None
    }
}
// ---------------------------------------------------------------------------
// ExperienceExtractor
// ---------------------------------------------------------------------------
/// Extracts structured experiences from successful proposals.
///
/// Extraction is template-based: a fixed-format record is built from the
/// proposal and pain-point fields. This struct holds no LLM driver, so the
/// LLM-assisted strategy mentioned in the design is not available here.
pub struct ExperienceExtractor {
// Shared handle to the store used both for persisting and for looking up experiences.
experience_store: std::sync::Arc<ExperienceStore>,
}
impl ExperienceExtractor {
pub fn new(experience_store: std::sync::Arc<ExperienceStore>) -> Self {
Self { experience_store }
}
/// Extract and store an experience from a successful proposal + pain point.
///
/// Uses template extraction as the default strategy. LLM-assisted extraction
/// can be added later by wiring a driver through the constructor.
pub async fn extract_from_proposal(
&self,
proposal: &Proposal,
pain: &PainPoint,
feedback: &ProposalFeedback,
) -> Result<()> {
if feedback.outcome != CompletionStatus::Success && feedback.outcome != CompletionStatus::Partial {
debug!(
"[ExperienceExtractor] Skipping non-success proposal {} ({:?})",
proposal.id, feedback.outcome
);
return Ok(());
}
let experience = self.template_extract(proposal, pain, feedback);
self.experience_store.store_experience(&experience).await?;
debug!(
"[ExperienceExtractor] Stored experience {} for pain '{}'",
experience.id, experience.pain_pattern
);
Ok(())
}
/// Template-based extraction — deterministic, no LLM required.
fn template_extract(
&self,
proposal: &Proposal,
pain: &PainPoint,
feedback: &ProposalFeedback,
) -> zclaw_growth::experience_store::Experience {
let solution_steps: Vec<String> = proposal.steps.iter()
.map(|s| {
if let Some(ref hint) = s.skill_hint {
format!("{} (工具: {})", s.detail, hint)
} else {
s.detail.clone()
}
})
.collect();
let context = format!(
"痛点: {} | 类别: {} | 出现{}次 | 证据: {}",
pain.summary,
pain.category,
pain.occurrence_count,
pain.evidence.iter()
.map(|e| e.user_said.as_str())
.collect::<Vec<_>>()
.join("")
);
let outcome = match feedback.outcome {
CompletionStatus::Success => "成功解决",
CompletionStatus::Partial => "部分解决",
CompletionStatus::Failed => "未解决",
CompletionStatus::Abandoned => "已放弃",
};
zclaw_growth::experience_store::Experience::new(
&pain.agent_id,
&pain.summary,
&context,
solution_steps,
outcome,
)
}
/// Search for relevant experiences to inject into a conversation.
///
/// Returns experiences whose pain pattern matches the user's current input.
pub async fn find_relevant_experiences(
&self,
agent_id: &str,
user_input: &str,
) -> Vec<zclaw_growth::experience_store::Experience> {
match self.experience_store.find_by_pattern(agent_id, user_input).await {
Ok(experiences) => {
if !experiences.is_empty() {
// Increment reuse count for found experiences (fire-and-forget)
for exp in &experiences {
let store = self.experience_store.clone();
let exp_clone = exp.clone();
tokio::spawn(async move {
store.increment_reuse(&exp_clone).await;
});
}
}
experiences
}
Err(e) => {
warn!("[ExperienceExtractor] find_relevant failed: {}", e);
Vec::new()
}
}
}
/// Format experiences for system prompt injection.
/// Returns a concise block capped at ~200 Chinese characters.
pub fn format_for_injection(
experiences: &[zclaw_growth::experience_store::Experience],
) -> String {
if experiences.is_empty() {
return String::new();
}
let mut parts = Vec::new();
let mut total_chars = 0;
let max_chars = 200;
for exp in experiences {
if total_chars >= max_chars {
break;
}
let step_summary = exp.solution_steps.first()
.map(|s| truncate(s, 40))
.unwrap_or_default();
let line = format!(
"[过往经验] 类似「{}」做过:{},结果是{}",
truncate(&exp.pain_pattern, 30),
step_summary,
exp.outcome
);
total_chars += line.chars().count();
parts.push(line);
}
if parts.is_empty() {
return String::new();
}
format!("\n\n--- 过往经验参考 ---\n{}", parts.join("\n"))
}
}
/// Shorten `s` to at most `max_chars` characters, marking a cut with `…`.
///
/// Counts Unicode scalar values, not bytes, so multi-byte text is never split
/// inside a character. A string that already fits is returned unchanged; a
/// longer one is cut to its first `max_chars` characters and gets a trailing
/// ellipsis (so the result is `max_chars + 1` characters long).
fn truncate(s: &str, max_chars: usize) -> String {
    // The character at index `max_chars` exists only when the string is too
    // long; its byte offset is exactly where the kept prefix ends.
    match s.char_indices().nth(max_chars) {
        None => s.to_string(),
        Some((cut, _)) => format!("{}…", &s[..cut]),
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use crate::intelligence::pain_aggregator::PainSeverity;
    use std::sync::Arc;

    /// Pain point fixture owned by "agent-1".
    fn sample_pain() -> PainPoint {
        PainPoint::new(
            "agent-1",
            "user-1",
            "出口包装不合格",
            "logistics",
            PainSeverity::High,
            "又被退了",
            "recurring packaging issue",
        )
    }

    /// Proposal derived from the given pain point.
    fn sample_proposal(pain: &PainPoint) -> Proposal {
        Proposal::from_pain_point(pain)
    }

    /// Extractor wired to a fresh in-memory store, plus a handle to that store.
    fn in_memory_extractor() -> (ExperienceExtractor, Arc<ExperienceStore>) {
        let viking = Arc::new(zclaw_growth::VikingAdapter::in_memory());
        let store = Arc::new(ExperienceStore::new(viking));
        (ExperienceExtractor::new(store.clone()), store)
    }

    /// Feedback for `proposal` with the given outcome and user comment.
    fn feedback_for(
        proposal: &Proposal,
        outcome: CompletionStatus,
        comment: &str,
    ) -> ProposalFeedback {
        ProposalFeedback {
            proposal_id: proposal.id.clone(),
            outcome,
            user_comment: Some(comment.into()),
            detected_at: Utc::now(),
        }
    }

    #[test]
    fn test_detect_positive_feedback() {
        let success = Some(CompletionStatus::Success);
        assert_eq!(detect_implicit_feedback("好了,这下解决了"), success);
        assert_eq!(detect_implicit_feedback("谢谢,完美"), success);
    }

    #[test]
    fn test_detect_negative_feedback() {
        let failed = Some(CompletionStatus::Failed);
        assert_eq!(detect_implicit_feedback("还是不行"), failed);
        assert_eq!(detect_implicit_feedback("没用啊"), failed);
    }

    #[test]
    fn test_no_feedback() {
        assert_eq!(detect_implicit_feedback("今天天气怎么样"), None);
        assert_eq!(detect_implicit_feedback("帮我查一下"), None);
    }

    #[test]
    fn test_template_extract() {
        let (extractor, _store) = in_memory_extractor();
        let pain = sample_pain();
        let proposal = sample_proposal(&pain);
        let feedback = feedback_for(&proposal, CompletionStatus::Success, "好了");

        let exp = extractor.template_extract(&proposal, &pain, &feedback);

        assert!(!exp.id.is_empty());
        assert_eq!(exp.agent_id, "agent-1");
        assert!(!exp.solution_steps.is_empty());
        assert_eq!(exp.outcome, "成功解决");
    }

    #[test]
    fn test_format_for_injection_empty() {
        let formatted = ExperienceExtractor::format_for_injection(&[]);
        assert!(formatted.is_empty());
    }

    #[test]
    fn test_format_for_injection_with_data() {
        let exp = zclaw_growth::experience_store::Experience::new(
            "agent-1",
            "出口包装问题",
            "包装被退回",
            vec!["检查法规".into(), "使用合规材料".into()],
            "成功解决",
        );

        let formatted = ExperienceExtractor::format_for_injection(&[exp]);

        assert!(formatted.contains("过往经验"));
        assert!(formatted.contains("出口包装问题"));
    }

    #[tokio::test]
    async fn test_extract_stores_experience() {
        let (extractor, store) = in_memory_extractor();
        let pain = sample_pain();
        let proposal = sample_proposal(&pain);
        let feedback = feedback_for(&proposal, CompletionStatus::Success, "好了");

        extractor
            .extract_from_proposal(&proposal, &pain, &feedback)
            .await
            .unwrap();

        let found = store.find_by_agent("agent-1").await.unwrap();
        assert_eq!(found.len(), 1);
    }

    #[tokio::test]
    async fn test_extract_skips_failed_feedback() {
        let (extractor, store) = in_memory_extractor();
        let pain = sample_pain();
        let proposal = sample_proposal(&pain);
        let feedback = feedback_for(&proposal, CompletionStatus::Failed, "没用");

        extractor
            .extract_from_proposal(&proposal, &pain, &feedback)
            .await
            .unwrap();

        let found = store.find_by_agent("agent-1").await.unwrap();
        assert!(found.is_empty(), "Should not store experience for failed feedback");
    }

    #[test]
    fn test_truncate() {
        assert_eq!(truncate("hello", 10), "hello");
        // 10 kept characters plus the trailing ellipsis.
        let shortened = truncate("这是一个很长的字符串用于测试截断", 10);
        assert_eq!(shortened.chars().count(), 11);
    }
}

View File

@@ -36,6 +36,9 @@ pub mod pain_aggregator;
pub mod solution_generator;
pub mod personality_detector;
pub mod pain_storage;
pub mod experience;
pub mod user_profiler;
pub mod trajectory_compressor;
// Re-export main types for convenience
pub use heartbeat::HeartbeatEngineState;

View File

@@ -0,0 +1,328 @@
//! Trajectory Compressor — compresses raw events into structured trajectories.
//!
//! Takes a list of `TrajectoryEvent` records and produces a single
//! `CompressedTrajectory` summarising the session. Called at session end
//! (or compaction flush) to reduce storage and prepare data for analysis.
use chrono::Utc;
use zclaw_memory::trajectory_store::{
CompressedTrajectory, CompletionStatus, SatisfactionSignal, TrajectoryEvent, TrajectoryStepType,
};
// ---------------------------------------------------------------------------
// Satisfaction detection
// ---------------------------------------------------------------------------
/// Phrases taken as a sign that the user is satisfied.
///
/// Entries are matched as substrings of the lower-cased message, so every
/// entry MUST be non-empty: `str::contains("")` is true for any input and an
/// empty entry would make every message count as positive.
// NOTE(review): an empty entry used to sit between "完美" and "不错"; it looks like
// a character (possibly an emoji) lost to an encoding problem. Restore the
// intended symbol here if that is the case.
const POSITIVE_SIGNALS: &[&str] = &[
    "谢谢", "很好", "解决了", "可以了", "对了", "完美",
    "不错", "成功了", "行了", "搞定",
];

/// Phrases taken as a sign that the user is dissatisfied.
///
/// Same matching rules as [`POSITIVE_SIGNALS`]; entries must be non-empty.
const NEGATIVE_SIGNALS: &[&str] = &[
    "不对", "没用", "还是不行", "错了", "差太远",
    "不好使", "不管用", "没效果", "失败", "不行",
];
/// Detect user satisfaction from the last few messages.
///
/// Looks at up to three messages, newest first (the slice is read from its
/// end). The first message containing a known phrase decides the result:
/// `Negative` if it contains an entry of `NEGATIVE_SIGNALS`, otherwise
/// `Positive` if it contains an entry of `POSITIVE_SIGNALS`.
///
/// Returns `None` for an empty slice and `Some(Neutral)` when none of the
/// inspected messages contains a known phrase.
///
/// Within a message, negative phrases are checked first because several of
/// them contain a positive phrase as a substring once a particle is appended
/// (e.g. "不行了" contains "行了"); the opposite order would report such
/// complaints as satisfaction.
pub fn detect_satisfaction(last_messages: &[String]) -> Option<SatisfactionSignal> {
    if last_messages.is_empty() {
        return None;
    }

    // An empty pattern is contained in every string, so it must never count
    // as a hit even if one slips into a signal table.
    let mentions = |text: &str, keywords: &[&str]| {
        keywords
            .iter()
            .any(|kw| !kw.is_empty() && text.contains(*kw))
    };

    for msg in last_messages.iter().rev().take(3) {
        let lower = msg.to_lowercase();
        if mentions(&lower, NEGATIVE_SIGNALS) {
            return Some(SatisfactionSignal::Negative);
        }
        if mentions(&lower, POSITIVE_SIGNALS) {
            return Some(SatisfactionSignal::Positive);
        }
    }
    Some(SatisfactionSignal::Neutral)
}
// ---------------------------------------------------------------------------
// Compression
// ---------------------------------------------------------------------------
/// Compress a sequence of trajectory events into a single summary.
///
/// Returns `None` if `events` is empty. Otherwise the session and agent IDs
/// are taken from the first event, and:
/// - `request_type`, `total_steps`, `outcome` and `execution_chain` are
///   derived from the key steps (runs of consecutive same-type events
///   collapsed to their first event),
/// - `tools_used` and `total_duration_ms` are derived from ALL events.
///
/// `tools_used` deliberately ignores the collapsing step: two different tools
/// executed back-to-back form one run of `ToolExecution` events, and reading
/// tool names from the collapsed list would silently drop the second tool.
/// `extract_tools` already removes repeated names, so retries do not produce
/// duplicates.
pub fn compress(
    events: Vec<TrajectoryEvent>,
    satisfaction: Option<SatisfactionSignal>,
) -> Option<CompressedTrajectory> {
    let first = events.first()?;
    let session_id = first.session_id.clone();
    let agent_id = first.agent_id.clone();

    // Key steps: retries / error recovery collapsed away.
    let key_events = deduplicate_steps(&events);
    let all_events: Vec<&TrajectoryEvent> = events.iter().collect();

    let request_type = infer_request_type(&key_events);
    let tools_used = extract_tools(&all_events);
    let total_steps = key_events.len();
    let total_duration_ms: u64 = events.iter().map(|e| e.duration_ms).sum();
    let outcome = infer_outcome(&key_events, satisfaction);
    let execution_chain = build_chain_json(&key_events);

    Some(CompressedTrajectory {
        id: uuid::Uuid::new_v4().to_string(),
        session_id,
        agent_id,
        request_type,
        tools_used,
        outcome,
        total_steps,
        total_duration_ms,
        // Not known at this point; presumably filled in later by the caller — confirm.
        total_tokens: 0,
        execution_chain,
        satisfaction_signal: satisfaction,
        created_at: Utc::now(),
    })
}
/// Collapse runs of consecutive events that share a step type.
///
/// Only the first event of each run is kept, which drops retries and error
/// recovery attempts. The result preserves the original order and is empty
/// only when `events` is empty: the first event always starts a run, so no
/// fallback for "everything was removed" is needed.
fn deduplicate_steps(events: &[TrajectoryEvent]) -> Vec<&TrajectoryEvent> {
    let mut kept: Vec<&TrajectoryEvent> = Vec::new();
    for event in events {
        let continues_run = kept
            .last()
            .map_or(false, |prev| prev.step_type == event.step_type);
        if !continues_run {
            kept.push(event);
        }
    }
    kept
}
/// Derive the request type from the first `UserRequest` event.
///
/// The event's input summary is passed to `classify_request`. When the list
/// holds no `UserRequest` event at all, the type is `"general"`.
fn infer_request_type(events: &[&TrajectoryEvent]) -> String {
    events
        .iter()
        .find(|event| event.step_type == TrajectoryStepType::UserRequest)
        .map(|request| classify_request(&request.input_summary))
        .unwrap_or_else(|| "general".to_string())
}
/// Map a user request to a coarse request-type label by keyword lookup.
///
/// The input is lower-cased and checked against the rule table from top to
/// bottom; the first rule with a keyword contained in the input wins, so the
/// order of the rules is significant. Falls back to `"general"`.
fn classify_request(input: &str) -> String {
    // (keywords, label) — evaluated in order, first hit wins.
    const RULES: &[(&[&str], &str)] = &[
        (&["报告", "数据", "统计", "报表", "汇总"], "data_report"),
        (&["政策", "法规", "合规", "标准"], "policy_query"),
        (&["查房", "巡房"], "inspection"),
        (&["排班", "值班"], "scheduling"),
        (&["会议", "日程", "安排", "提醒"], "meeting"),
        (&["检查"], "inspection"),
    ];

    let normalized = input.to_lowercase();
    RULES
        .iter()
        .find(|(keywords, _)| keywords.iter().any(|kw| normalized.contains(*kw)))
        .map_or_else(|| "general".to_string(), |(_, label)| (*label).to_string())
}
/// Collect the distinct tool names of all `ToolExecution` events.
///
/// The tool name is read from the event's input summary. Empty names are
/// ignored, and each name appears once, at the position of its first
/// occurrence.
fn extract_tools(events: &[&TrajectoryEvent]) -> Vec<String> {
    let mut already_listed = std::collections::HashSet::new();
    events
        .iter()
        .filter(|event| event.step_type == TrajectoryStepType::ToolExecution)
        .map(|event| event.input_summary.as_str())
        .filter(|name| !name.is_empty() && already_listed.insert(*name))
        .map(str::to_string)
        .collect()
}
/// Derive the completion outcome from the satisfaction signal and the steps.
///
/// - no signal: `Partial`
/// - `Positive`: `Success`
/// - `Negative`: `Failed`
/// - `Neutral`: `Partial` if any event in `events` is an `LlmGeneration`
///   step, otherwise `Abandoned`
fn infer_outcome(
    events: &[&TrajectoryEvent],
    satisfaction: Option<SatisfactionSignal>,
) -> CompletionStatus {
    let signal = match satisfaction {
        Some(signal) => signal,
        None => return CompletionStatus::Partial,
    };

    match signal {
        SatisfactionSignal::Positive => CompletionStatus::Success,
        SatisfactionSignal::Negative => CompletionStatus::Failed,
        SatisfactionSignal::Neutral => {
            // Any LLM generation anywhere in the chain counts, not only the last step.
            let produced_answer = events
                .iter()
                .any(|e| e.step_type == TrajectoryStepType::LlmGeneration);
            if produced_answer {
                CompletionStatus::Partial
            } else {
                CompletionStatus::Abandoned
            }
        }
    }
}
/// Serialize the key events as a JSON array describing the execution chain.
///
/// Each element carries the step index, the step type name and the input and
/// output summaries (each passed through `truncate` with a limit of 100).
/// If serialization fails, the literal `"[]"` is returned instead.
fn build_chain_json(events: &[&TrajectoryEvent]) -> String {
    let mut chain: Vec<serde_json::Value> = Vec::with_capacity(events.len());
    for event in events {
        chain.push(serde_json::json!({
            "step": event.step_index,
            "type": event.step_type.as_str(),
            "input": truncate(&event.input_summary, 100),
            "output": truncate(&event.output_summary, 100),
        }));
    }

    match serde_json::to_string(&chain) {
        Ok(json) => json,
        Err(_) => "[]".to_string(),
    }
}
/// Shorten `s` to at most `max` characters, marking a cut with `…`.
///
/// Counts Unicode scalar values, not bytes, so multi-byte text is never split
/// inside a character. A string that already fits is returned unchanged; a
/// longer one is cut to its first `max` characters and gets a trailing
/// ellipsis so readers of the execution chain can tell the text was cut.
fn truncate(s: &str, max: usize) -> String {
    // The character at index `max` exists only when the string is too long;
    // its byte offset is exactly where the kept prefix ends.
    match s.char_indices().nth(max) {
        None => s.to_string(),
        Some((cut, _)) => format!("{}…", &s[..cut]),
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    /// Event fixture for session "sess-1" / agent "agent-1" lasting 100 ms.
    fn make_event(
        index: usize,
        step_type: TrajectoryStepType,
        input: &str,
        output: &str,
    ) -> TrajectoryEvent {
        TrajectoryEvent {
            id: format!("evt-{}", index),
            session_id: "sess-1".to_string(),
            agent_id: "agent-1".to_string(),
            step_index: index,
            step_type,
            input_summary: input.to_string(),
            output_summary: output.to_string(),
            duration_ms: 100,
            timestamp: Utc::now(),
        }
    }

    /// Borrow every event, as the helper functions expect.
    fn as_refs(events: &[TrajectoryEvent]) -> Vec<&TrajectoryEvent> {
        events.iter().collect()
    }

    #[test]
    fn test_compress_empty() {
        let compressed = compress(vec![], None);
        assert!(compressed.is_none());
    }

    #[test]
    fn test_compress_single_event() {
        let request = make_event(0, TrajectoryStepType::UserRequest, "帮我查数据", "");

        let ct = compress(vec![request], None).unwrap();

        assert_eq!(ct.session_id, "sess-1");
        assert_eq!(ct.total_steps, 1);
    }

    #[test]
    fn test_compress_full_chain() {
        let events = vec![
            make_event(0, TrajectoryStepType::UserRequest, "帮我生成月度报告", ""),
            make_event(1, TrajectoryStepType::ToolExecution, "collector", "5条数据"),
            make_event(2, TrajectoryStepType::LlmGeneration, "", "报告已生成"),
        ];

        let ct = compress(events, Some(SatisfactionSignal::Positive)).unwrap();

        assert_eq!(ct.request_type, "data_report");
        assert_eq!(ct.tools_used, vec!["collector"]);
        assert_eq!(ct.outcome, CompletionStatus::Success);
        assert!(ct.execution_chain.starts_with('['));
    }

    #[test]
    fn test_deduplicate_retries() {
        let events = vec![
            make_event(0, TrajectoryStepType::ToolExecution, "tool-a", "err"),
            make_event(1, TrajectoryStepType::ToolExecution, "tool-a", "ok"),
            make_event(2, TrajectoryStepType::LlmGeneration, "", "done"),
        ];

        // The retry collapses: first ToolExecution + LlmGeneration remain.
        assert_eq!(deduplicate_steps(&events).len(), 2);
    }

    #[test]
    fn test_classify_request() {
        let cases = [
            ("帮我生成月度报告", "data_report"),
            ("最新的合规政策是什么", "policy_query"),
            ("明天有什么会议", "meeting"),
            ("查房安排", "inspection"),
            ("你好", "general"),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(classify_request(input), *expected);
        }
    }

    #[test]
    fn test_detect_satisfaction_positive() {
        let msgs = vec!["谢谢,很好用".to_string()];
        assert_eq!(detect_satisfaction(&msgs), Some(SatisfactionSignal::Positive));
    }

    #[test]
    fn test_detect_satisfaction_negative() {
        let msgs = vec!["还是不行啊".to_string()];
        assert_eq!(detect_satisfaction(&msgs), Some(SatisfactionSignal::Negative));
    }

    #[test]
    fn test_detect_satisfaction_neutral() {
        let msgs = vec!["好的我知道了".to_string()];
        assert_eq!(detect_satisfaction(&msgs), Some(SatisfactionSignal::Neutral));
    }

    #[test]
    fn test_detect_satisfaction_empty() {
        assert_eq!(detect_satisfaction(&[]), None);
    }

    #[test]
    fn test_infer_outcome() {
        let events = vec![make_event(0, TrajectoryStepType::LlmGeneration, "", "ok")];
        let refs = as_refs(&events);

        assert_eq!(
            infer_outcome(&refs, Some(SatisfactionSignal::Positive)),
            CompletionStatus::Success
        );
        assert_eq!(
            infer_outcome(&refs, Some(SatisfactionSignal::Negative)),
            CompletionStatus::Failed
        );
    }

    #[test]
    fn test_extract_tools_dedup() {
        let events = vec![
            make_event(0, TrajectoryStepType::ToolExecution, "researcher", ""),
            make_event(1, TrajectoryStepType::ToolExecution, "researcher", ""),
            make_event(2, TrajectoryStepType::ToolExecution, "collector", ""),
        ];

        let tools = extract_tools(&as_refs(&events));

        assert_eq!(tools, vec!["researcher", "collector"]);
    }
}

View File

@@ -0,0 +1,369 @@
//! User Profiler — aggregates extracted facts into a structured user profile.
//!
//! Takes facts extracted by the growth pipeline (passed in as a `&[Fact]`
//! slice), classifies each one by keywords found in its content, and updates
//! the `UserProfile` via `UserProfileStore`.
//!
//! Desktop uses "default_user" as the single user ID.
use std::sync::Arc;
use chrono::Utc;
use tracing::{debug, warn};
use zclaw_memory::fact::{Fact, FactCategory};
use zclaw_memory::user_profile_store::{
CommStyle, Level, UserProfile, UserProfileStore,
};
use zclaw_types::Result;
/// Default user ID for single-user desktop mode.
///
/// Every store call made by `UserProfiler` reads or writes the profile stored
/// under this ID.
const DEFAULT_USER: &str = "default_user";
// ---------------------------------------------------------------------------
// Classification helpers
// ---------------------------------------------------------------------------
/// A single profile change derived from one fact.
///
/// Produced by `classify_fact_content` (from keywords in the fact's content)
/// and written to the store by `UserProfiler::apply_update`.
enum ProfileFieldUpdate {
/// Set the `industry` field to the given label.
Industry(String),
/// Set the `role` field to the given label.
Role(String),
/// Set the `expertise_level` field.
ExpertiseLevel(Level),
/// Set the `communication_style` field.
CommunicationStyle(CommStyle),
/// Add a tool name to the preferred tools (applied with a limit argument of 5).
PreferredTool(String),
/// Add a topic to the recent topics (applied with a limit argument of 10).
RecentTopic(String),
}
/// Turn one fact into the profile update it implies, if any.
///
/// The fact's content is lower-cased and matched against keyword groups in a
/// fixed order — communication style, industry, role, expertise level, tool
/// preference — and the first group with a hit decides the update. A fact
/// that matches no group becomes a recent topic (content cut to 30 chars via
/// `truncate`) provided its confidence is at least 0.6; otherwise the result
/// is `None`.
fn classify_fact_content(fact: &Fact) -> Option<ProfileFieldUpdate> {
    let content = fact.content.to_lowercase();
    let mentions = |needles: &[&str]| needles.iter().any(|needle| content.contains(*needle));

    // --- communication style ---
    if mentions(&["简洁", "简短", "简单说"]) {
        return Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Concise));
    }
    if mentions(&["详细", "展开说", "多说点"]) {
        return Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Detailed));
    }
    if mentions(&["正式", "专业", "官方"]) {
        return Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Formal));
    }
    if mentions(&["随意", "轻松", "随便"]) {
        return Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Casual));
    }

    // --- industry ---
    if mentions(&["医疗", "医院", "诊所"]) {
        return Some(ProfileFieldUpdate::Industry("医疗".into()));
    }
    if mentions(&["制造", "工厂", "生产"]) {
        return Some(ProfileFieldUpdate::Industry("制造业".into()));
    }
    if mentions(&["教育", "学校", "教学"]) {
        return Some(ProfileFieldUpdate::Industry("教育".into()));
    }

    // --- role ---
    if mentions(&["行政", "主任", "管理"]) {
        return Some(ProfileFieldUpdate::Role("行政管理".into()));
    }
    if mentions(&["工程师", "开发", "技术"]) {
        return Some(ProfileFieldUpdate::Role("技术人员".into()));
    }
    if mentions(&["医生", "护士", "临床"]) {
        return Some(ProfileFieldUpdate::Role("医务人员".into()));
    }

    // --- expertise level ---
    if mentions(&["新手", "不会", "不了解"]) {
        return Some(ProfileFieldUpdate::ExpertiseLevel(Level::Beginner));
    }
    if mentions(&["熟练", "熟悉", "常用"]) {
        return Some(ProfileFieldUpdate::ExpertiseLevel(Level::Expert));
    }

    // --- tool preference ---
    if mentions(&["用研究", "帮我查", "调研"]) {
        return Some(ProfileFieldUpdate::PreferredTool("researcher".into()));
    }
    if mentions(&["收集", "整理", "汇总"]) {
        return Some(ProfileFieldUpdate::PreferredTool("collector".into()));
    }
    if mentions(&["幻灯", "演示", "ppt"]) {
        return Some(ProfileFieldUpdate::PreferredTool("slideshow".into()));
    }

    // --- fallback: sufficiently confident facts become a recent topic ---
    (fact.confidence >= 0.6)
        .then(|| ProfileFieldUpdate::RecentTopic(truncate(&fact.content, 30)))
}
// ---------------------------------------------------------------------------
// UserProfiler
// ---------------------------------------------------------------------------
/// Aggregates extracted facts into a structured user profile.
///
/// All reads and writes go through the wrapped `UserProfileStore` and target
/// the single `DEFAULT_USER` profile.
pub struct UserProfiler {
// Shared handle to the profile store.
store: Arc<UserProfileStore>,
}
impl UserProfiler {
pub fn new(store: Arc<UserProfileStore>) -> Self {
Self { store }
}
/// Main entry point: update profile from extracted facts.
pub async fn update_from_facts(
&self,
facts: &[Fact],
) -> Result<()> {
if facts.is_empty() {
return Ok(());
}
for fact in facts {
if let Some(update) = classify_fact_content(fact) {
if let Err(e) = self.apply_update(&update).await {
warn!("[UserProfiler] Failed to apply update: {}", e);
}
}
}
// Update confidence based on number of classified facts
self.update_confidence().await;
debug!("[UserProfiler] Updated profile from {} facts", facts.len());
Ok(())
}
/// Update active pain points in the profile.
pub async fn update_pain_points(
&self,
pains: Vec<String>,
) -> Result<()> {
// Replace all pain points by loading, modifying, and upserting
let mut profile = self.get_or_create_profile().await;
profile.active_pain_points = pains;
profile.updated_at = Utc::now();
self.store.upsert(&profile).await
}
/// Format relevant profile attributes for injection into system prompt.
/// Caps output at ~200 Chinese characters (≈100 tokens).
pub fn format_profile_summary(profile: &UserProfile, topic: &str) -> Option<String> {
let mut parts = Vec::new();
if let Some(ref industry) = profile.industry {
parts.push(format!("行业: {}", industry));
}
if let Some(ref role) = profile.role {
parts.push(format!("角色: {}", role));
}
if let Some(ref level) = profile.expertise_level {
let level_str = match level {
Level::Beginner => "入门",
Level::Intermediate => "中级",
Level::Expert => "专家",
};
parts.push(format!("水平: {}", level_str));
}
if let Some(ref style) = profile.communication_style {
let style_str = match style {
CommStyle::Concise => "简洁",
CommStyle::Detailed => "详细",
CommStyle::Formal => "正式",
CommStyle::Casual => "随意",
};
parts.push(format!("沟通风格: {}", style_str));
}
// Only add topics relevant to the current conversation
if !profile.recent_topics.is_empty() {
let relevant: Vec<&str> = profile.recent_topics.iter()
.filter(|t| {
let t_lower = t.to_lowercase();
let topic_lower = topic.to_lowercase();
t_lower.chars().any(|c| topic_lower.contains(c))
|| topic_lower.chars().any(|c| t_lower.contains(c))
})
.take(3)
.map(|s| s.as_str())
.collect();
if !relevant.is_empty() {
parts.push(format!("近期话题: {}", relevant.join(", ")));
}
}
if parts.is_empty() {
return None;
}
let summary = format!("[用户画像] {}", parts.join(" | "));
if summary.chars().count() > 200 {
Some(truncate(&summary, 200))
} else {
Some(summary)
}
}
// -- internal helpers --
async fn apply_update(&self, update: &ProfileFieldUpdate) -> Result<()> {
match update {
ProfileFieldUpdate::Industry(v) => {
self.store.update_field(DEFAULT_USER, "industry", v).await
}
ProfileFieldUpdate::Role(v) => {
self.store.update_field(DEFAULT_USER, "role", v).await
}
ProfileFieldUpdate::ExpertiseLevel(v) => {
let val = match v {
Level::Beginner => "beginner",
Level::Intermediate => "intermediate",
Level::Expert => "expert",
};
self.store.update_field(DEFAULT_USER, "expertise_level", val).await
}
ProfileFieldUpdate::CommunicationStyle(v) => {
let val = match v {
CommStyle::Concise => "concise",
CommStyle::Detailed => "detailed",
CommStyle::Formal => "formal",
CommStyle::Casual => "casual",
};
self.store.update_field(DEFAULT_USER, "communication_style", val).await
}
ProfileFieldUpdate::PreferredTool(tool) => {
self.store.add_preferred_tool(DEFAULT_USER, tool, 5).await
}
ProfileFieldUpdate::RecentTopic(topic) => {
self.store.add_recent_topic(DEFAULT_USER, topic, 10).await
}
}
}
async fn update_confidence(&self) {
if let Ok(Some(profile)) = self.store.get(DEFAULT_USER).await {
let filled = [
profile.industry.is_some(),
profile.role.is_some(),
profile.expertise_level.is_some(),
profile.communication_style.is_some(),
!profile.recent_topics.is_empty(),
].iter().filter(|&&x| x).count() as f32;
let confidence = (filled / 5.0).min(1.0);
let conf_str = format!("{:.2}", confidence);
if let Err(e) = self.store.update_field(DEFAULT_USER, "confidence", &conf_str).await {
warn!("[UserProfiler] Failed to update confidence: {}", e);
}
}
}
async fn get_or_create_profile(&self) -> UserProfile {
match self.store.get(DEFAULT_USER).await {
Ok(Some(p)) => p,
_ => UserProfile::default_profile(),
}
}
}
/// Shorten `s` to at most `max_chars` characters, marking a cut with `…`.
///
/// Counts Unicode scalar values, not bytes, so multi-byte text is never split
/// inside a character. A string that already fits is returned unchanged; a
/// longer one is cut to its first `max_chars` characters and gets a trailing
/// ellipsis so the cut is visible in topics and profile summaries.
fn truncate(s: &str, max_chars: usize) -> String {
    // The character at index `max_chars` exists only when the string is too
    // long; its byte offset is exactly where the kept prefix ends.
    match s.char_indices().nth(max_chars) {
        None => s.to_string(),
        Some((cut, _)) => format!("{}…", &s[..cut]),
    }
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    /// Classify a fact with the given content and category at confidence 0.8.
    fn classify(content: &str, category: FactCategory) -> Option<ProfileFieldUpdate> {
        let fact = Fact::new(content.to_string(), category, 0.8);
        classify_fact_content(&fact)
    }

    #[test]
    fn test_classify_communication_style() {
        let concise = classify("喜欢简洁的回答", FactCategory::Preference);
        assert!(matches!(
            concise,
            Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Concise))
        ));

        let detailed = classify("请详细说明", FactCategory::Preference);
        assert!(matches!(
            detailed,
            Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Detailed))
        ));
    }

    #[test]
    fn test_classify_industry() {
        let update = classify("我在医院工作", FactCategory::Knowledge);
        assert!(matches!(update, Some(ProfileFieldUpdate::Industry(ref s)) if s == "医疗"));
    }

    #[test]
    fn test_classify_role() {
        let update = classify("我是行政主任", FactCategory::Knowledge);
        assert!(matches!(update, Some(ProfileFieldUpdate::Role(ref s)) if s == "行政管理"));
    }

    #[test]
    fn test_classify_expertise() {
        let update = classify("我是新手", FactCategory::Knowledge);
        assert!(matches!(
            update,
            Some(ProfileFieldUpdate::ExpertiseLevel(Level::Beginner))
        ));
    }

    #[test]
    fn test_classify_tool() {
        let update = classify("帮我调研一下市场", FactCategory::Preference);
        assert!(
            matches!(update, Some(ProfileFieldUpdate::PreferredTool(ref s)) if s == "researcher")
        );
    }

    #[test]
    fn test_classify_topic_fallback() {
        // No keyword group matches, confidence is above the 0.6 threshold.
        let fact = Fact::new("关于季度报告的编制流程".to_string(), FactCategory::Behavior, 0.7);
        let update = classify_fact_content(&fact);
        assert!(matches!(update, Some(ProfileFieldUpdate::RecentTopic(_))));
    }

    #[test]
    fn test_classify_low_confidence_ignored() {
        // Same content, but confidence is below the 0.6 threshold.
        let fact = Fact::new("关于季度报告的编制流程".to_string(), FactCategory::Behavior, 0.3);
        assert!(classify_fact_content(&fact).is_none());
    }

    #[test]
    fn test_format_profile_summary() {
        let profile = UserProfile {
            user_id: "default_user".to_string(),
            industry: Some("医疗".to_string()),
            role: Some("行政主任".to_string()),
            expertise_level: Some(Level::Intermediate),
            communication_style: Some(CommStyle::Concise),
            preferred_language: "zh-CN".to_string(),
            recent_topics: vec!["排班管理".to_string()],
            active_pain_points: vec![],
            preferred_tools: vec![],
            confidence: 0.6,
            updated_at: Utc::now(),
        };

        let summary = UserProfiler::format_profile_summary(&profile, "排班");

        assert!(summary.is_some());
        let text = summary.unwrap();
        for expected in ["医疗", "行政主任", "排班管理"].iter() {
            assert!(text.contains(expected));
        }
    }

    #[test]
    fn test_format_profile_empty() {
        let profile = UserProfile::default_profile();
        assert!(UserProfiler::format_profile_summary(&profile, "test").is_none());
    }
}