//! User Profiler — aggregates extracted facts into a structured user profile. //! //! Takes `ExtractedFactBatch` from the growth pipeline, classifies facts by //! category, and updates the `UserProfile` via `UserProfileStore`. //! //! Desktop uses "default_user" as the single user ID. use std::sync::Arc; use chrono::Utc; use tracing::{debug, warn}; use zclaw_memory::fact::Fact; use zclaw_memory::user_profile_store::{ CommStyle, Level, UserProfile, UserProfileStore, }; use zclaw_types::Result; /// Default user ID for single-user desktop mode. const DEFAULT_USER: &str = "default_user"; // --------------------------------------------------------------------------- // Classification helpers // --------------------------------------------------------------------------- /// Maps a fact category to the profile field it should update. enum ProfileFieldUpdate { Industry(String), Role(String), ExpertiseLevel(Level), CommunicationStyle(CommStyle), PreferredTool(String), RecentTopic(String), } /// Classify a fact content into a profile update. fn classify_fact_content(fact: &Fact) -> Option { let content = fact.content.to_lowercase(); // Communication style detection if content.contains("简洁") || content.contains("简短") || content.contains("简单说") { return Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Concise)); } if content.contains("详细") || content.contains("展开说") || content.contains("多说点") { return Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Detailed)); } if content.contains("正式") || content.contains("专业") || content.contains("官方") { return Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Formal)); } if content.contains("随意") || content.contains("轻松") || content.contains("随便") { return Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Casual)); } // Industry / role detection if content.contains("医疗") || content.contains("医院") || content.contains("诊所") { return Some(ProfileFieldUpdate::Industry("医疗".into())); } if content.contains("制造") || content.contains("工厂") || content.contains("生产") { return Some(ProfileFieldUpdate::Industry("制造业".into())); } if content.contains("教育") || content.contains("学校") || content.contains("教学") { return Some(ProfileFieldUpdate::Industry("教育".into())); } if content.contains("行政") || content.contains("主任") || content.contains("管理") { return Some(ProfileFieldUpdate::Role("行政管理".into())); } if content.contains("工程师") || content.contains("开发") || content.contains("技术") { return Some(ProfileFieldUpdate::Role("技术人员".into())); } if content.contains("医生") || content.contains("护士") || content.contains("临床") { return Some(ProfileFieldUpdate::Role("医务人员".into())); } // Expertise level if content.contains("新手") || content.contains("不会") || content.contains("不了解") { return Some(ProfileFieldUpdate::ExpertiseLevel(Level::Beginner)); } if content.contains("熟练") || content.contains("熟悉") || content.contains("常用") { return Some(ProfileFieldUpdate::ExpertiseLevel(Level::Expert)); } // Tool preferences if content.contains("用研究") || content.contains("帮我查") || content.contains("调研") { return Some(ProfileFieldUpdate::PreferredTool("researcher".into())); } if content.contains("收集") || content.contains("整理") || content.contains("汇总") { return Some(ProfileFieldUpdate::PreferredTool("collector".into())); } if content.contains("幻灯") || content.contains("演示") || content.contains("ppt") { return Some(ProfileFieldUpdate::PreferredTool("slideshow".into())); } // Default: treat as a recent topic if fact.confidence >= 0.6 { let topic = truncate(&fact.content, 30); return Some(ProfileFieldUpdate::RecentTopic(topic)); } None } // --------------------------------------------------------------------------- // UserProfiler // --------------------------------------------------------------------------- /// Aggregates extracted facts into a structured user profile. pub struct UserProfiler { store: Arc, } impl UserProfiler { pub fn new(store: Arc) -> Self { Self { store } } /// Main entry point: update profile from extracted facts. pub async fn update_from_facts( &self, facts: &[Fact], ) -> Result<()> { if facts.is_empty() { return Ok(()); } for fact in facts { if let Some(update) = classify_fact_content(fact) { if let Err(e) = self.apply_update(&update).await { warn!("[UserProfiler] Failed to apply update: {}", e); } } } // Update confidence based on number of classified facts self.update_confidence().await; debug!("[UserProfiler] Updated profile from {} facts", facts.len()); Ok(()) } /// Update active pain points in the profile. pub async fn update_pain_points( &self, pains: Vec, ) -> Result<()> { // Replace all pain points by loading, modifying, and upserting let mut profile = self.get_or_create_profile().await; profile.active_pain_points = pains; profile.updated_at = Utc::now(); self.store.upsert(&profile).await } /// Format relevant profile attributes for injection into system prompt. /// Caps output at ~200 Chinese characters (≈100 tokens). pub fn format_profile_summary(profile: &UserProfile, topic: &str) -> Option { let mut parts = Vec::new(); if let Some(ref industry) = profile.industry { parts.push(format!("行业: {}", industry)); } if let Some(ref role) = profile.role { parts.push(format!("角色: {}", role)); } if let Some(ref level) = profile.expertise_level { let level_str = match level { Level::Beginner => "入门", Level::Intermediate => "中级", Level::Expert => "专家", }; parts.push(format!("水平: {}", level_str)); } if let Some(ref style) = profile.communication_style { let style_str = match style { CommStyle::Concise => "简洁", CommStyle::Detailed => "详细", CommStyle::Formal => "正式", CommStyle::Casual => "随意", }; parts.push(format!("沟通风格: {}", style_str)); } // Only add topics relevant to the current conversation if !profile.recent_topics.is_empty() { let relevant: Vec<&str> = profile.recent_topics.iter() .filter(|t| { let t_lower = t.to_lowercase(); let topic_lower = topic.to_lowercase(); t_lower.chars().any(|c| topic_lower.contains(c)) || topic_lower.chars().any(|c| t_lower.contains(c)) }) .take(3) .map(|s| s.as_str()) .collect(); if !relevant.is_empty() { parts.push(format!("近期话题: {}", relevant.join(", "))); } } if parts.is_empty() { return None; } let summary = format!("[用户画像] {}", parts.join(" | ")); if summary.chars().count() > 200 { Some(truncate(&summary, 200)) } else { Some(summary) } } // -- internal helpers -- async fn apply_update(&self, update: &ProfileFieldUpdate) -> Result<()> { match update { ProfileFieldUpdate::Industry(v) => { self.store.update_field(DEFAULT_USER, "industry", v).await } ProfileFieldUpdate::Role(v) => { self.store.update_field(DEFAULT_USER, "role", v).await } ProfileFieldUpdate::ExpertiseLevel(v) => { let val = match v { Level::Beginner => "beginner", Level::Intermediate => "intermediate", Level::Expert => "expert", }; self.store.update_field(DEFAULT_USER, "expertise_level", val).await } ProfileFieldUpdate::CommunicationStyle(v) => { let val = match v { CommStyle::Concise => "concise", CommStyle::Detailed => "detailed", CommStyle::Formal => "formal", CommStyle::Casual => "casual", }; self.store.update_field(DEFAULT_USER, "communication_style", val).await } ProfileFieldUpdate::PreferredTool(tool) => { self.store.add_preferred_tool(DEFAULT_USER, tool, 5).await } ProfileFieldUpdate::RecentTopic(topic) => { self.store.add_recent_topic(DEFAULT_USER, topic, 10).await } } } async fn update_confidence(&self) { if let Ok(Some(profile)) = self.store.get(DEFAULT_USER).await { let filled = [ profile.industry.is_some(), profile.role.is_some(), profile.expertise_level.is_some(), profile.communication_style.is_some(), !profile.recent_topics.is_empty(), ].iter().filter(|&&x| x).count() as f32; let confidence = (filled / 5.0).min(1.0); let conf_str = format!("{:.2}", confidence); if let Err(e) = self.store.update_field(DEFAULT_USER, "confidence", &conf_str).await { warn!("[UserProfiler] Failed to update confidence: {}", e); } } } async fn get_or_create_profile(&self) -> UserProfile { match self.store.get(DEFAULT_USER).await { Ok(Some(p)) => p, _ => UserProfile::default_profile(), } } } fn truncate(s: &str, max_chars: usize) -> String { if s.chars().count() <= max_chars { s.to_string() } else { s.chars().take(max_chars).collect::() + "…" } } // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; #[test] fn test_classify_communication_style() { let fact = Fact::new("喜欢简洁的回答".to_string(), FactCategory::Preference, 0.8); let update = classify_fact_content(&fact); assert!(matches!(update, Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Concise)))); let fact2 = Fact::new("请详细说明".to_string(), FactCategory::Preference, 0.8); let update2 = classify_fact_content(&fact2); assert!(matches!(update2, Some(ProfileFieldUpdate::CommunicationStyle(CommStyle::Detailed)))); } #[test] fn test_classify_industry() { let fact = Fact::new("我在医院工作".to_string(), FactCategory::Knowledge, 0.8); let update = classify_fact_content(&fact); assert!(matches!(update, Some(ProfileFieldUpdate::Industry(ref s)) if s == "医疗")); } #[test] fn test_classify_role() { let fact = Fact::new("我是行政主任".to_string(), FactCategory::Knowledge, 0.8); let update = classify_fact_content(&fact); assert!(matches!(update, Some(ProfileFieldUpdate::Role(ref s)) if s == "行政管理")); } #[test] fn test_classify_expertise() { let fact = Fact::new("我是新手".to_string(), FactCategory::Knowledge, 0.8); let update = classify_fact_content(&fact); assert!(matches!(update, Some(ProfileFieldUpdate::ExpertiseLevel(Level::Beginner)))); } #[test] fn test_classify_tool() { let fact = Fact::new("帮我调研一下市场".to_string(), FactCategory::Preference, 0.8); let update = classify_fact_content(&fact); assert!(matches!(update, Some(ProfileFieldUpdate::PreferredTool(ref s)) if s == "researcher")); } #[test] fn test_classify_topic_fallback() { let fact = Fact::new("关于季度报告的编制流程".to_string(), FactCategory::Behavior, 0.7); let update = classify_fact_content(&fact); assert!(matches!(update, Some(ProfileFieldUpdate::RecentTopic(_)))); } #[test] fn test_classify_low_confidence_ignored() { let fact = Fact::new("关于季度报告的编制流程".to_string(), FactCategory::Behavior, 0.3); let update = classify_fact_content(&fact); assert!(update.is_none()); } #[test] fn test_format_profile_summary() { let profile = UserProfile { user_id: "default_user".to_string(), industry: Some("医疗".to_string()), role: Some("行政主任".to_string()), expertise_level: Some(Level::Intermediate), communication_style: Some(CommStyle::Concise), preferred_language: "zh-CN".to_string(), recent_topics: vec!["排班管理".to_string()], active_pain_points: vec![], preferred_tools: vec![], confidence: 0.6, updated_at: Utc::now(), }; let summary = UserProfiler::format_profile_summary(&profile, "排班"); assert!(summary.is_some()); let text = summary.unwrap(); assert!(text.contains("医疗")); assert!(text.contains("行政主任")); assert!(text.contains("排班管理")); } #[test] fn test_format_profile_empty() { let profile = UserProfile::default_profile(); let summary = UserProfiler::format_profile_summary(&profile, "test"); assert!(summary.is_none()); } }