/**
 * Memory Extractor - Automatically extract memorable information from conversations
 *
 * Uses LLM to analyze completed conversations and extract:
 * - Facts the user shared
 * - User preferences discovered
 * - Lessons learned during problem-solving
 * - Pending tasks or commitments
 *
 * Also handles auto-updating USER.md with discovered preferences.
 *
 * Phase 1: Rule-based extraction (pattern matching).
 * Phase 4: LLM-powered semantic extraction with importance scoring.
 *
 * Reference: ZCLAW_AGENT_INTELLIGENCE_EVOLUTION.md §6.2.2
 */

import {
  intelligenceClient,
  type MemoryType,
} from './intelligence-client';
import {
  getLLMAdapter,
  llmExtract,
  type LLMServiceAdapter,
  type LLMProvider,
} from './llm-service';
import {
  extractAndStoreMemories,
  type ChatMessageForExtraction,
} from './viking-client';
import { createLogger } from './logger';

const log = createLogger('MemoryExtractor');

// === Types ===

/** A single piece of information judged worth remembering. */
export interface ExtractedItem {
  content: string;
  type: MemoryType;
  importance: number;
  tags: string[];
}

/** Outcome of one `extractFromConversation` call. */
export interface ExtractionResult {
  /** Items that passed the importance threshold. */
  items: ExtractedItem[];
  /** Number of memories reported as stored. */
  saved: number;
  /** Number of items that failed to store in the fallback path. */
  skipped: number;
  /** True when the user profile append succeeded. */
  userProfileUpdated: boolean;
}

/** Minimal message shape consumed by the extractor. */
export interface ConversationMessage {
  role: string;
  content: string;
}

export interface ExtractionConfig {
  useLLM: boolean; // Use LLM for semantic extraction (Phase 4)
  llmProvider?: LLMProvider; // Preferred LLM provider
  llmFallbackToRules: boolean; // Fall back to rules if LLM fails
  minMessagesForExtraction: number; // Minimum messages before extraction
  extractionCooldownMs: number; // Cooldown between extractions
  minImportanceThreshold: number; // Only save items with importance >= this
}

// === Extraction Prompt ===

// NOTE(review): the prompt text is reproduced verbatim. Its bullet list is
// run together on one line and the only line break falls mid-sentence, which
// looks like lost formatting - confirm the intended layout before reflowing.
const EXTRACTION_PROMPT = `请从以下对话中提取值得长期记住的信息。只提取以下类型： - fact: 用户告知的事实（如"我的公司叫 XXX"、"我在做 YYY 项目"） - preference: 用户的偏好（如"我喜欢简洁的回答"、"请用中文"） - lesson: 本次对话的经验教训（如"调用 API 前需要先验证 token"） - task: 未完成的任务或承诺（如"下次帮我检查 XXX"）评估规则： - importance 1-3: 临时性、不太重要的信息 - importance 4-6: 有一定参考价值的信息 - importance 7-9: 重要的持久信息 - importance 10: 
极其关键的信息输出**纯 JSON 数组**，每项包含 content, type, importance, tags[]。如果没有值得记忆的内容，返回空数组 []。不要输出任何其他内容，只输出 JSON。对话内容： `;

// === Default Config ===

export const DEFAULT_EXTRACTION_CONFIG: ExtractionConfig = {
  useLLM: true, // Enable LLM-powered semantic extraction by default
  llmFallbackToRules: true,
  minMessagesForExtraction: 2, // Lowered from 4 to capture memories earlier
  extractionCooldownMs: 30_000,
  minImportanceThreshold: 3,
};

// === Rule Tables ===

/**
 * One family of rule-based patterns plus the acceptance window for a match.
 * Both length bounds are exclusive.
 */
interface ExtractionRule {
  patterns: readonly RegExp[];
  minLength: number;
  maxLength: number;
  /** Builds a fresh item (with its own `tags` array) for an accepted match. */
  toItem: (matched: string) => ExtractedItem;
}

// The regexes below are only ever used through `String.prototype.matchAll`,
// which iterates over a clone, so sharing the global-flag instances across
// calls does not leak `lastIndex` state.

/** Statements the user makes about themselves, their company, team or project. */
const FACT_RULE: ExtractionRule = {
  patterns: [
    // "my / our X is (called) Y"
    /我(?:的|们的|们)(\S{1,20})(?:是|叫|名叫|名字是)(.{2,50})/g,
    // "company / team / project / product (name) is Y"
    /(?:公司|团队|项目|产品)(?:名|名称)?(?:是|叫)(.{2,30})/g,
    // "I am (currently) doing / developing / using / learning Y"
    /我(?:在|正在)(?:做|开发|使用|学习)(.{2,40})/g,
    // "I am / I do Y"
    /我(?:是|做)(.{2,30})(?:的|工作)/g,
  ],
  minLength: 5,
  maxLength: 100,
  toItem: content => ({
    content,
    type: 'fact',
    importance: 6,
    tags: ['auto-extracted'],
  }),
};

/** Likes, dislikes and standing instructions about how to respond. */
const PREFERENCE_RULE: ExtractionRule = {
  patterns: [
    /(?:我喜欢|我偏好|我习惯|请用|请使用|默认用|我更愿意)(.{2,50})/g,
    /(?:不要|别|不用)(.{2,30})(?:了|吧)?/g,
    /(?:以后|下次|每次)(?:都)?(.{2,40})/g,
    /(?:用中文|用英文|简洁|详细|简短)(?:一点|回复|回答)?/g,
  ],
  minLength: 3,
  maxLength: 80,
  toItem: content => ({
    content: `用户偏好: ${content}`,
    type: 'preference',
    importance: 5,
    tags: ['auto-extracted', 'preference'],
  }),
};

/** Requests, reminders and TODO/FIXME style markers. */
const TASK_RULE: ExtractionRule = {
  patterns: [
    /(?:帮我|帮忙|记得|别忘了|下次|以后|待办)(.{5,60})/g,
    /(?:TODO|todo|FIXME|fixme)[:\s]*(.{5,60})/g,
  ],
  minLength: 5,
  maxLength: 100,
  toItem: content => ({
    content,
    type: 'task',
    importance: 7,
    tags: ['auto-extracted', 'task'],
  }),
};

/** Root-cause / solution / caveat phrases found in assistant replies. */
const LESSON_RULE: ExtractionRule = {
  patterns: [
    // "the problem is / the cause is / the root cause is / the fix is / the key is ..."
    /(?:问题是|原因是|根因是|解决方法是|关键是)(.{10,100})/g,
    // "note that / things to watch out for: ..."
    /(?:需要注意|要注意|注意事项)[：:](.{10,80})/g,
  ],
  minLength: 10,
  maxLength: 150,
  toItem: content => ({
    content,
    type: 'lesson',
    importance: 6,
    tags: ['auto-extracted', 'lesson'],
  }),
};

// === Helpers ===

/** Result returned when extraction is skipped (cooldown or too few messages). */
function emptyResult(): ExtractionResult {
  return { items: [], saved: 0, skipped: 0, userProfileUpdated: false };
}

/** True for the two roles the extractor looks at. */
function isChatMessage(message: ConversationMessage): boolean {
  return message.role === 'user' || message.role === 'assistant';
}

/** Renders user/assistant messages as a labelled transcript, blank-line separated. */
function formatConversation(messages: ConversationMessage[]): string {
  return messages
    .filter(isChatMessage)
    .map(m => `[${m.role === 'user' ? '用户' : '助手'}]: ${m.content}`)
    .join('\n\n');
}

/**
 * Appends one item to `items` for every match of every pattern in `rule`
 * whose trimmed full-match text falls inside the rule's length window.
 * Patterns are applied in declaration order, matches in text order.
 */
function collectMatches(text: string, rule: ExtractionRule, items: ExtractedItem[]): void {
  for (const pattern of rule.patterns) {
    for (const match of text.matchAll(pattern)) {
      const content = match[0].trim();
      if (content.length > rule.minLength && content.length < rule.maxLength) {
        items.push(rule.toItem(content));
      }
    }
  }
}

// === Memory Extractor ===

export class MemoryExtractor {
  private config: ExtractionConfig;
  private lastExtractionTime = 0;
  private llmAdapter: LLMServiceAdapter | null = null;

  /**
   * @param config Overrides merged on top of `DEFAULT_EXTRACTION_CONFIG`.
   */
  constructor(config?: Partial<ExtractionConfig>) {
    this.config = { ...DEFAULT_EXTRACTION_CONFIG, ...config };

    // Initialize LLM adapter if configured
    if (this.config.useLLM) {
      this.resolveLLMAdapter();
    }
  }

  /**
   * Extract memories from a conversation.
   * Uses LLM if configured (or forced), falls back to rule-based extraction.
   *
   * Returns an empty result without doing any work while the cooldown is
   * active or when there are fewer user/assistant messages than
   * `minMessagesForExtraction`.
   *
   * @param messages Full conversation; only `user` and `assistant` roles are considered.
   * @param agentId Agent the memories and profile update are stored under.
   * @param conversationId Attached to items stored through the fallback path.
   * @param options `forceLLM` requests the LLM path even when `config.useLLM` is false.
   * @returns The items that passed the importance threshold plus storage counters.
   * @throws Rethrows the LLM error when LLM extraction fails and
   *   `llmFallbackToRules` is false.
   */
  async extractFromConversation(
    messages: ConversationMessage[],
    agentId: string,
    conversationId?: string,
    options?: { forceLLM?: boolean }
  ): Promise<ExtractionResult> {
    // Cooldown check
    if (Date.now() - this.lastExtractionTime < this.config.extractionCooldownMs) {
      log.debug('Skipping extraction: cooldown active');
      return emptyResult();
    }

    // Minimum message threshold
    const chatMessages = messages.filter(isChatMessage);
    log.debug(`Checking extraction: ${chatMessages.length} messages (min: ${this.config.minMessagesForExtraction})`);
    if (chatMessages.length < this.config.minMessagesForExtraction) {
      log.debug('Skipping extraction: not enough messages');
      return emptyResult();
    }

    // The cooldown starts as soon as an attempt begins, even if it later fails.
    this.lastExtractionTime = Date.now();

    const candidates = await this.runExtraction(chatMessages, Boolean(options?.forceLLM));

    // Filter by importance threshold
    const extracted = candidates.filter(
      item => item.importance >= this.config.minImportanceThreshold
    );
    log.debug(`After importance filtering (>= ${this.config.minImportanceThreshold}): ${extracted.length} items`);

    const { saved, skipped } = await this.persistMemories(
      extracted,
      chatMessages,
      agentId,
      conversationId
    );
    const userProfileUpdated = await this.updateUserProfile(extracted, agentId);

    if (saved > 0) {
      log.debug(`Extracted ${saved} memories from conversation (${skipped} skipped)`);
    }

    return { items: extracted, saved, skipped, userProfileUpdated };
  }

  /**
   * Returns the LLM adapter, creating it on first use.
   * Lazy creation is what lets `forceLLM` work when `config.useLLM` is false
   * and no adapter was created in the constructor. A failed creation is
   * logged and leaves the adapter as `null`, so the next call tries again.
   */
  private resolveLLMAdapter(): LLMServiceAdapter | null {
    if (!this.llmAdapter) {
      try {
        this.llmAdapter = getLLMAdapter();
      } catch (error) {
        log.warn('Failed to initialize LLM adapter:', error);
      }
    }
    return this.llmAdapter;
  }

  /**
   * Picks the extraction strategy and returns the unfiltered candidates.
   * The LLM path is taken when it is enabled or forced and the adapter
   * reports itself available; otherwise rules are used.
   */
  private async runExtraction(
    chatMessages: ConversationMessage[],
    forceLLM: boolean
  ): Promise<ExtractedItem[]> {
    const adapter = this.config.useLLM || forceLLM ? this.resolveLLMAdapter() : null;

    if (adapter?.isAvailable()) {
      try {
        log.debug('Using LLM-powered semantic extraction');
        return await this.llmBasedExtraction(chatMessages, adapter);
      } catch (error) {
        log.error('LLM extraction failed:', error);
        if (!this.config.llmFallbackToRules) {
          throw error;
        }
        log.debug('Falling back to rule-based extraction');
        return this.ruleBasedExtraction(chatMessages);
      }
    }

    // Rule-based extraction
    log.debug('Using rule-based extraction');
    const extracted = this.ruleBasedExtraction(chatMessages);
    log.debug(`Rule-based extracted ${extracted.length} items before filtering`);
    return extracted;
  }

  /**
   * Stores memories and reports how many were saved / skipped.
   *
   * Primary path: `extractAndStoreMemories` from viking-client. Note that it
   * is handed the raw chat messages, not `extracted`; `saved` is the number
   * of memories it reports back.
   * Fallback path (primary threw): each extracted item is stored one by one
   * through `intelligenceClient.memory.store`; individual failures are
   * counted as skipped rather than aborting the loop.
   */
  private async persistMemories(
    extracted: ExtractedItem[],
    chatMessages: ConversationMessage[],
    agentId: string,
    conversationId?: string
  ): Promise<{ saved: number; skipped: number }> {
    if (extracted.length === 0) {
      return { saved: 0, skipped: 0 };
    }

    try {
      const chatMessagesForViking: ChatMessageForExtraction[] = chatMessages.map(m => ({
        role: m.role,
        content: m.content,
      }));
      const vikingResult = await extractAndStoreMemories(chatMessagesForViking, agentId);
      log.debug(`Viking storage result: ${vikingResult.summary}`);
      return { saved: vikingResult.memories.length, skipped: 0 };
    } catch (err) {
      log.warn('Viking storage failed, falling back to intelligenceClient:', err);
    }

    let saved = 0;
    let skipped = 0;
    for (const item of extracted) {
      try {
        await intelligenceClient.memory.store({
          agent_id: agentId,
          memory_type: item.type,
          content: item.content,
          importance: item.importance,
          source: 'auto',
          tags: item.tags,
          conversation_id: conversationId,
        });
        saved++;
      } catch (e) {
        log.debug('Failed to save memory item', { error: e });
        skipped++;
      }
    }
    return { saved, skipped };
  }

  /**
   * Auto-update USER.md with preferences.
   * Appends every preference item with importance >= 5 under a dated heading.
   * A failed append is logged and reported as `false`, never thrown.
   */
  private async updateUserProfile(extracted: ExtractedItem[], agentId: string): Promise<boolean> {
    const preferences = extracted.filter(e => e.type === 'preference' && e.importance >= 5);
    if (preferences.length === 0) {
      return false;
    }

    try {
      const prefSummary = preferences.map(p => `- ${p.content}`).join('\n');
      await intelligenceClient.identity.appendUserProfile(
        agentId,
        `### 自动发现的偏好 (${new Date().toLocaleDateString('zh-CN')})\n${prefSummary}`
      );
      return true;
    } catch (err) {
      log.warn('Failed to update USER.md:', err);
      return false;
    }
  }

  /**
   * LLM-powered semantic extraction.
   * Sends the labelled transcript through `llmExtract` and parses the reply
   * as a JSON array of items.
   */
  private async llmBasedExtraction(
    messages: ConversationMessage[],
    adapter: LLMServiceAdapter
  ): Promise<ExtractedItem[]> {
    // Use llmExtract helper from llm-service
    const llmResponse = await llmExtract(formatConversation(messages), adapter);

    // Parse the JSON response
    return this.parseExtractionResponse(llmResponse);
  }

  /**
   * Phase 1: Rule-based extraction using pattern matching.
   * Facts, preferences and tasks come from user messages (per message, in
   * that order); lessons come from assistant messages and are appended last.
   */
  private ruleBasedExtraction(messages: ConversationMessage[]): ExtractedItem[] {
    const items: ExtractedItem[] = [];
    const userMessages = messages.filter(m => m.role === 'user').map(m => m.content);

    for (const msg of userMessages) {
      this.extractFacts(msg, items);
      this.extractPreferences(msg, items);
      this.extractTasks(msg, items);
    }

    // Lesson extraction from assistant messages (error corrections, solutions)
    const assistantMessages = messages.filter(m => m.role === 'assistant').map(m => m.content);
    this.extractLessons(userMessages, assistantMessages, items);

    return items;
  }

  /** Appends fact items (importance 6) matched in `msg` to `items`. */
  private extractFacts(msg: string, items: ExtractedItem[]): void {
    collectMatches(msg, FACT_RULE, items);
  }

  /** Appends preference items (importance 5, content prefixed) matched in `msg` to `items`. */
  private extractPreferences(msg: string, items: ExtractedItem[]): void {
    collectMatches(msg, PREFERENCE_RULE, items);
  }

  /** Appends task items (importance 7) matched in `msg` to `items`. */
  private extractTasks(msg: string, items: ExtractedItem[]): void {
    collectMatches(msg, TASK_RULE, items);
  }

  /**
   * Appends lesson items (importance 6) matched in assistant messages to `items`.
   * `_userMessages` is accepted but not used.
   */
  private extractLessons(
    _userMessages: string[],
    assistantMessages: string[],
    items: ExtractedItem[]
  ): void {
    // Look for error resolution patterns in assistant messages
    for (const msg of assistantMessages) {
      collectMatches(msg, LESSON_RULE, items);
    }
  }

  /**
   * Build the LLM extraction prompt for a conversation: the fixed instruction
   * text followed by the labelled user/assistant transcript.
   */
  buildExtractionPrompt(messages: ConversationMessage[]): string {
    return EXTRACTION_PROMPT + formatConversation(messages);
  }

  /**
   * Parse LLM extraction response.
   *
   * Takes the text between the first `[` and the last `]` and parses it as a
   * JSON array. Entries are validated one by one so that a single malformed
   * entry does not discard the rest: an entry is skipped when it is not an
   * object, lacks `content` / `type` / `importance`, has a non-string `type`,
   * or has an `importance` that is not a number. Importance is clamped to
   * the 1-10 range; a missing or non-array `tags` becomes `[]`.
   *
   * @returns The valid items, or `[]` when no JSON array can be found or parsed.
   */
  parseExtractionResponse(response: string): ExtractedItem[] {
    try {
      // Find JSON array in response
      const jsonMatch = response.match(/\[[\s\S]*\]/);
      if (!jsonMatch) return [];

      const parsed: unknown = JSON.parse(jsonMatch[0]);
      if (!Array.isArray(parsed)) return [];

      const items: ExtractedItem[] = [];
      for (const entry of parsed as unknown[]) {
        // `null` and primitives would throw on property access below.
        if (typeof entry !== 'object' || entry === null) continue;

        const item = entry as Record<string, unknown>;
        if (!item.content || !item.type || item.importance === undefined) continue;
        if (typeof item.type !== 'string') continue;

        const importance = Number(item.importance);
        // Math.min/Math.max propagate NaN, so reject it before clamping.
        if (Number.isNaN(importance)) continue;

        items.push({
          content: String(item.content),
          type: item.type as MemoryType,
          importance: Math.max(1, Math.min(10, importance)),
          tags: Array.isArray(item.tags) ? item.tags.map(tag => String(tag)) : [],
        });
      }
      return items;
    } catch (e) {
      log.warn('Failed to parse LLM extraction response', { error: e });
      return [];
    }
  }
}

// === Singleton ===

let _instance: MemoryExtractor | null = null;

/** Returns the shared extractor, creating it with default config on first use. */
export function getMemoryExtractor(): MemoryExtractor {
  if (!_instance) {
    _instance = new MemoryExtractor();
  }
  return _instance;
}

/** Drops the shared extractor so the next `getMemoryExtractor` call builds a new one. */
export function resetMemoryExtractor(): void {
  _instance = null;
}