/**
 * Memory Index - High-performance indexing for agent memory retrieval
 *
 * Implements inverted index + LRU cache for sub-20ms retrieval on 1000+ memories.
 *
 * Performance targets:
 * - Retrieval latency: <20ms (vs ~50ms with linear scan)
 * - 1000 memories: smooth operation
 * - Memory overhead: ~30% additional for indexes
 *
 * Reference: Task "Optimize ZCLAW Agent Memory Retrieval Performance"
 */

import type { MemoryEntry, MemoryType } from './agent-memory';

// === Types ===

/** Snapshot of index size and query-cache effectiveness, as returned by `MemoryIndex.getStats()`. */
export interface IndexStats {
  /** Number of memories currently indexed. */
  totalEntries: number;
  /** Number of distinct content keywords in the inverted index. */
  keywordCount: number;
  /** Cache hits divided by total cache lookups (0 when no lookups were made). */
  cacheHitRate: number;
  /** Number of entries currently held in the query cache. */
  cacheSize: number;
  /** Mean of the most recently recorded query times, in the unit the caller recorded them. */
  avgQueryTime: number;
}

interface CacheEntry {
  results: string[]; // memory IDs
  timestamp: number;
}

// === Tokenization (shared with agent-memory.ts) ===

/**
 * Split text into lowercase tokens.
 *
 * Every run of characters that is neither a `\w` word character nor in the
 * CJK ranges U+4E00-U+9FFF / U+3400-U+4DBF acts as a separator. A contiguous
 * run of CJK characters is kept as a single token (no word segmentation).
 *
 * @param text - Raw text to tokenize.
 * @returns Non-empty tokens in order of appearance; `[]` for empty or separator-only input.
 */
export function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .replace(/[^\w\u4e00-\u9fff\u3400-\u4dbf]+/g, ' ')
    .split(/\s+/)
    .filter(t => t.length > 0);
}

// === LRU Cache Implementation ===

/**
 * Least-recently-used cache backed by a `Map`.
 *
 * A `Map` iterates in insertion order, so re-inserting a key on every access
 * keeps the least recently used key first, where it can be evicted cheaply.
 */
class LRUCache<K, V> {
  private readonly cache = new Map<K, V>();
  private readonly maxSize: number;

  /**
   * @param maxSize - Maximum number of entries to retain. A value below 1
   *   disables the cache: `set` becomes a no-op.
   */
  constructor(maxSize: number) {
    this.maxSize = maxSize;
  }

  /**
   * Look up a key and mark it as most recently used.
   *
   * @returns The stored value, or `undefined` when the key is absent.
   */
  get(key: K): V | undefined {
    // Presence is tested with `has` so a stored `undefined` is still refreshed.
    if (!this.cache.has(key)) {
      return undefined;
    }
    const value = this.cache.get(key) as V;
    // Move to end (most recently used)
    this.cache.delete(key);
    this.cache.set(key, value);
    return value;
  }

  /**
   * Insert or replace a value, evicting the least recently used entry when
   * the cache is full.
   */
  set(key: K, value: V): void {
    if (this.maxSize < 1) {
      return; // zero capacity: nothing may be retained
    }
    if (this.cache.has(key)) {
      // Delete first so the re-insert below moves the key to the end.
      this.cache.delete(key);
    } else if (this.cache.size >= this.maxSize) {
      // Remove least recently used (first item)
      const oldest = this.cache.keys().next();
      if (!oldest.done) {
        this.cache.delete(oldest.value);
      }
    }
    this.cache.set(key, value);
  }

  /** Drop every entry. */
  clear(): void {
    this.cache.clear();
  }

  /** Number of entries currently stored. */
  get size(): number {
    return this.cache.size;
  }
}

// === Memory Index Implementation ===

/**
 * Inverted indexes over memory entries (by agent, type, tag and content
 * keyword) plus an LRU cache of query results.
 *
 * The index stores memory IDs only; callers resolve IDs to entries themselves.
 * Any change to the index (`index`, `remove`, `clear`) empties the query cache.
 */
export class MemoryIndex {
  /** Upper bound on retained query-time samples. */
  private static readonly MAX_QUERY_SAMPLES = 100;

  // Inverted indexes
  private readonly keywordIndex = new Map<string, Set<string>>(); // keyword -> memoryIds
  private readonly typeIndex = new Map<MemoryType, Set<string>>(); // type -> memoryIds
  private readonly agentIndex = new Map<string, Set<string>>(); // agentId -> memoryIds
  private readonly tagIndex = new Map<string, Set<string>>(); // lowercased tag -> memoryIds

  // Pre-tokenized content cache
  private readonly tokenCache = new Map<string, string[]>(); // memoryId -> tokens

  // Query result cache
  private readonly queryCache: LRUCache<string, CacheEntry>;

  // Statistics
  private cacheHits = 0;
  private cacheMisses = 0;
  private queryTimes: number[] = [];

  /**
   * @param cacheSize - Maximum number of query results kept in the LRU cache.
   */
  constructor(cacheSize = 100) {
    this.queryCache = new LRUCache<string, CacheEntry>(cacheSize);
  }

  // === Index Building ===

  /**
   * Build or update index for a memory entry.
   * Call this when adding or updating a memory.
   *
   * If the entry's ID is already indexed, its previous postings are removed
   * first so that keywords, tags, type or agent that no longer apply do not
   * keep matching.
   *
   * @param entry - Memory to index; `id`, `agentId`, `type`, `tags` and `content` are read.
   */
  index(entry: MemoryEntry): void {
    const { id, agentId, type, tags, content } = entry;

    // Re-indexing an existing ID: drop stale postings before adding new ones.
    if (this.tokenCache.has(id)) {
      this.remove(id);
    }

    // Index by agent
    MemoryIndex.addPosting(this.agentIndex, agentId, id);

    // Index by type
    MemoryIndex.addPosting(this.typeIndex, type, id);

    // Index by tags (case-insensitive)
    for (const tag of tags) {
      MemoryIndex.addPosting(this.tagIndex, tag.toLowerCase(), id);
    }

    // Index by content keywords
    const tokens = tokenize(content);
    this.tokenCache.set(id, tokens);
    for (const token of tokens) {
      MemoryIndex.addPosting(this.keywordIndex, token, id);
    }

    // Invalidate query cache on index change
    this.queryCache.clear();
  }

  /**
   * Remove a memory from all indexes.
   *
   * Index keys whose ID set becomes empty are deleted. Removing an unknown ID
   * changes no index but still clears the query cache.
   *
   * @param memoryId - ID of the memory to remove.
   */
  remove(memoryId: string): void {
    MemoryIndex.dropPosting(this.agentIndex, memoryId);
    MemoryIndex.dropPosting(this.typeIndex, memoryId);
    MemoryIndex.dropPosting(this.tagIndex, memoryId);
    MemoryIndex.dropPosting(this.keywordIndex, memoryId);

    // Remove token cache
    this.tokenCache.delete(memoryId);

    // Invalidate query cache
    this.queryCache.clear();
  }

  /**
   * Rebuild all indexes from scratch.
   * Use after bulk updates or data corruption.
   *
   * Statistics are reset as well, because this starts with `clear()`.
   *
   * @param entries - Complete set of memories to index.
   */
  rebuild(entries: MemoryEntry[]): void {
    this.clear();
    for (const entry of entries) {
      this.index(entry);
    }
  }

  /**
   * Clear all indexes, the query cache and the collected statistics.
   */
  clear(): void {
    this.keywordIndex.clear();
    this.typeIndex.clear();
    this.agentIndex.clear();
    this.tagIndex.clear();
    this.tokenCache.clear();
    this.queryCache.clear();
    this.cacheHits = 0;
    this.cacheMisses = 0;
    this.queryTimes = [];
  }

  // === Fast Filtering ===

  /**
   * Get candidate memory IDs based on filter options.
   *
   * Each supplied filter is resolved through its index and the per-filter
   * results are intersected (AND across filters). Within `types` and `tags`
   * the match is a union (OR). Tags are compared case-insensitively.
   * An empty-string `agentId`/`type` and an empty `types`/`tags` array count
   * as "no filter".
   *
   * @param options - Filters to apply.
   * @returns A fresh set of matching IDs (possibly empty), or `null` when no
   *   filter was supplied, meaning "all memories".
   */
  getCandidates(options: {
    agentId?: string;
    type?: MemoryType;
    types?: MemoryType[];
    tags?: string[];
  }): Set<string> | null {
    const candidateSets: Set<string>[] = [];

    // Filter by agent
    if (options.agentId) {
      const agentSet = this.agentIndex.get(options.agentId);
      if (!agentSet) return new Set<string>(); // Agent has no memories
      candidateSets.push(agentSet);
    }

    // Filter by single type
    if (options.type) {
      const typeSet = this.typeIndex.get(options.type);
      if (!typeSet) return new Set<string>(); // No memories of this type
      candidateSets.push(typeSet);
    }

    // Filter by multiple types
    if (options.types && options.types.length > 0) {
      const typeUnion = new Set<string>();
      for (const t of options.types) {
        for (const id of this.typeIndex.get(t) ?? []) {
          typeUnion.add(id);
        }
      }
      if (typeUnion.size === 0) return new Set<string>();
      candidateSets.push(typeUnion);
    }

    // Filter by tags (OR logic - match any tag)
    if (options.tags && options.tags.length > 0) {
      const tagUnion = new Set<string>();
      for (const tag of options.tags) {
        for (const id of this.tagIndex.get(tag.toLowerCase()) ?? []) {
          tagUnion.add(id);
        }
      }
      if (tagUnion.size === 0) return new Set<string>();
      candidateSets.push(tagUnion);
    }

    // Start with smallest set for efficiency
    const [smallest, ...others] = candidateSets.sort((a, b) => a.size - b.size);
    if (smallest === undefined) {
      return null; // No filters applied, return null to indicate "all"
    }

    // Copy so the caller never receives (and cannot mutate) an index-owned set.
    let result = new Set<string>(smallest);
    for (const nextSet of others) {
      result = new Set<string>([...result].filter(id => nextSet.has(id)));
      if (result.size === 0) break;
    }

    return result;
  }

  // === Keyword Search ===

  /**
   * Get memory IDs that contain any of the query keywords.
   * Returns a map of memoryId -> match count for ranking.
   *
   * Scoring per query token: every indexed keyword that contains the token,
   * or is contained in it, adds 1 for each memory holding that keyword; an
   * exact keyword hit adds 1 more. An exact match therefore scores 2 and a
   * substring match scores 1. Empty tokens are skipped, because the empty
   * string is a substring of every keyword and would match every memory.
   *
   * The substring pass visits every indexed keyword for each query token.
   *
   * @param queryTokens - Tokens to look up; expected in the form produced by `tokenize`.
   * @returns Map of memory ID to accumulated score; memories without a match are absent.
   */
  searchKeywords(queryTokens: string[]): Map<string, number> {
    const matchCounts = new Map<string, number>();
    const bump = (ids: Iterable<string>): void => {
      for (const id of ids) {
        matchCounts.set(id, (matchCounts.get(id) ?? 0) + 1);
      }
    };

    for (const token of queryTokens) {
      if (token.length === 0) continue;

      // Exact match
      const matchingIds = this.keywordIndex.get(token);
      if (matchingIds) {
        bump(matchingIds);
      }

      // Also check for partial matches (substring in either direction)
      for (const [keyword, ids] of this.keywordIndex) {
        if (keyword.includes(token) || token.includes(keyword)) {
          bump(ids);
        }
      }
    }

    return matchCounts;
  }

  /**
   * Get pre-tokenized content for a memory.
   *
   * @returns The tokens stored when the memory was indexed, or `undefined` if it is not indexed.
   */
  getTokens(memoryId: string): string[] | undefined {
    return this.tokenCache.get(memoryId);
  }

  // === Query Cache ===

  /**
   * Generate cache key from query and options.
   *
   * The key is the JSON encoding of a fixed-order tuple, so separator
   * characters inside the query, a tag or a type cannot make two different
   * requests share a key.
   */
  private getCacheKey(query: string, options?: Record<string, unknown>): string {
    const opts = options ?? {};
    return JSON.stringify([
      query,
      opts.agentId ?? null,
      opts.type ?? null,
      opts.types ?? null,
      opts.tags ?? null,
      opts.minImportance ?? null,
      opts.limit ?? null,
    ]);
  }

  /**
   * Get cached query results and update the hit/miss counters.
   *
   * @param query - Query text used when the results were cached.
   * @param options - Query options; `agentId`, `type`, `types`, `tags`,
   *   `minImportance` and `limit` take part in the cache key.
   * @returns Cached memory IDs, or `null` on a cache miss.
   */
  getCached(query: string, options?: Record<string, unknown>): string[] | null {
    const cached = this.queryCache.get(this.getCacheKey(query, options));
    if (cached) {
      this.cacheHits++;
      return cached.results;
    }
    this.cacheMisses++;
    return null;
  }

  /**
   * Cache query results.
   *
   * @param query - Query text.
   * @param options - Query options (see `getCached` for the fields that form the key).
   * @param results - Memory IDs to store; the array is stored by reference.
   */
  setCached(query: string, options: Record<string, unknown> | undefined, results: string[]): void {
    this.queryCache.set(this.getCacheKey(query, options), {
      results,
      timestamp: Date.now(),
    });
  }

  // === Statistics ===

  /**
   * Record query time for statistics.
   *
   * @param timeMs - Duration of one query in milliseconds.
   */
  recordQueryTime(timeMs: number): void {
    this.queryTimes.push(timeMs);
    // Keep last 100 query times
    if (this.queryTimes.length > MemoryIndex.MAX_QUERY_SAMPLES) {
      this.queryTimes.shift();
    }
  }

  /**
   * Get index statistics.
   *
   * @returns Current entry/keyword counts, cache hit rate and size, and the
   *   mean of the retained query times (0 when none were recorded).
   */
  getStats(): IndexStats {
    const sampleCount = this.queryTimes.length;
    const avgQueryTime =
      sampleCount > 0 ? this.queryTimes.reduce((a, b) => a + b, 0) / sampleCount : 0;
    const totalRequests = this.cacheHits + this.cacheMisses;

    return {
      totalEntries: this.tokenCache.size,
      keywordCount: this.keywordIndex.size,
      cacheHitRate: totalRequests > 0 ? this.cacheHits / totalRequests : 0,
      cacheSize: this.queryCache.size,
      avgQueryTime,
    };
  }

  /**
   * Get index memory usage estimate.
   *
   * The figures come from fixed per-item weights (2 per keyword character,
   * 50 per posting, 100 per type/agent/tag key, 200 per token-cache entry);
   * they are a rough heuristic, not a measurement.
   *
   * @returns The total estimate and its per-index breakdown.
   */
  getMemoryUsage(): { estimated: number; breakdown: Record<string, number> } {
    let keywordIndexSize = 0;
    for (const [keyword, ids] of this.keywordIndex) {
      keywordIndexSize += keyword.length * 2 + ids.size * 50; // rough estimate
    }

    const breakdown: Record<string, number> = {
      keywordIndex: keywordIndexSize,
      typeIndex: this.typeIndex.size * 100,
      agentIndex: this.agentIndex.size * 100,
      tagIndex: this.tagIndex.size * 100,
      tokenCache: this.tokenCache.size * 200,
    };

    return {
      estimated: Object.values(breakdown).reduce((sum, part) => sum + part, 0),
      breakdown,
    };
  }

  // === Internal helpers ===

  /** Add `memoryId` to the ID set stored under `key`, creating the set on first use. */
  private static addPosting<K>(index: Map<K, Set<string>>, key: K, memoryId: string): void {
    let ids = index.get(key);
    if (!ids) {
      ids = new Set<string>();
      index.set(key, ids);
    }
    ids.add(memoryId);
  }

  /** Delete `memoryId` from every ID set in `index`, dropping keys whose set becomes empty. */
  private static dropPosting<K>(index: Map<K, Set<string>>, memoryId: string): void {
    for (const [key, ids] of index) {
      ids.delete(memoryId);
      if (ids.size === 0) {
        index.delete(key);
      }
    }
  }
}

// === Singleton ===

let _instance: MemoryIndex | null = null;

/**
 * Get the shared `MemoryIndex`, creating it with the default cache size on first use.
 */
export function getMemoryIndex(): MemoryIndex {
  if (!_instance) {
    _instance = new MemoryIndex();
  }
  return _instance;
}

/**
 * Discard the shared instance so the next `getMemoryIndex()` call creates a fresh, empty index.
 */
export function resetMemoryIndex(): void {
  _instance = null;
}