Files
zclaw_openfang/desktop/src/lib/memory-index.ts
iven adfd7024df docs(claude): restructure documentation management and add feedback system
- Restructure §8 from "文档沉淀规则" to "文档管理规则" with 4 subsections
  - Add docs/ structure with features/ and knowledge-base/ directories
  - Add feature documentation template with 7 sections (概述/设计初衷/技术设计/预期作用/实际效果/演化路线/头脑风暴)
  - Add feature update trigger matrix (新增/修改/完成/问题/反馈)
  - Add documentation quality checklist
- Add §16
2026-03-16 13:54:03 +08:00

444 lines
11 KiB
TypeScript

/**
* Memory Index - High-performance indexing for agent memory retrieval
*
* Implements inverted index + LRU cache for sub-20ms retrieval on 1000+ memories.
*
* Performance targets:
* - Retrieval latency: <20ms (vs ~50ms with linear scan)
* - 1000 memories: smooth operation
* - Memory overhead: ~30% additional for indexes
*
* Reference: Task "Optimize ZCLAW Agent Memory Retrieval Performance"
*/
import type { MemoryEntry, MemoryType } from './agent-memory';
// === Types ===
/**
* Snapshot of index size and query-cache effectiveness, as returned by
* MemoryIndex.getStats().
*/
export interface IndexStats {
/** Number of memories currently indexed (one per pre-tokenized entry). */
totalEntries: number;
/** Number of distinct keywords in the inverted keyword index. */
keywordCount: number;
/** Cache hits divided by total cache lookups; 0 when no lookups have happened. */
cacheHitRate: number;
/** Number of query results currently held in the query cache. */
cacheSize: number;
/** Mean of the recorded query times (most recent samples only); 0 when none recorded. */
avgQueryTime: number;
}
/**
* Value stored in the query-result cache for one (query, options) key.
*/
interface CacheEntry {
/** IDs of the memories that matched the cached query. */
results: string[]; // memory IDs
/** Date.now() value captured when the results were cached. */
timestamp: number;
}
// === Tokenization (shared with agent-memory.ts) ===
/**
 * Split text into lowercase tokens.
 *
 * A token is a maximal run of ASCII word characters (letters, digits,
 * underscore) and/or CJK ideographs in the U+4E00–U+9FFF and U+3400–U+4DBF
 * ranges; every other character acts as a separator.
 *
 * @param text - Raw text to tokenize.
 * @returns The tokens in order of appearance; empty when the text has none.
 */
export function tokenize(text: string): string[] {
  const lowered = text.toLowerCase();
  // Collecting runs of token characters is the mirror image of blanking out
  // everything else and splitting on the blanks.
  const runs = lowered.match(/[\w\u4e00-\u9fff\u3400-\u4dbf]+/g);
  if (runs === null) {
    return [];
  }
  return [...runs];
}
// === LRU Cache Implementation ===
/**
 * Least-recently-used cache built on a Map.
 *
 * A Map iterates in insertion order, so the first key is always the least
 * recently used one: reads and overwrites re-insert their key to move it to
 * the end, and eviction removes the first key.
 */
class LRUCache<K, V> {
  private readonly cache: Map<K, V>;
  private readonly maxSize: number;

  /**
   * @param maxSize - Maximum number of entries to keep. A value of zero or
   *   less disables the cache: nothing is ever stored.
   */
  constructor(maxSize: number) {
    this.cache = new Map();
    this.maxSize = maxSize;
  }

  /**
   * Look up a key and mark it as most recently used.
   *
   * @returns The stored value, or undefined when the key is absent.
   */
  get(key: K): V | undefined {
    const value = this.cache.get(key);
    if (value !== undefined) {
      // Re-insert to move the key to the most-recently-used end.
      this.cache.delete(key);
      this.cache.set(key, value);
    }
    return value;
  }

  /**
   * Store a value, evicting the least recently used entry when the cache is
   * full and the key is new.
   */
  set(key: K, value: V): void {
    if (this.maxSize <= 0) {
      // Without this guard an empty map has nothing to evict, so the insert
      // below would still go through and a "zero-capacity" cache would hold
      // one entry.
      return;
    }
    if (this.cache.has(key)) {
      // Overwrite: drop the old slot so the key moves to the end.
      this.cache.delete(key);
    } else if (this.cache.size >= this.maxSize) {
      const oldest = this.cache.keys().next();
      if (!oldest.done) {
        this.cache.delete(oldest.value);
      }
    }
    this.cache.set(key, value);
  }

  /** Remove every entry. */
  clear(): void {
    this.cache.clear();
  }

  /** Number of entries currently stored. */
  get size(): number {
    return this.cache.size;
  }
}
// === Memory Index Implementation ===
/**
 * Inverted indexes over agent memories plus an LRU cache of query results.
 *
 * Each index maps a key (keyword, type, agent ID, or lowercased tag) to the
 * set of memory IDs filed under it. Any change to the indexes clears the
 * query cache, so cached results never outlive the data they were computed
 * from.
 */
export class MemoryIndex {
  /** Number of most-recent query timings kept for the average. */
  private static readonly MAX_QUERY_SAMPLES = 100;

  // Inverted indexes
  private readonly keywordIndex: Map<string, Set<string>> = new Map(); // keyword -> memoryIds
  private readonly typeIndex: Map<MemoryType, Set<string>> = new Map(); // type -> memoryIds
  private readonly agentIndex: Map<string, Set<string>> = new Map(); // agentId -> memoryIds
  private readonly tagIndex: Map<string, Set<string>> = new Map(); // lowercased tag -> memoryIds

  // Pre-tokenized content; also serves as the record of which IDs are indexed.
  private readonly tokenCache: Map<string, string[]> = new Map(); // memoryId -> tokens

  // Query result cache
  private readonly queryCache: LRUCache<string, CacheEntry>;

  // Statistics
  private cacheHits = 0;
  private cacheMisses = 0;
  private queryTimes: number[] = [];

  /**
   * @param cacheSize - Maximum number of query results to keep cached.
   */
  constructor(cacheSize = 100) {
    this.queryCache = new LRUCache(cacheSize);
  }

  // === Internal helpers ===

  /** File `memoryId` under `key`, creating the posting set on first use. */
  private static addPosting<K>(postings: Map<K, Set<string>>, key: K, memoryId: string): void {
    let ids = postings.get(key);
    if (ids === undefined) {
      ids = new Set();
      postings.set(key, ids);
    }
    ids.add(memoryId);
  }

  /** Remove `memoryId` from every posting set, dropping keys that become empty. */
  private static dropFromAll<K>(postings: Map<K, Set<string>>, memoryId: string): void {
    // Deleting the current key while iterating a Map is safe.
    for (const [key, ids] of postings) {
      ids.delete(memoryId);
      if (ids.size === 0) {
        postings.delete(key);
      }
    }
  }

  // === Index Building ===

  /**
   * Build or update the index for a memory entry.
   * Call this when adding or updating a memory.
   *
   * If the entry's ID is already indexed, its previous postings are removed
   * first, so keywords, tags, type, or agent that the entry no longer has
   * stop matching it.
   */
  index(entry: MemoryEntry): void {
    const { id, agentId, type, tags, content } = entry;

    if (this.tokenCache.has(id)) {
      // Update of an existing memory: purge stale postings before re-adding.
      this.remove(id);
    }

    MemoryIndex.addPosting(this.agentIndex, agentId, id);
    MemoryIndex.addPosting(this.typeIndex, type, id);

    // Tags are matched case-insensitively, so they are stored lowercased.
    for (const tag of tags) {
      MemoryIndex.addPosting(this.tagIndex, tag.toLowerCase(), id);
    }

    const tokens = tokenize(content);
    this.tokenCache.set(id, tokens);
    for (const token of tokens) {
      MemoryIndex.addPosting(this.keywordIndex, token, id);
    }

    // Invalidate query cache on index change
    this.queryCache.clear();
  }

  /**
   * Remove a memory from all indexes. Unknown IDs are a no-op apart from
   * clearing the query cache.
   */
  remove(memoryId: string): void {
    MemoryIndex.dropFromAll(this.agentIndex, memoryId);
    MemoryIndex.dropFromAll(this.typeIndex, memoryId);
    MemoryIndex.dropFromAll(this.tagIndex, memoryId);

    // The cached tokens are exactly the keywords this memory was filed under,
    // so only those posting sets need touching.
    const tokens = this.tokenCache.get(memoryId);
    if (tokens !== undefined) {
      for (const token of tokens) {
        const ids = this.keywordIndex.get(token);
        if (ids === undefined) {
          continue; // repeated token whose posting set was already dropped
        }
        ids.delete(memoryId);
        if (ids.size === 0) {
          this.keywordIndex.delete(token);
        }
      }
    }
    this.tokenCache.delete(memoryId);

    // Invalidate query cache
    this.queryCache.clear();
  }

  /**
   * Rebuild all indexes from scratch.
   * Use after bulk updates or data corruption. Statistics are reset as well.
   */
  rebuild(entries: MemoryEntry[]): void {
    this.clear();
    for (const entry of entries) {
      this.index(entry);
    }
  }

  /**
   * Clear all indexes, the query cache, and the collected statistics.
   */
  clear(): void {
    this.keywordIndex.clear();
    this.typeIndex.clear();
    this.agentIndex.clear();
    this.tagIndex.clear();
    this.tokenCache.clear();
    this.queryCache.clear();
    this.cacheHits = 0;
    this.cacheMisses = 0;
    this.queryTimes = [];
  }

  // === Fast Filtering ===

  /**
   * Get candidate memory IDs based on filter options, using the per-key
   * posting sets instead of scanning entries.
   *
   * Filters combine with AND; within `types` and within `tags` the values
   * combine with OR. Tags are compared case-insensitively.
   *
   * @returns A fresh set of matching IDs (possibly empty), or null when no
   *   filter was supplied, meaning "all memories".
   */
  getCandidates(options: {
    agentId?: string;
    type?: MemoryType;
    types?: MemoryType[];
    tags?: string[];
  }): Set<string> | null {
    const candidateSets: Set<string>[] = [];

    // Filter by agent
    if (options.agentId) {
      const agentSet = this.agentIndex.get(options.agentId);
      if (!agentSet) return new Set(); // Agent has no memories
      candidateSets.push(agentSet);
    }

    // Filter by single type
    if (options.type) {
      const typeSet = this.typeIndex.get(options.type);
      if (!typeSet) return new Set(); // No memories of this type
      candidateSets.push(typeSet);
    }

    // Filter by multiple types (match any)
    if (options.types && options.types.length > 0) {
      const typeUnion = new Set<string>();
      for (const t of options.types) {
        for (const id of this.typeIndex.get(t) ?? []) {
          typeUnion.add(id);
        }
      }
      if (typeUnion.size === 0) return new Set();
      candidateSets.push(typeUnion);
    }

    // Filter by tags (OR logic - match any tag)
    if (options.tags && options.tags.length > 0) {
      const tagUnion = new Set<string>();
      for (const tag of options.tags) {
        for (const id of this.tagIndex.get(tag.toLowerCase()) ?? []) {
          tagUnion.add(id);
        }
      }
      if (tagUnion.size === 0) return new Set();
      candidateSets.push(tagUnion);
    }

    if (candidateSets.length === 0) {
      return null; // No filters applied, return null to indicate "all"
    }

    // Intersect, starting from the smallest set so the working set can only shrink.
    candidateSets.sort((a, b) => a.size - b.size);
    const [smallest, ...others] = candidateSets;
    const result = new Set<string>(smallest); // copy: never hand out an index's own set
    for (const other of others) {
      for (const id of result) {
        if (!other.has(id)) {
          result.delete(id);
        }
      }
      if (result.size === 0) break;
    }
    return result;
  }

  // === Keyword Search ===

  /**
   * Get memory IDs that contain any of the query keywords.
   *
   * For each query token, a memory is credited once per indexed keyword that
   * contains the token or is contained in it, plus once more for an exact
   * keyword match.
   *
   * NOTE(review): an exact match is therefore counted twice (by the direct
   * lookup and again by the substring scan). Left unchanged because callers
   * may rank on these counts — confirm whether the extra weight is intended.
   *
   * @returns A map of memoryId -> match count for ranking.
   */
  searchKeywords(queryTokens: string[]): Map<string, number> {
    const matchCounts = new Map<string, number>();
    const credit = (ids: Iterable<string>): void => {
      for (const id of ids) {
        matchCounts.set(id, (matchCounts.get(id) ?? 0) + 1);
      }
    };

    for (const token of queryTokens) {
      const exact = this.keywordIndex.get(token);
      if (exact) {
        credit(exact);
      }
      // Partial matches in either direction; this visits every indexed keyword.
      for (const [keyword, ids] of this.keywordIndex) {
        if (keyword.includes(token) || token.includes(keyword)) {
          credit(ids);
        }
      }
    }
    return matchCounts;
  }

  /**
   * Get pre-tokenized content for a memory, or undefined if it is not indexed.
   */
  getTokens(memoryId: string): string[] | undefined {
    return this.tokenCache.get(memoryId);
  }

  // === Query Cache ===

  /**
   * Generate the cache key for a query and its options.
   *
   * The key is a JSON-encoded tuple rather than a delimiter-joined string, so
   * a delimiter character inside the query or an option value cannot make two
   * different requests share a key. Only agentId, type, types, tags,
   * minImportance, and limit take part in the key.
   */
  private getCacheKey(query: string, options?: Record<string, unknown>): string {
    const opts = options ?? {};
    return JSON.stringify([
      query,
      opts.agentId ?? null,
      opts.type ?? null,
      opts.types ?? null,
      opts.tags ?? null,
      opts.minImportance ?? null,
      opts.limit ?? null,
    ]);
  }

  /**
   * Get cached query results, recording a hit or miss for the statistics.
   *
   * @returns The cached memory IDs, or null when nothing is cached for this
   *   query and options.
   */
  getCached(query: string, options?: Record<string, unknown>): string[] | null {
    const cached = this.queryCache.get(this.getCacheKey(query, options));
    if (cached) {
      this.cacheHits++;
      return cached.results;
    }
    this.cacheMisses++;
    return null;
  }

  /**
   * Cache query results under the key derived from the query and options.
   */
  setCached(query: string, options: Record<string, unknown> | undefined, results: string[]): void {
    this.queryCache.set(this.getCacheKey(query, options), {
      results,
      timestamp: Date.now(),
    });
  }

  // === Statistics ===

  /**
   * Record a query time for statistics; only the most recent samples are kept.
   */
  recordQueryTime(timeMs: number): void {
    this.queryTimes.push(timeMs);
    if (this.queryTimes.length > MemoryIndex.MAX_QUERY_SAMPLES) {
      this.queryTimes.shift();
    }
  }

  /**
   * Get index statistics: entry and keyword counts, cache hit rate and size,
   * and the mean of the retained query times.
   */
  getStats(): IndexStats {
    const sampleCount = this.queryTimes.length;
    const avgQueryTime =
      sampleCount > 0 ? this.queryTimes.reduce((sum, t) => sum + t, 0) / sampleCount : 0;
    const totalRequests = this.cacheHits + this.cacheMisses;
    return {
      totalEntries: this.tokenCache.size,
      keywordCount: this.keywordIndex.size,
      cacheHitRate: totalRequests > 0 ? this.cacheHits / totalRequests : 0,
      cacheSize: this.queryCache.size,
      avgQueryTime,
    };
  }

  /**
   * Get a rough estimate of index memory usage (presumably in bytes — the
   * per-item constants are heuristics, not measurements).
   */
  getMemoryUsage(): { estimated: number; breakdown: Record<string, number> } {
    let keywordIndex = 0;
    for (const [keyword, ids] of this.keywordIndex) {
      keywordIndex += keyword.length * 2 + ids.size * 50; // rough estimate
    }
    const typeIndex = this.typeIndex.size * 100;
    const agentIndex = this.agentIndex.size * 100;
    const tagIndex = this.tagIndex.size * 100;
    const tokenCache = this.tokenCache.size * 200;
    return {
      estimated: keywordIndex + typeIndex + agentIndex + tagIndex + tokenCache,
      breakdown: { keywordIndex, typeIndex, agentIndex, tagIndex, tokenCache },
    };
  }
}
// === Singleton ===
/** Module-wide index instance; null until first requested. */
let _instance: MemoryIndex | null = null;

/**
 * Return the shared MemoryIndex, creating it with default settings on first use.
 */
export function getMemoryIndex(): MemoryIndex {
  const shared = _instance ?? new MemoryIndex();
  _instance = shared;
  return shared;
}
/**
* Discard the shared MemoryIndex instance.
* The next getMemoryIndex() call constructs a fresh, empty index.
*/
export function resetMemoryIndex(): void {
_instance = null;
}