- Restructure §8 from "文档沉淀规则" to "文档管理规则" with 4 subsections - Add docs/ structure with features/ and knowledge-base/ directories - Add feature documentation template with 7 sections (概述/设计初衷/技术设计/预期作用/实际效果/演化路线/头脑风暴) - Add feature update trigger matrix (新增/修改/完成/问题/反馈) - Add documentation quality checklist - Add §16
444 lines
11 KiB
TypeScript
/**
|
|
* Memory Index - High-performance indexing for agent memory retrieval
|
|
*
|
|
* Implements inverted index + LRU cache for sub-20ms retrieval on 1000+ memories.
|
|
*
|
|
* Performance targets:
|
|
* - Retrieval latency: <20ms (vs ~50ms with linear scan)
|
|
* - 1000 memories: smooth operation
|
|
* - Memory overhead: ~30% additional for indexes
|
|
*
|
|
* Reference: Task "Optimize ZCLAW Agent Memory Retrieval Performance"
|
|
*/
|
|
|
|
import type { MemoryEntry, MemoryType } from './agent-memory';
|
|
|
|
// === Types ===
|
|
|
|
export interface IndexStats {
  /** Number of memory entries currently indexed (size of the token cache). */
  totalEntries: number;
  /** Number of distinct keywords in the inverted keyword index. */
  keywordCount: number;
  /** Query-cache hit ratio in [0, 1]; 0 when no cache lookups have occurred. */
  cacheHitRate: number;
  /** Current number of entries held in the LRU query cache. */
  cacheSize: number;
  /** Mean of the recorded query times (ms), over the rolling window kept by recordQueryTime(). */
  avgQueryTime: number;
}
|
|
|
|
/** A cached query result stored in the LRU query cache. */
interface CacheEntry {
  results: string[]; // memory IDs, in the order the query produced them
  /** Creation time (ms since epoch); informational only — no TTL-based eviction is visible in this file. */
  timestamp: number;
}
|
|
|
|
// === Tokenization (shared with agent-memory.ts) ===
|
|
|
|
export function tokenize(text: string): string[] {
|
|
return text
|
|
.toLowerCase()
|
|
.replace(/[^\w\u4e00-\u9fff\u3400-\u4dbf]+/g, ' ')
|
|
.split(/\s+/)
|
|
.filter(t => t.length > 0);
|
|
}
|
|
|
|
// === LRU Cache Implementation ===
|
|
|
|
class LRUCache<K, V> {
|
|
private cache: Map<K, V>;
|
|
private maxSize: number;
|
|
|
|
constructor(maxSize: number) {
|
|
this.cache = new Map();
|
|
this.maxSize = maxSize;
|
|
}
|
|
|
|
get(key: K): V | undefined {
|
|
const value = this.cache.get(key);
|
|
if (value !== undefined) {
|
|
// Move to end (most recently used)
|
|
this.cache.delete(key);
|
|
this.cache.set(key, value);
|
|
}
|
|
return value;
|
|
}
|
|
|
|
set(key: K, value: V): void {
|
|
if (this.cache.has(key)) {
|
|
this.cache.delete(key);
|
|
} else if (this.cache.size >= this.maxSize) {
|
|
// Remove least recently used (first item)
|
|
const firstKey = this.cache.keys().next().value;
|
|
if (firstKey !== undefined) {
|
|
this.cache.delete(firstKey);
|
|
}
|
|
}
|
|
this.cache.set(key, value);
|
|
}
|
|
|
|
clear(): void {
|
|
this.cache.clear();
|
|
}
|
|
|
|
get size(): number {
|
|
return this.cache.size;
|
|
}
|
|
}
|
|
|
|
// === Memory Index Implementation ===
|
|
|
|
export class MemoryIndex {
|
|
// Inverted indexes
|
|
private keywordIndex: Map<string, Set<string>> = new Map(); // keyword -> memoryIds
|
|
private typeIndex: Map<MemoryType, Set<string>> = new Map(); // type -> memoryIds
|
|
private agentIndex: Map<string, Set<string>> = new Map(); // agentId -> memoryIds
|
|
private tagIndex: Map<string, Set<string>> = new Map(); // tag -> memoryIds
|
|
|
|
// Pre-tokenized content cache
|
|
private tokenCache: Map<string, string[]> = new Map(); // memoryId -> tokens
|
|
|
|
// Query result cache
|
|
private queryCache: LRUCache<string, CacheEntry>;
|
|
|
|
// Statistics
|
|
private cacheHits = 0;
|
|
private cacheMisses = 0;
|
|
private queryTimes: number[] = [];
|
|
|
|
constructor(cacheSize = 100) {
|
|
this.queryCache = new LRUCache(cacheSize);
|
|
}
|
|
|
|
// === Index Building ===
|
|
|
|
/**
|
|
* Build or update index for a memory entry.
|
|
* Call this when adding or updating a memory.
|
|
*/
|
|
index(entry: MemoryEntry): void {
|
|
const { id, agentId, type, tags, content } = entry;
|
|
|
|
// Index by agent
|
|
if (!this.agentIndex.has(agentId)) {
|
|
this.agentIndex.set(agentId, new Set());
|
|
}
|
|
this.agentIndex.get(agentId)!.add(id);
|
|
|
|
// Index by type
|
|
if (!this.typeIndex.has(type)) {
|
|
this.typeIndex.set(type, new Set());
|
|
}
|
|
this.typeIndex.get(type)!.add(id);
|
|
|
|
// Index by tags
|
|
for (const tag of tags) {
|
|
const normalizedTag = tag.toLowerCase();
|
|
if (!this.tagIndex.has(normalizedTag)) {
|
|
this.tagIndex.set(normalizedTag, new Set());
|
|
}
|
|
this.tagIndex.get(normalizedTag)!.add(id);
|
|
}
|
|
|
|
// Index by content keywords
|
|
const tokens = tokenize(content);
|
|
this.tokenCache.set(id, tokens);
|
|
|
|
for (const token of tokens) {
|
|
if (!this.keywordIndex.has(token)) {
|
|
this.keywordIndex.set(token, new Set());
|
|
}
|
|
this.keywordIndex.get(token)!.add(id);
|
|
}
|
|
|
|
// Invalidate query cache on index change
|
|
this.queryCache.clear();
|
|
}
|
|
|
|
/**
|
|
* Remove a memory from all indexes.
|
|
*/
|
|
remove(memoryId: string): void {
|
|
// Remove from agent index
|
|
for (const [agentId, ids] of this.agentIndex) {
|
|
ids.delete(memoryId);
|
|
if (ids.size === 0) {
|
|
this.agentIndex.delete(agentId);
|
|
}
|
|
}
|
|
|
|
// Remove from type index
|
|
for (const [type, ids] of this.typeIndex) {
|
|
ids.delete(memoryId);
|
|
if (ids.size === 0) {
|
|
this.typeIndex.delete(type);
|
|
}
|
|
}
|
|
|
|
// Remove from tag index
|
|
for (const [tag, ids] of this.tagIndex) {
|
|
ids.delete(memoryId);
|
|
if (ids.size === 0) {
|
|
this.tagIndex.delete(tag);
|
|
}
|
|
}
|
|
|
|
// Remove from keyword index
|
|
for (const [keyword, ids] of this.keywordIndex) {
|
|
ids.delete(memoryId);
|
|
if (ids.size === 0) {
|
|
this.keywordIndex.delete(keyword);
|
|
}
|
|
}
|
|
|
|
// Remove token cache
|
|
this.tokenCache.delete(memoryId);
|
|
|
|
// Invalidate query cache
|
|
this.queryCache.clear();
|
|
}
|
|
|
|
/**
|
|
* Rebuild all indexes from scratch.
|
|
* Use after bulk updates or data corruption.
|
|
*/
|
|
rebuild(entries: MemoryEntry[]): void {
|
|
this.clear();
|
|
for (const entry of entries) {
|
|
this.index(entry);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Clear all indexes.
|
|
*/
|
|
clear(): void {
|
|
this.keywordIndex.clear();
|
|
this.typeIndex.clear();
|
|
this.agentIndex.clear();
|
|
this.tagIndex.clear();
|
|
this.tokenCache.clear();
|
|
this.queryCache.clear();
|
|
this.cacheHits = 0;
|
|
this.cacheMisses = 0;
|
|
this.queryTimes = [];
|
|
}
|
|
|
|
// === Fast Filtering ===
|
|
|
|
/**
|
|
* Get candidate memory IDs based on filter options.
|
|
* Uses indexes for O(1) lookups instead of O(n) scans.
|
|
*/
|
|
getCandidates(options: {
|
|
agentId?: string;
|
|
type?: MemoryType;
|
|
types?: MemoryType[];
|
|
tags?: string[];
|
|
}): Set<string> | null {
|
|
const candidateSets: Set<string>[] = [];
|
|
|
|
// Filter by agent
|
|
if (options.agentId) {
|
|
const agentSet = this.agentIndex.get(options.agentId);
|
|
if (!agentSet) return new Set(); // Agent has no memories
|
|
candidateSets.push(agentSet);
|
|
}
|
|
|
|
// Filter by single type
|
|
if (options.type) {
|
|
const typeSet = this.typeIndex.get(options.type);
|
|
if (!typeSet) return new Set(); // No memories of this type
|
|
candidateSets.push(typeSet);
|
|
}
|
|
|
|
// Filter by multiple types
|
|
if (options.types && options.types.length > 0) {
|
|
const typeUnion = new Set<string>();
|
|
for (const t of options.types) {
|
|
const typeSet = this.typeIndex.get(t);
|
|
if (typeSet) {
|
|
for (const id of typeSet) {
|
|
typeUnion.add(id);
|
|
}
|
|
}
|
|
}
|
|
if (typeUnion.size === 0) return new Set();
|
|
candidateSets.push(typeUnion);
|
|
}
|
|
|
|
// Filter by tags (OR logic - match any tag)
|
|
if (options.tags && options.tags.length > 0) {
|
|
const tagUnion = new Set<string>();
|
|
for (const tag of options.tags) {
|
|
const normalizedTag = tag.toLowerCase();
|
|
const tagSet = this.tagIndex.get(normalizedTag);
|
|
if (tagSet) {
|
|
for (const id of tagSet) {
|
|
tagUnion.add(id);
|
|
}
|
|
}
|
|
}
|
|
if (tagUnion.size === 0) return new Set();
|
|
candidateSets.push(tagUnion);
|
|
}
|
|
|
|
// Intersect all candidate sets
|
|
if (candidateSets.length === 0) {
|
|
return null; // No filters applied, return null to indicate "all"
|
|
}
|
|
|
|
// Start with smallest set for efficiency
|
|
candidateSets.sort((a, b) => a.size - b.size);
|
|
let result = new Set(candidateSets[0]);
|
|
|
|
for (let i = 1; i < candidateSets.length; i++) {
|
|
const nextSet = candidateSets[i];
|
|
result = new Set([...result].filter(id => nextSet.has(id)));
|
|
if (result.size === 0) break;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// === Keyword Search ===
|
|
|
|
/**
|
|
* Get memory IDs that contain any of the query keywords.
|
|
* Returns a map of memoryId -> match count for ranking.
|
|
*/
|
|
searchKeywords(queryTokens: string[]): Map<string, number> {
|
|
const matchCounts = new Map<string, number>();
|
|
|
|
for (const token of queryTokens) {
|
|
const matchingIds = this.keywordIndex.get(token);
|
|
if (matchingIds) {
|
|
for (const id of matchingIds) {
|
|
matchCounts.set(id, (matchCounts.get(id) ?? 0) + 1);
|
|
}
|
|
}
|
|
|
|
// Also check for partial matches (token is substring of indexed keyword)
|
|
for (const [keyword, ids] of this.keywordIndex) {
|
|
if (keyword.includes(token) || token.includes(keyword)) {
|
|
for (const id of ids) {
|
|
matchCounts.set(id, (matchCounts.get(id) ?? 0) + 1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return matchCounts;
|
|
}
|
|
|
|
/**
|
|
* Get pre-tokenized content for a memory.
|
|
*/
|
|
getTokens(memoryId: string): string[] | undefined {
|
|
return this.tokenCache.get(memoryId);
|
|
}
|
|
|
|
// === Query Cache ===
|
|
|
|
/**
|
|
* Generate cache key from query and options.
|
|
*/
|
|
private getCacheKey(query: string, options?: Record<string, unknown>): string {
|
|
const opts = options ?? {};
|
|
return `${query}|${opts.agentId ?? ''}|${opts.type ?? ''}|${(opts.types as string[])?.join(',') ?? ''}|${(opts.tags as string[])?.join(',') ?? ''}|${opts.minImportance ?? ''}|${opts.limit ?? ''}`;
|
|
}
|
|
|
|
/**
|
|
* Get cached query results.
|
|
*/
|
|
getCached(query: string, options?: Record<string, unknown>): string[] | null {
|
|
const key = this.getCacheKey(query, options);
|
|
const cached = this.queryCache.get(key);
|
|
if (cached) {
|
|
this.cacheHits++;
|
|
return cached.results;
|
|
}
|
|
this.cacheMisses++;
|
|
return null;
|
|
}
|
|
|
|
/**
|
|
* Cache query results.
|
|
*/
|
|
setCached(query: string, options: Record<string, unknown> | undefined, results: string[]): void {
|
|
const key = this.getCacheKey(query, options);
|
|
this.queryCache.set(key, {
|
|
results,
|
|
timestamp: Date.now(),
|
|
});
|
|
}
|
|
|
|
// === Statistics ===
|
|
|
|
/**
|
|
* Record query time for statistics.
|
|
*/
|
|
recordQueryTime(timeMs: number): void {
|
|
this.queryTimes.push(timeMs);
|
|
// Keep last 100 query times
|
|
if (this.queryTimes.length > 100) {
|
|
this.queryTimes.shift();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get index statistics.
|
|
*/
|
|
getStats(): IndexStats {
|
|
const avgQueryTime = this.queryTimes.length > 0
|
|
? this.queryTimes.reduce((a, b) => a + b, 0) / this.queryTimes.length
|
|
: 0;
|
|
|
|
const totalRequests = this.cacheHits + this.cacheMisses;
|
|
|
|
return {
|
|
totalEntries: this.tokenCache.size,
|
|
keywordCount: this.keywordIndex.size,
|
|
cacheHitRate: totalRequests > 0 ? this.cacheHits / totalRequests : 0,
|
|
cacheSize: this.queryCache.size,
|
|
avgQueryTime,
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Get index memory usage estimate.
|
|
*/
|
|
getMemoryUsage(): { estimated: number; breakdown: Record<string, number> } {
|
|
let keywordIndexSize = 0;
|
|
for (const [keyword, ids] of this.keywordIndex) {
|
|
keywordIndexSize += keyword.length * 2 + ids.size * 50; // rough estimate
|
|
}
|
|
|
|
return {
|
|
estimated:
|
|
keywordIndexSize +
|
|
this.typeIndex.size * 100 +
|
|
this.agentIndex.size * 100 +
|
|
this.tagIndex.size * 100 +
|
|
this.tokenCache.size * 200,
|
|
breakdown: {
|
|
keywordIndex: keywordIndexSize,
|
|
typeIndex: this.typeIndex.size * 100,
|
|
agentIndex: this.agentIndex.size * 100,
|
|
tagIndex: this.tagIndex.size * 100,
|
|
tokenCache: this.tokenCache.size * 200,
|
|
},
|
|
};
|
|
}
|
|
}
|
|
|
|
// === Singleton ===
|
|
|
|
// Lazily-created module-wide singleton; see getMemoryIndex()/resetMemoryIndex().
let _instance: MemoryIndex | null = null;
|
|
|
|
export function getMemoryIndex(): MemoryIndex {
|
|
if (!_instance) {
|
|
_instance = new MemoryIndex();
|
|
}
|
|
return _instance;
|
|
}
|
|
|
|
export function resetMemoryIndex(): void {
|
|
_instance = null;
|
|
}
|