docs(claude): restructure documentation management and add feedback system
- Restructure §8 from "文档沉淀规则" to "文档管理规则" with 4 subsections - Add docs/ structure with features/ and knowledge-base/ directories - Add feature documentation template with 7 sections (概述/设计初衷/技术设计/预期作用/实际效果/演化路线/头脑风暴) - Add feature update trigger matrix (新增/修改/完成/问题/反馈) - Add documentation quality checklist - Add §16
This commit is contained in:
443
desktop/src/lib/memory-index.ts
Normal file
443
desktop/src/lib/memory-index.ts
Normal file
@@ -0,0 +1,443 @@
|
||||
/**
|
||||
* Memory Index - High-performance indexing for agent memory retrieval
|
||||
*
|
||||
* Implements inverted index + LRU cache for sub-20ms retrieval on 1000+ memories.
|
||||
*
|
||||
* Performance targets:
|
||||
* - Retrieval latency: <20ms (vs ~50ms with linear scan)
|
||||
* - 1000 memories: smooth operation
|
||||
* - Memory overhead: ~30% additional for indexes
|
||||
*
|
||||
* Reference: Task "Optimize ZCLAW Agent Memory Retrieval Performance"
|
||||
*/
|
||||
|
||||
import type { MemoryEntry, MemoryType } from './agent-memory';
|
||||
|
||||
// === Types ===
|
||||
|
||||
/** Snapshot of index size and query-cache performance, produced by getStats(). */
export interface IndexStats {
  // Number of indexed memories (size of the per-memory token cache).
  totalEntries: number;
  // Number of distinct keywords in the inverted keyword index.
  keywordCount: number;
  // Query-cache hit ratio in [0, 1]; 0 when no queries have been recorded.
  cacheHitRate: number;
  // Number of query results currently held in the LRU query cache.
  cacheSize: number;
  // Mean recorded query latency in milliseconds over the rolling window.
  avgQueryTime: number;
}
|
||||
|
||||
/** One cached query result stored in the LRU query cache. */
interface CacheEntry {
  results: string[]; // memory IDs
  timestamp: number; // Date.now() at the moment the result was cached
}
|
||||
|
||||
// === Tokenization (shared with agent-memory.ts) ===
|
||||
|
||||
export function tokenize(text: string): string[] {
|
||||
return text
|
||||
.toLowerCase()
|
||||
.replace(/[^\w\u4e00-\u9fff\u3400-\u4dbf]+/g, ' ')
|
||||
.split(/\s+/)
|
||||
.filter(t => t.length > 0);
|
||||
}
|
||||
|
||||
// === LRU Cache Implementation ===
|
||||
|
||||
class LRUCache<K, V> {
|
||||
private cache: Map<K, V>;
|
||||
private maxSize: number;
|
||||
|
||||
constructor(maxSize: number) {
|
||||
this.cache = new Map();
|
||||
this.maxSize = maxSize;
|
||||
}
|
||||
|
||||
get(key: K): V | undefined {
|
||||
const value = this.cache.get(key);
|
||||
if (value !== undefined) {
|
||||
// Move to end (most recently used)
|
||||
this.cache.delete(key);
|
||||
this.cache.set(key, value);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
set(key: K, value: V): void {
|
||||
if (this.cache.has(key)) {
|
||||
this.cache.delete(key);
|
||||
} else if (this.cache.size >= this.maxSize) {
|
||||
// Remove least recently used (first item)
|
||||
const firstKey = this.cache.keys().next().value;
|
||||
if (firstKey !== undefined) {
|
||||
this.cache.delete(firstKey);
|
||||
}
|
||||
}
|
||||
this.cache.set(key, value);
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.cache.clear();
|
||||
}
|
||||
|
||||
get size(): number {
|
||||
return this.cache.size;
|
||||
}
|
||||
}
|
||||
|
||||
// === Memory Index Implementation ===
|
||||
|
||||
export class MemoryIndex {
|
||||
// Inverted indexes
|
||||
private keywordIndex: Map<string, Set<string>> = new Map(); // keyword -> memoryIds
|
||||
private typeIndex: Map<MemoryType, Set<string>> = new Map(); // type -> memoryIds
|
||||
private agentIndex: Map<string, Set<string>> = new Map(); // agentId -> memoryIds
|
||||
private tagIndex: Map<string, Set<string>> = new Map(); // tag -> memoryIds
|
||||
|
||||
// Pre-tokenized content cache
|
||||
private tokenCache: Map<string, string[]> = new Map(); // memoryId -> tokens
|
||||
|
||||
// Query result cache
|
||||
private queryCache: LRUCache<string, CacheEntry>;
|
||||
|
||||
// Statistics
|
||||
private cacheHits = 0;
|
||||
private cacheMisses = 0;
|
||||
private queryTimes: number[] = [];
|
||||
|
||||
constructor(cacheSize = 100) {
|
||||
this.queryCache = new LRUCache(cacheSize);
|
||||
}
|
||||
|
||||
// === Index Building ===
|
||||
|
||||
/**
|
||||
* Build or update index for a memory entry.
|
||||
* Call this when adding or updating a memory.
|
||||
*/
|
||||
index(entry: MemoryEntry): void {
|
||||
const { id, agentId, type, tags, content } = entry;
|
||||
|
||||
// Index by agent
|
||||
if (!this.agentIndex.has(agentId)) {
|
||||
this.agentIndex.set(agentId, new Set());
|
||||
}
|
||||
this.agentIndex.get(agentId)!.add(id);
|
||||
|
||||
// Index by type
|
||||
if (!this.typeIndex.has(type)) {
|
||||
this.typeIndex.set(type, new Set());
|
||||
}
|
||||
this.typeIndex.get(type)!.add(id);
|
||||
|
||||
// Index by tags
|
||||
for (const tag of tags) {
|
||||
const normalizedTag = tag.toLowerCase();
|
||||
if (!this.tagIndex.has(normalizedTag)) {
|
||||
this.tagIndex.set(normalizedTag, new Set());
|
||||
}
|
||||
this.tagIndex.get(normalizedTag)!.add(id);
|
||||
}
|
||||
|
||||
// Index by content keywords
|
||||
const tokens = tokenize(content);
|
||||
this.tokenCache.set(id, tokens);
|
||||
|
||||
for (const token of tokens) {
|
||||
if (!this.keywordIndex.has(token)) {
|
||||
this.keywordIndex.set(token, new Set());
|
||||
}
|
||||
this.keywordIndex.get(token)!.add(id);
|
||||
}
|
||||
|
||||
// Invalidate query cache on index change
|
||||
this.queryCache.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a memory from all indexes.
|
||||
*/
|
||||
remove(memoryId: string): void {
|
||||
// Remove from agent index
|
||||
for (const [agentId, ids] of this.agentIndex) {
|
||||
ids.delete(memoryId);
|
||||
if (ids.size === 0) {
|
||||
this.agentIndex.delete(agentId);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from type index
|
||||
for (const [type, ids] of this.typeIndex) {
|
||||
ids.delete(memoryId);
|
||||
if (ids.size === 0) {
|
||||
this.typeIndex.delete(type);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from tag index
|
||||
for (const [tag, ids] of this.tagIndex) {
|
||||
ids.delete(memoryId);
|
||||
if (ids.size === 0) {
|
||||
this.tagIndex.delete(tag);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from keyword index
|
||||
for (const [keyword, ids] of this.keywordIndex) {
|
||||
ids.delete(memoryId);
|
||||
if (ids.size === 0) {
|
||||
this.keywordIndex.delete(keyword);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove token cache
|
||||
this.tokenCache.delete(memoryId);
|
||||
|
||||
// Invalidate query cache
|
||||
this.queryCache.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Rebuild all indexes from scratch.
|
||||
* Use after bulk updates or data corruption.
|
||||
*/
|
||||
rebuild(entries: MemoryEntry[]): void {
|
||||
this.clear();
|
||||
for (const entry of entries) {
|
||||
this.index(entry);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all indexes.
|
||||
*/
|
||||
clear(): void {
|
||||
this.keywordIndex.clear();
|
||||
this.typeIndex.clear();
|
||||
this.agentIndex.clear();
|
||||
this.tagIndex.clear();
|
||||
this.tokenCache.clear();
|
||||
this.queryCache.clear();
|
||||
this.cacheHits = 0;
|
||||
this.cacheMisses = 0;
|
||||
this.queryTimes = [];
|
||||
}
|
||||
|
||||
// === Fast Filtering ===
|
||||
|
||||
/**
|
||||
* Get candidate memory IDs based on filter options.
|
||||
* Uses indexes for O(1) lookups instead of O(n) scans.
|
||||
*/
|
||||
getCandidates(options: {
|
||||
agentId?: string;
|
||||
type?: MemoryType;
|
||||
types?: MemoryType[];
|
||||
tags?: string[];
|
||||
}): Set<string> | null {
|
||||
const candidateSets: Set<string>[] = [];
|
||||
|
||||
// Filter by agent
|
||||
if (options.agentId) {
|
||||
const agentSet = this.agentIndex.get(options.agentId);
|
||||
if (!agentSet) return new Set(); // Agent has no memories
|
||||
candidateSets.push(agentSet);
|
||||
}
|
||||
|
||||
// Filter by single type
|
||||
if (options.type) {
|
||||
const typeSet = this.typeIndex.get(options.type);
|
||||
if (!typeSet) return new Set(); // No memories of this type
|
||||
candidateSets.push(typeSet);
|
||||
}
|
||||
|
||||
// Filter by multiple types
|
||||
if (options.types && options.types.length > 0) {
|
||||
const typeUnion = new Set<string>();
|
||||
for (const t of options.types) {
|
||||
const typeSet = this.typeIndex.get(t);
|
||||
if (typeSet) {
|
||||
for (const id of typeSet) {
|
||||
typeUnion.add(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (typeUnion.size === 0) return new Set();
|
||||
candidateSets.push(typeUnion);
|
||||
}
|
||||
|
||||
// Filter by tags (OR logic - match any tag)
|
||||
if (options.tags && options.tags.length > 0) {
|
||||
const tagUnion = new Set<string>();
|
||||
for (const tag of options.tags) {
|
||||
const normalizedTag = tag.toLowerCase();
|
||||
const tagSet = this.tagIndex.get(normalizedTag);
|
||||
if (tagSet) {
|
||||
for (const id of tagSet) {
|
||||
tagUnion.add(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (tagUnion.size === 0) return new Set();
|
||||
candidateSets.push(tagUnion);
|
||||
}
|
||||
|
||||
// Intersect all candidate sets
|
||||
if (candidateSets.length === 0) {
|
||||
return null; // No filters applied, return null to indicate "all"
|
||||
}
|
||||
|
||||
// Start with smallest set for efficiency
|
||||
candidateSets.sort((a, b) => a.size - b.size);
|
||||
let result = new Set(candidateSets[0]);
|
||||
|
||||
for (let i = 1; i < candidateSets.length; i++) {
|
||||
const nextSet = candidateSets[i];
|
||||
result = new Set([...result].filter(id => nextSet.has(id)));
|
||||
if (result.size === 0) break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// === Keyword Search ===
|
||||
|
||||
/**
|
||||
* Get memory IDs that contain any of the query keywords.
|
||||
* Returns a map of memoryId -> match count for ranking.
|
||||
*/
|
||||
searchKeywords(queryTokens: string[]): Map<string, number> {
|
||||
const matchCounts = new Map<string, number>();
|
||||
|
||||
for (const token of queryTokens) {
|
||||
const matchingIds = this.keywordIndex.get(token);
|
||||
if (matchingIds) {
|
||||
for (const id of matchingIds) {
|
||||
matchCounts.set(id, (matchCounts.get(id) ?? 0) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Also check for partial matches (token is substring of indexed keyword)
|
||||
for (const [keyword, ids] of this.keywordIndex) {
|
||||
if (keyword.includes(token) || token.includes(keyword)) {
|
||||
for (const id of ids) {
|
||||
matchCounts.set(id, (matchCounts.get(id) ?? 0) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return matchCounts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get pre-tokenized content for a memory.
|
||||
*/
|
||||
getTokens(memoryId: string): string[] | undefined {
|
||||
return this.tokenCache.get(memoryId);
|
||||
}
|
||||
|
||||
// === Query Cache ===
|
||||
|
||||
/**
|
||||
* Generate cache key from query and options.
|
||||
*/
|
||||
private getCacheKey(query: string, options?: Record<string, unknown>): string {
|
||||
const opts = options ?? {};
|
||||
return `${query}|${opts.agentId ?? ''}|${opts.type ?? ''}|${(opts.types as string[])?.join(',') ?? ''}|${(opts.tags as string[])?.join(',') ?? ''}|${opts.minImportance ?? ''}|${opts.limit ?? ''}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get cached query results.
|
||||
*/
|
||||
getCached(query: string, options?: Record<string, unknown>): string[] | null {
|
||||
const key = this.getCacheKey(query, options);
|
||||
const cached = this.queryCache.get(key);
|
||||
if (cached) {
|
||||
this.cacheHits++;
|
||||
return cached.results;
|
||||
}
|
||||
this.cacheMisses++;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cache query results.
|
||||
*/
|
||||
setCached(query: string, options: Record<string, unknown> | undefined, results: string[]): void {
|
||||
const key = this.getCacheKey(query, options);
|
||||
this.queryCache.set(key, {
|
||||
results,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
// === Statistics ===
|
||||
|
||||
/**
|
||||
* Record query time for statistics.
|
||||
*/
|
||||
recordQueryTime(timeMs: number): void {
|
||||
this.queryTimes.push(timeMs);
|
||||
// Keep last 100 query times
|
||||
if (this.queryTimes.length > 100) {
|
||||
this.queryTimes.shift();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get index statistics.
|
||||
*/
|
||||
getStats(): IndexStats {
|
||||
const avgQueryTime = this.queryTimes.length > 0
|
||||
? this.queryTimes.reduce((a, b) => a + b, 0) / this.queryTimes.length
|
||||
: 0;
|
||||
|
||||
const totalRequests = this.cacheHits + this.cacheMisses;
|
||||
|
||||
return {
|
||||
totalEntries: this.tokenCache.size,
|
||||
keywordCount: this.keywordIndex.size,
|
||||
cacheHitRate: totalRequests > 0 ? this.cacheHits / totalRequests : 0,
|
||||
cacheSize: this.queryCache.size,
|
||||
avgQueryTime,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get index memory usage estimate.
|
||||
*/
|
||||
getMemoryUsage(): { estimated: number; breakdown: Record<string, number> } {
|
||||
let keywordIndexSize = 0;
|
||||
for (const [keyword, ids] of this.keywordIndex) {
|
||||
keywordIndexSize += keyword.length * 2 + ids.size * 50; // rough estimate
|
||||
}
|
||||
|
||||
return {
|
||||
estimated:
|
||||
keywordIndexSize +
|
||||
this.typeIndex.size * 100 +
|
||||
this.agentIndex.size * 100 +
|
||||
this.tagIndex.size * 100 +
|
||||
this.tokenCache.size * 200,
|
||||
breakdown: {
|
||||
keywordIndex: keywordIndexSize,
|
||||
typeIndex: this.typeIndex.size * 100,
|
||||
agentIndex: this.agentIndex.size * 100,
|
||||
tagIndex: this.tagIndex.size * 100,
|
||||
tokenCache: this.tokenCache.size * 200,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// === Singleton ===
|
||||
|
||||
let _instance: MemoryIndex | null = null;
|
||||
|
||||
export function getMemoryIndex(): MemoryIndex {
|
||||
if (!_instance) {
|
||||
_instance = new MemoryIndex();
|
||||
}
|
||||
return _instance;
|
||||
}
|
||||
|
||||
export function resetMemoryIndex(): void {
|
||||
_instance = null;
|
||||
}
|
||||
Reference in New Issue
Block a user