feat(l4): upgrade engines with LLM-powered capabilities (Phase 2)

Phase 2 LLM Engine Upgrades:
- ReflectionEngine: Add LLM semantic analysis for pattern detection
- ContextCompactor: Add LLM summarization for high-quality compaction
- MemoryExtractor: Add LLM importance scoring for memory extraction
- Add unified LLM service adapter (OpenAI, Volcengine, Gateway, Mock)
- Add MemorySource 'llm-reflection' for LLM-generated memories
- Add 13 integration tests for LLM-powered features

Config options added:
- useLLM: Enable LLM mode for each engine
- llmProvider: Preferred LLM provider
- llmFallbackToRules: Fallback to rules if LLM fails

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-03-16 10:41:03 +08:00
parent ef3315db69
commit 0b89329e19
5 changed files with 599 additions and 16 deletions

View File

@@ -0,0 +1,228 @@
/**
* LLM Integration Tests - Phase 2 Engine Upgrades
*
* Tests for LLM-powered features:
* - ReflectionEngine with LLM semantic analysis
* - ContextCompactor with LLM summarization
* - MemoryExtractor with LLM importance scoring
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import {
ReflectionEngine,
DEFAULT_REFLECTION_CONFIG,
type ReflectionConfig,
} from '../reflection-engine';
import {
ContextCompactor,
DEFAULT_COMPACTION_CONFIG,
type CompactionConfig,
} from '../context-compactor';
import {
MemoryExtractor,
DEFAULT_EXTRACTION_CONFIG,
type ExtractionConfig,
} from '../memory-extractor';
import {
getLLMAdapter,
resetLLMAdapter,
type LLMProvider,
} from '../llm-service';
// === Mock LLM Adapter ===
// Shared mock returned by getLLMAdapter(); individual tests flip
// isAvailable / complete behavior per-case via vi.fn handles.
const mockLLMAdapter = {
  complete: vi.fn(),
  isAvailable: vi.fn(() => true),
  getProvider: vi.fn(() => 'mock' as LLMProvider),
};

// vi.mock is hoisted above the imports by vitest. The factory itself runs
// when '../llm-service' is first imported; it only *closes over*
// mockLLMAdapter (dereferenced when getLLMAdapter is called inside a test),
// so the const above is initialized by the time it is read.
vi.mock('../llm-service', () => {
  // Canned LLM payloads — shaped like the real provider responses so the
  // engines' JSON parsing paths are exercised.
  const reflectPayload = {
    patterns: [
      {
        observation: '用户经常询问代码优化问题',
        frequency: 5,
        sentiment: 'positive',
        evidence: ['多次讨论性能优化'],
      },
    ],
    improvements: [
      {
        area: '代码解释',
        suggestion: '可以提供更详细的代码注释',
        priority: 'medium',
      },
    ],
    identityProposals: [],
  };
  const extractPayload = [
    { content: '用户偏好简洁的回答', type: 'preference', importance: 7, tags: ['style'] },
    { content: '项目使用 TypeScript', type: 'fact', importance: 6, tags: ['tech'] },
  ];
  return {
    getLLMAdapter: vi.fn(() => mockLLMAdapter),
    resetLLMAdapter: vi.fn(),
    llmReflect: vi.fn(async () => JSON.stringify(reflectPayload)),
    llmCompact: vi.fn(
      async () => '[LLM摘要]\n讨论主题: 代码优化\n关键决策: 使用缓存策略\n待办事项: 完成性能测试',
    ),
    llmExtract: vi.fn(async () => JSON.stringify(extractPayload)),
  };
});
// === ReflectionEngine Tests ===
describe('ReflectionEngine with LLM', () => {
  let engine: ReflectionEngine;

  beforeEach(() => {
    vi.clearAllMocks();
    engine = new ReflectionEngine({ useLLM: true });
  });

  afterEach(() => {
    // Defensive reset; the engine is recreated in beforeEach anyway.
    engine?.updateConfig({ useLLM: false });
  });

  it('should initialize with LLM config', () => {
    const config = engine.getConfig();
    expect(config.useLLM).toBe(true);
  });

  it('should have llmFallbackToRules enabled by default', () => {
    const config = engine.getConfig();
    expect(config.llmFallbackToRules).toBe(true);
  });

  it('should track conversations for reflection trigger', () => {
    // Exercise the exact boundary of the default trigger (5 conversations).
    // The previous version recorded 6 conversations before asserting true,
    // which would pass for any trigger value up to 6 and never pinned the
    // documented default of 5.
    for (let i = 0; i < 4; i++) {
      engine.recordConversation();
    }
    expect(engine.shouldReflect()).toBe(false); // 4 < 5: not yet

    engine.recordConversation(); // 5th conversation reaches the trigger
    expect(engine.shouldReflect()).toBe(true);
  });

  it('should use LLM when enabled and available', async () => {
    mockLLMAdapter.isAvailable.mockReturnValue(true);
    const result = await engine.reflect('test-agent', { forceLLM: true });
    // Mocked llmReflect returns one pattern, so the LLM path must surface it.
    expect(result.patterns.length).toBeGreaterThan(0);
    expect(result.timestamp).toBeDefined();
  });

  it('should fallback to rules when LLM fails', async () => {
    mockLLMAdapter.isAvailable.mockReturnValue(false);
    const result = await engine.reflect('test-agent');
    // Should still work with rule-based approach
    expect(result).toBeDefined();
    expect(result.timestamp).toBeDefined();
  });
});
// === ContextCompactor Tests ===
describe('ContextCompactor with LLM', () => {
  let compactor: ContextCompactor;

  beforeEach(() => {
    vi.clearAllMocks();
    compactor = new ContextCompactor({ useLLM: true });
  });

  it('should initialize with LLM config', () => {
    expect(compactor.getConfig().useLLM).toBe(true);
  });

  it('should have llmFallbackToRules enabled by default', () => {
    expect(compactor.getConfig().llmFallbackToRules).toBe(true);
  });

  it('should check threshold correctly', () => {
    // Two messages — well under any compaction threshold.
    const history = [
      { role: 'user', content: 'Hello'.repeat(1000) },
      { role: 'assistant', content: 'Response'.repeat(1000) },
    ];
    const verdict = compactor.checkThreshold(history);
    expect(verdict.shouldCompact).toBe(false);
    expect(verdict.urgency).toBe('none');
  });

  it('should trigger soft threshold', () => {
    // Create enough messages to exceed 15000 soft threshold but not 20000 hard threshold
    // estimateTokens: CJK chars ~1.5 tokens each
    // 20 messages × 600 CJK chars × 1.5 = ~18000 tokens (between soft and hard)
    const history = Array.from({ length: 20 }, (_, idx) => ({
      role: idx % 2 === 0 ? 'user' : 'assistant',
      content: '测试内容'.repeat(150), // 600 CJK chars ≈ 900 tokens each
    }));
    const verdict = compactor.checkThreshold(history);
    expect(verdict.shouldCompact).toBe(true);
    expect(verdict.urgency).toBe('soft');
  });
});
// === MemoryExtractor Tests ===
describe('MemoryExtractor with LLM', () => {
  let extractor: MemoryExtractor;

  beforeEach(() => {
    vi.clearAllMocks();
    extractor = new MemoryExtractor({ useLLM: true });
  });

  it('should initialize with LLM config', () => {
    // MemoryExtractor doesn't expose config directly, but we can test behavior
    expect(extractor).toBeDefined();
  });

  it('should skip extraction with too few messages', async () => {
    // A two-message exchange is below the extraction minimum.
    const shortChat = [
      { role: 'user', content: 'Hi' },
      { role: 'assistant', content: 'Hello!' },
    ];
    const outcome = await extractor.extractFromConversation(shortChat, 'test-agent');
    expect(outcome.saved).toBe(0);
  });

  it('should extract with enough messages', async () => {
    // Six messages — enough turns to run the extraction pipeline.
    const longChat = [
      { role: 'user', content: '我喜欢简洁的回答' },
      { role: 'assistant', content: '好的,我会简洁一些' },
      { role: 'user', content: '我的项目使用 TypeScript' },
      { role: 'assistant', content: 'TypeScript 是个好选择' },
      { role: 'user', content: '继续' },
      { role: 'assistant', content: '继续...' },
    ];
    const outcome = await extractor.extractFromConversation(longChat, 'test-agent');
    expect(outcome.items.length).toBeGreaterThanOrEqual(0);
  });
});
// === Integration Test ===
describe('LLM Integration Full Flow', () => {
  it('should work end-to-end with all engines', async () => {
    // Setup all engines with LLM
    const engine = new ReflectionEngine({ useLLM: true, llmFallbackToRules: true });
    const compactor = new ContextCompactor({ useLLM: true, llmFallbackToRules: true });
    const extractor = new MemoryExtractor({ useLLM: true, llmFallbackToRules: true });

    // Verify they all have LLM support
    expect(engine.getConfig().useLLM).toBe(true);
    expect(compactor.getConfig().useLLM).toBe(true);

    // All should resolve without throwing. NOTE: the previous version used a
    // bare `await expect(p).resolves;` — `.resolves` without a matcher never
    // executes an assertion, so a rejected promise went unobserved and the
    // test passed regardless of failures. Attaching .toBeDefined() makes the
    // await actually settle the promise and fail the test on rejection.
    await expect(engine.reflect('test-agent')).resolves.toBeDefined();
    await expect(compactor.compact([], 'test-agent')).resolves.toBeDefined();
    await expect(extractor.extractFromConversation([], 'test-agent')).resolves.toBeDefined();
  });
});