/**
 * LLM Service Adapter - Unified LLM interface for L4 self-evolution engines
 *
 * Provides a unified interface for:
 * - ReflectionEngine: Semantic analysis + deep reflection
 * - ContextCompactor: High-quality summarization
 * - MemoryExtractor: Semantic importance scoring
 *
 * Supports multiple backends:
 * - OpenAI (GPT-4, GPT-3.5)
 * - Volcengine (Doubao)
 * - ZCLAW Gateway (passthrough)
 *
 * Part of ZCLAW L4 Self-Evolution capability.
 */
import { DEFAULT_MODEL_ID, DEFAULT_OPENAI_BASE_URL } from '../constants/models';
import { createLogger } from './logger';

const log = createLogger('LLMService');

// === Types ===

export type LLMProvider = 'openai' | 'volcengine' | 'gateway' | 'saas' | 'mock';

export interface LLMConfig {
  provider: LLMProvider;
  model?: string;
  apiKey?: string;
  apiBase?: string;
  maxTokens?: number;
  temperature?: number;
  timeout?: number;
}

export interface LLMMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}

export interface LLMResponse {
  content: string;
  tokensUsed?: {
    input: number;
    output: number;
  };
  model?: string;
  latencyMs?: number;
}

export interface LLMServiceAdapter {
  /** Send a chat completion; per-call `options` override the adapter's stored config. */
  complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse>;
  /** Cheap synchronous check — does NOT guarantee a request will succeed. */
  isAvailable(): boolean;
  getProvider(): LLMProvider;
}

// === Default Configs ===

// Per-provider defaults; adapter constructors merge user config over these.
const DEFAULT_CONFIGS: Record<LLMProvider, LLMConfig> = {
  openai: {
    provider: 'openai',
    model: DEFAULT_MODEL_ID,
    apiBase: DEFAULT_OPENAI_BASE_URL,
    maxTokens: 2000,
    temperature: 0.7,
    timeout: 30000,
  },
  volcengine: {
    provider: 'volcengine',
    model: 'doubao-pro-32k',
    apiBase: 'https://ark.cn-beijing.volces.com/api/v3',
    maxTokens: 2000,
    temperature: 0.7,
    timeout: 30000,
  },
  gateway: {
    provider: 'gateway',
    apiBase: '/api/llm',
    maxTokens: 2000,
    temperature: 0.7,
    timeout: 60000,
  },
  saas: {
    provider: 'saas',
    maxTokens: 4096,
    temperature: 0.7,
    timeout: 300000, // 5 min for streaming
  },
  mock: {
    provider: 'mock',
    maxTokens: 100,
    temperature: 0,
    timeout: 100,
  },
};

// === Storage ===
const LLM_CONFIG_KEY = 'zclaw-llm-config';

// === Mock Adapter (for testing) ===
class MockLLMAdapter implements LLMServiceAdapter {
  constructor(_config: LLMConfig) {
    // Config is intentionally ignored for now (future: customize mock behavior from it).
  }

  /**
   * Returns a canned response chosen by keyword-matching the last message
   * (reflect/summarize/importance), so L4 engines can be exercised offline.
   */
  async complete(messages: LLMMessage[]): Promise<LLMResponse> {
    // Simulate network latency
    await new Promise((resolve) => setTimeout(resolve, 50));

    const lastMessage = messages[messages.length - 1];
    const content = lastMessage?.content || '';

    // Generate mock response based on content type
    let response = '[Mock LLM Response] ';
    if (content.includes('reflect') || content.includes('反思')) {
      response += JSON.stringify({
        patterns: [
          {
            observation: '用户经常询问代码优化相关问题',
            frequency: 5,
            sentiment: 'positive',
            evidence: ['多次讨论性能优化', '关注代码质量'],
          },
        ],
        improvements: [
          {
            area: '代码解释',
            suggestion: '可以提供更详细的代码注释',
            priority: 'medium',
          },
        ],
        identityProposals: [],
      });
    } else if (content.includes('summarize') || content.includes('摘要')) {
      response += '这是一个关于对话内容的摘要,包含了主要讨论的要点和结论。';
    } else if (content.includes('importance') || content.includes('重要性')) {
      response += JSON.stringify({
        memories: [
          { content: '用户偏好简洁的回答', importance: 7, type: 'preference' },
        ],
      });
    } else {
      response += 'Processed: ' + content.slice(0, 50);
    }

    return {
      content: response,
      // Rough chars/4 token estimate; may be fractional — fine for mock purposes.
      tokensUsed: { input: content.length / 4, output: response.length / 4 },
      model: 'mock-model',
      latencyMs: 50,
    };
  }

  isAvailable(): boolean {
    return true;
  }

  getProvider(): LLMProvider {
    return 'mock';
  }
}

// === OpenAI Adapter ===
class OpenAILLMAdapter implements LLMServiceAdapter {
  private config: LLMConfig;

  constructor(config: LLMConfig) {
    this.config = { ...DEFAULT_CONFIGS.openai, ...config };
  }

  /**
   * Calls the OpenAI-compatible `/chat/completions` endpoint.
   * @throws Error when no API key is configured or the HTTP response is not ok.
   */
  async complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse> {
    const config = { ...this.config, ...options };
    const startTime = Date.now();

    if (!config.apiKey) {
      throw new Error('[OpenAI] API key not configured');
    }

    const response = await fetch(`${config.apiBase}/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${config.apiKey}`,
      },
body: JSON.stringify({ model: config.model, messages, max_tokens: config.maxTokens, temperature: config.temperature, }), signal: AbortSignal.timeout(config.timeout || 30000), }); if (!response.ok) { const errorBody = await response.text(); // Log full error in development only if (import.meta.env.DEV) { console.error('[OpenAI] API error:', errorBody); } // Return sanitized error to caller throw new Error(`[OpenAI] API error: ${response.status} - Request failed`); } const data = await response.json(); const latencyMs = Date.now() - startTime; return { content: data.choices[0]?.message?.content || '', tokensUsed: { input: data.usage?.prompt_tokens || 0, output: data.usage?.completion_tokens || 0, }, model: data.model, latencyMs, }; } isAvailable(): boolean { return !!this.config.apiKey; } getProvider(): LLMProvider { return 'openai'; } } // === Volcengine Adapter === class VolcengineLLMAdapter implements LLMServiceAdapter { private config: LLMConfig; constructor(config: LLMConfig) { this.config = { ...DEFAULT_CONFIGS.volcengine, ...config }; } async complete(messages: LLMMessage[], options?: Partial): Promise { const config = { ...this.config, ...options }; const startTime = Date.now(); if (!config.apiKey) { throw new Error('[Volcengine] API key not configured'); } const response = await fetch(`${config.apiBase}/chat/completions`, { method: 'POST', headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${config.apiKey}`, }, body: JSON.stringify({ model: config.model, messages, max_tokens: config.maxTokens, temperature: config.temperature, }), signal: AbortSignal.timeout(config.timeout || 30000), }); if (!response.ok) { const errorBody = await response.text(); // Log full error in development only if (import.meta.env.DEV) { console.error('[Volcengine] API error:', errorBody); } // Return sanitized error to caller throw new Error(`[Volcengine] API error: ${response.status} - Request failed`); } const data = await response.json(); const latencyMs = 
Date.now() - startTime; return { content: data.choices[0]?.message?.content || '', tokensUsed: { input: data.usage?.prompt_tokens || 0, output: data.usage?.completion_tokens || 0, }, model: data.model, latencyMs, }; } isAvailable(): boolean { return !!this.config.apiKey; } getProvider(): LLMProvider { return 'volcengine'; } } // === Gateway Adapter (pass through to ZCLAW or internal Kernel) === class GatewayLLMAdapter implements LLMServiceAdapter { private config: LLMConfig; constructor(config: LLMConfig) { this.config = { ...DEFAULT_CONFIGS.gateway, ...config }; } async complete(messages: LLMMessage[], options?: Partial): Promise { const config = { ...this.config, ...options }; const startTime = Date.now(); // Build a single prompt from messages const systemMessage = messages.find(m => m.role === 'system')?.content || ''; const userMessage = messages.find(m => m.role === 'user')?.content || ''; // Combine system and user messages into a single prompt const fullPrompt = systemMessage ? `${systemMessage}\n\n${userMessage}` : userMessage; // Check if running in Tauri with internal kernel // Use the same detection as kernel-client.ts const isTauri = typeof window !== 'undefined' && '__TAURI_INTERNALS__' in window; if (isTauri) { // Use internal Kernel via Tauri invoke try { const { invoke } = await import('@tauri-apps/api/core'); // Get the default agent ID from connectionStore or use the first agent const agentId = localStorage.getItem('zclaw-default-agent-id'); const response = await invoke<{ content: string; input_tokens: number; output_tokens: number }>('agent_chat', { request: { agentId: agentId || null, // null will use default agent message: fullPrompt, }, }); const latencyMs = Date.now() - startTime; return { content: response.content || '', tokensUsed: { input: response.input_tokens || 0, output: response.output_tokens || 0, }, latencyMs, }; } catch (err) { console.error('[LLMService] Kernel chat failed:', err); const message = err instanceof Error ? 
err.message : String(err); throw new Error(`[Gateway] Kernel chat failed: ${message}`); } } // External Gateway mode: Use ZCLAW's chat endpoint const agentId = localStorage.getItem('zclaw-default-agent-id') || 'default'; const response = await fetch(`/api/agents/${agentId}/message`, { method: 'POST', headers: { 'Content-Type': 'application/json', }, body: JSON.stringify({ message: fullPrompt, max_tokens: config.maxTokens, temperature: config.temperature ?? 0.3, // Lower temperature for extraction tasks }), signal: AbortSignal.timeout(config.timeout || 60000), }); if (!response.ok) { const error = await response.text(); // If agent not found, try without agent ID (direct /api/chat) if (response.status === 404) { const fallbackResponse = await fetch('/api/chat', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ message: fullPrompt, max_tokens: config.maxTokens, temperature: config.temperature ?? 0.3, }), signal: AbortSignal.timeout(config.timeout || 60000), }); if (!fallbackResponse.ok) { throw new Error(`[Gateway] Both endpoints failed: ${fallbackResponse.status}`); } const data = await fallbackResponse.json(); const latencyMs = Date.now() - startTime; return { content: data.response || data.content || '', tokensUsed: { input: data.input_tokens || 0, output: data.output_tokens || 0 }, latencyMs, }; } throw new Error(`[Gateway] API error: ${response.status} - ${error}`); } const data = await response.json(); const latencyMs = Date.now() - startTime; return { content: data.response || data.content || '', tokensUsed: { input: data.input_tokens || 0, output: data.output_tokens || 0 }, latencyMs, }; } isAvailable(): boolean { // Gateway is available if we're in browser (can connect to ZCLAW) return typeof window !== 'undefined'; } getProvider(): LLMProvider { return 'gateway'; } } // === SaaS Relay Adapter (via SaaS backend) === class SaasLLMAdapter implements LLMServiceAdapter { private config: LLMConfig; constructor(config: 
LLMConfig) { this.config = { ...DEFAULT_CONFIGS.saas, ...config }; } async complete(messages: LLMMessage[], options?: Partial): Promise { const config = { ...this.config, ...options }; const startTime = Date.now(); // Dynamic import to avoid circular dependency const { useSaaSStore } = await import('../store/saasStore'); const { saasUrl, authToken } = useSaaSStore.getState(); if (!saasUrl || !authToken) { throw new Error('[SaaS] 未登录 SaaS 平台,请先在设置中登录'); } // Dynamic import of SaaSClient singleton const { saasClient } = await import('./saas-client'); saasClient.setBaseUrl(saasUrl); saasClient.setToken(authToken); const openaiBody = { model: config.model || 'default', messages, max_tokens: config.maxTokens || 4096, temperature: config.temperature ?? 0.7, stream: false, }; const response = await saasClient.chatCompletion( openaiBody, AbortSignal.timeout(config.timeout || 300000), ); if (!response.ok) { const errorData = await response.json().catch(() => ({ error: 'unknown', message: `SaaS relay 请求失败 (${response.status})`, })); throw new Error( `[SaaS] ${errorData.message || errorData.error || `请求失败: ${response.status}`}`, ); } const data = await response.json(); const latencyMs = Date.now() - startTime; const result = { content: data.choices?.[0]?.message?.content || '', tokensUsed: { input: data.usage?.prompt_tokens || 0, output: data.usage?.completion_tokens || 0, }, model: data.model, latencyMs, }; // Record telemetry for SaaS relay usage try { const { recordLLMUsage } = await import('./telemetry-collector'); recordLLMUsage( result.model || 'saas-relay', result.tokensUsed.input, result.tokensUsed.output, { latencyMs, success: true, connectionMode: 'saas' }, ); } catch (e) { log.debug('Failed to record LLM telemetry', { error: e }); } return result; } isAvailable(): boolean { // Check synchronously via localStorage for availability check. // Auth is cookie-based — check connection mode + URL presence. 
try { const mode = localStorage.getItem('zclaw-connection-mode'); const saasUrl = localStorage.getItem('zclaw-saas-url'); return mode === 'saas' && !!saasUrl; } catch (e) { log.debug('Failed to check SaaS adapter availability', { error: e }); return false; } } getProvider(): LLMProvider { return 'saas'; } } // === Factory === let cachedAdapter: LLMServiceAdapter | null = null; export function createLLMAdapter(config?: Partial): LLMServiceAdapter { const savedConfig = loadConfig(); const finalConfig = { ...savedConfig, ...config }; switch (finalConfig.provider) { case 'openai': return new OpenAILLMAdapter(finalConfig); case 'volcengine': return new VolcengineLLMAdapter(finalConfig); case 'gateway': return new GatewayLLMAdapter(finalConfig); case 'saas': return new SaasLLMAdapter(finalConfig); case 'mock': default: return new MockLLMAdapter(finalConfig); } } export function getLLMAdapter(): LLMServiceAdapter { if (!cachedAdapter) { cachedAdapter = createLLMAdapter(); } return cachedAdapter; } export function resetLLMAdapter(): void { cachedAdapter = null; } // === Config Management === export function loadConfig(): LLMConfig { if (typeof window === 'undefined') { return DEFAULT_CONFIGS.mock; } try { const saved = localStorage.getItem(LLM_CONFIG_KEY); if (saved) { return JSON.parse(saved); } } catch (e) { log.debug('Failed to parse LLM config', { error: e }); } // Default to gateway (ZCLAW passthrough) for L4 self-evolution return DEFAULT_CONFIGS.gateway; } export function saveConfig(config: LLMConfig): void { if (typeof window === 'undefined') return; // Don't save API key to localStorage for security const safeConfig = { ...config }; delete safeConfig.apiKey; localStorage.setItem(LLM_CONFIG_KEY, JSON.stringify(safeConfig)); // Mark config as dirty for SaaS push sync localStorage.setItem('zclaw-config-dirty.llm.default', '1'); resetLLMAdapter(); } // === Prompt Templates === // 硬编码默认值 — 当 SaaS 不可用且本地无缓存时的终极兜底 const HARDCODED_PROMPTS: Record string }> = { reflection: 
{
    system: `你是一个 AI Agent 的自我反思引擎。分析最近的对话历史,识别行为模式,并生成改进建议。 输出 JSON 格式: { "patterns": [ { "observation": "观察到的模式描述", "frequency": 数字, "sentiment": "positive/negative/neutral", "evidence": ["证据1", "证据2"] } ], "improvements": [ { "area": "改进领域", "suggestion": "具体建议", "priority": "high/medium/low" } ], "identityProposals": [] }`,
    user: (context: string) => `分析以下对话历史,进行自我反思:\n\n${context}\n\n请识别行为模式(积极和消极),并提供具体的改进建议。`,
  },
  compaction: {
    system: `你是一个对话摘要专家。将长对话压缩为简洁的摘要,保留关键信息。 要求: 1. 保留所有重要决策和结论 2. 保留用户偏好和约束 3. 保留未完成的任务 4. 保持时间顺序 5. 摘要应能在后续对话中替代原始内容`,
    user: (messages: string) => `请将以下对话压缩为简洁摘要,保留关键信息:\n\n${messages}`,
  },
  extraction: {
    system: `你是一个记忆提取专家。从对话中提取值得长期记住的信息。 提取类型: - fact: 用户告知的事实(如"我的公司叫XXX") - preference: 用户的偏好(如"我喜欢简洁的回答") - lesson: 本次对话的经验教训 - task: 未完成的任务或承诺 输出 JSON 数组: [ { "content": "记忆内容", "type": "fact/preference/lesson/task", "importance": 1-10, "tags": ["标签1", "标签2"] } ]`,
    user: (conversation: string) => `从以下对话中提取值得长期记住的信息:\n\n${conversation}\n\n如果没有值得记忆的内容,返回空数组 []。`,
  },
};

// === Prompt Cache (SaaS OTA) ===
const PROMPT_CACHE_KEY = 'zclaw-prompt-cache';

interface CachedPrompt {
  name: string;
  version: number;
  source: string;
  system: string;
  userTemplate: string | null;
  syncedAt: string;
}

/** Load the locally cached prompt entries keyed by prompt name. */
function loadPromptCache(): Record<string, CachedPrompt> {
  if (typeof window === 'undefined') return {};
  try {
    const raw = localStorage.getItem(PROMPT_CACHE_KEY);
    return raw ? JSON.parse(raw) : {};
  } catch (e) {
    log.debug('Failed to parse prompt cache', { error: e });
    return {};
  }
}

/** Persist the prompt cache to localStorage (no-op during SSR). */
function savePromptCache(cache: Record<string, CachedPrompt>): void {
  if (typeof window === 'undefined') return;
  localStorage.setItem(PROMPT_CACHE_KEY, JSON.stringify(cache));
}

/**
 * Get the system prompt for the named template.
 * Priority: local (OTA-synced) cache → hardcoded default.
 */
export function getSystemPrompt(name: string): string {
  const cache = loadPromptCache();
  if (cache[name]?.system) {
    return cache[name].system;
  }
  return HARDCODED_PROMPTS[name]?.system ??
''; } /** * 获取指定提示词的用户提示词模板 * 优先级:本地缓存 → 硬编码默认值 */ export function getUserPromptTemplate(name: string): string | ((arg: string) => string) | null { const cache = loadPromptCache(); if (cache[name]) { const tmpl = cache[name].userTemplate; if (tmpl) return tmpl; } return HARDCODED_PROMPTS[name]?.user ?? null; } /** 获取提示词当前版本号(本地缓存) */ export function getPromptVersion(name: string): number { const cache = loadPromptCache(); return cache[name]?.version ?? 0; } /** 获取所有本地缓存的提示词版本(用于 OTA 检查) */ export function getAllPromptVersions(): Record { const cache = loadPromptCache(); const versions: Record = {}; for (const [name, entry] of Object.entries(cache)) { versions[name] = entry.version; } return versions; } /** * 应用 SaaS OTA 更新到本地缓存 * @param updates 从 SaaS 拉取的更新列表 */ export function applyPromptUpdates(updates: Array<{ name: string; version: number; system_prompt: string; user_prompt_template: string | null; source: string; changelog?: string | null; }>): number { const cache = loadPromptCache(); let applied = 0; for (const update of updates) { cache[update.name] = { name: update.name, version: update.version, source: update.source, system: update.system_prompt, userTemplate: update.user_prompt_template, syncedAt: new Date().toISOString(), }; applied++; } if (applied > 0) { savePromptCache(cache); } return applied; } /** * 后台异步检查 SaaS 提示词更新 * 启动时和每 30 分钟调用一次 */ let promptSyncTimer: ReturnType | null = null; export function startPromptOTASync(deviceId: string): void { if (promptSyncTimer) return; // 已启动 if (typeof window === 'undefined') return; const doSync = async () => { try { const { saasClient } = await import('./saas-client'); const { useSaaSStore } = await import('../store/saasStore'); const { saasUrl, authToken } = useSaaSStore.getState(); if (!saasUrl || !authToken) return; saasClient.setBaseUrl(saasUrl); saasClient.setToken(authToken); const versions = getAllPromptVersions(); const result = await saasClient.checkPromptUpdates(deviceId, versions); if 
(result.updates.length > 0) { const applied = applyPromptUpdates(result.updates); if (applied > 0) { log.debug(`已更新 ${applied} 个提示词模板`); } } } catch (err) { // 静默失败,不影响正常使用 log.debug('检查更新失败:', err); } }; // 立即执行一次 doSync(); // 每 30 分钟检查一次 promptSyncTimer = setInterval(doSync, 30 * 60 * 1000); } export function stopPromptOTASync(): void { if (promptSyncTimer) { clearInterval(promptSyncTimer); promptSyncTimer = null; } } // 保留向后兼容的 LLM_PROMPTS 导出(读取走 PromptCache) export const LLM_PROMPTS = { get reflection() { return { system: getSystemPrompt('reflection'), user: getUserPromptTemplate('reflection')! }; }, get compaction() { return { system: getSystemPrompt('compaction'), user: getUserPromptTemplate('compaction')! }; }, get extraction() { return { system: getSystemPrompt('extraction'), user: getUserPromptTemplate('extraction')! }; }, }; // === Telemetry Integration === /** * 记录一次 LLM 调用结果到遥测收集器。 * 所有 adapter 的 complete() 返回后应调用此函数。 */ function trackLLMCall( adapter: LLMServiceAdapter, response: LLMResponse, error?: unknown, ): void { try { const { recordLLMUsage } = require('./telemetry-collector'); recordLLMUsage( response.model || adapter.getProvider(), response.tokensUsed?.input ?? 0, response.tokensUsed?.output ?? 0, { latencyMs: response.latencyMs, success: !error, errorType: error instanceof Error ? error.message.slice(0, 80) : undefined, connectionMode: adapter.getProvider() === 'saas' ? 'saas' : 'tauri', }, ); } catch (e) { log.debug('Telemetry recording failed (SSR or unavailable)', { error: e }); } } // === Helper Functions === export async function llmReflect(context: string, adapter?: LLMServiceAdapter): Promise { const llm = adapter || getLLMAdapter(); const response = await llm.complete([ { role: 'system', content: LLM_PROMPTS.reflection.system }, { role: 'user', content: typeof LLM_PROMPTS.reflection.user === 'function' ? 
LLM_PROMPTS.reflection.user(context) : LLM_PROMPTS.reflection.user },
  ]);
  trackLLMCall(llm, response);
  return response.content;
}

/** Run the compaction (summarization) prompt and return the raw completion. */
export async function llmCompact(messages: string, adapter?: LLMServiceAdapter): Promise<string> {
  const llm = adapter || getLLMAdapter();
  const response = await llm.complete([
    { role: 'system', content: LLM_PROMPTS.compaction.system },
    // Cached templates are plain strings; hardcoded defaults are builder functions
    { role: 'user', content: typeof LLM_PROMPTS.compaction.user === 'function' ? LLM_PROMPTS.compaction.user(messages) : LLM_PROMPTS.compaction.user },
  ]);
  trackLLMCall(llm, response);
  return response.content;
}

/** Run the memory-extraction prompt and return the raw completion. */
export async function llmExtract(
  conversation: string,
  adapter?: LLMServiceAdapter
): Promise<string> {
  const llm = adapter || getLLMAdapter();
  const response = await llm.complete([
    { role: 'system', content: LLM_PROMPTS.extraction.system },
    { role: 'user', content: typeof LLM_PROMPTS.extraction.user === 'function' ? LLM_PROMPTS.extraction.user(conversation) : LLM_PROMPTS.extraction.user },
  ]);
  trackLLMCall(llm, response);
  return response.content;
}