zclaw_openfang/desktop/src/lib/llm-service.ts
refactor: unify project name from OpenFang to ZCLAW

Rename the project from OpenFang to ZCLAW throughout the code and documentation, covering:
- project names in configuration files
- code comments and documentation references
- environment variables and paths
- type definitions and interface names
- test cases and mock data

This change also tidies up parts of the code structure, removes unused modules, and updates related dependencies.
2026-03-27 07:36:03 +08:00


/**
* LLM Service Adapter - Unified LLM interface for L4 self-evolution engines
*
* Provides a unified interface for:
* - ReflectionEngine: Semantic analysis + deep reflection
* - ContextCompactor: High-quality summarization
* - MemoryExtractor: Semantic importance scoring
*
* Supports multiple backends:
* - OpenAI (GPT-4, GPT-3.5)
* - Volcengine (Doubao)
* - ZCLAW Gateway (passthrough)
*
* Part of ZCLAW L4 Self-Evolution capability.
*/
import { DEFAULT_MODEL_ID, DEFAULT_OPENAI_BASE_URL } from '../constants/models';
// === Types ===
export type LLMProvider = 'openai' | 'volcengine' | 'gateway' | 'mock';
export interface LLMConfig {
provider: LLMProvider;
model?: string;
apiKey?: string;
apiBase?: string;
maxTokens?: number;
temperature?: number;
timeout?: number;
}
export interface LLMMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
export interface LLMResponse {
content: string;
tokensUsed?: {
input: number;
output: number;
};
model?: string;
latencyMs?: number;
}
export interface LLMServiceAdapter {
complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse>;
isAvailable(): boolean;
getProvider(): LLMProvider;
}
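// Illustrative usage (a minimal sketch; the messages shown are placeholders —
// `getLLMAdapter` is the factory defined later in this file):
//
//   const adapter = getLLMAdapter();
//   if (adapter.isAvailable()) {
//     const res = await adapter.complete([
//       { role: 'system', content: 'You are a concise summarizer.' },
//       { role: 'user', content: 'Summarize the last session.' },
//     ]);
//     console.log(res.content, res.tokensUsed, res.latencyMs);
//   }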
// === Default Configs ===
const DEFAULT_CONFIGS: Record<LLMProvider, LLMConfig> = {
openai: {
provider: 'openai',
model: DEFAULT_MODEL_ID,
apiBase: DEFAULT_OPENAI_BASE_URL,
maxTokens: 2000,
temperature: 0.7,
timeout: 30000,
},
volcengine: {
provider: 'volcengine',
model: 'doubao-pro-32k',
apiBase: 'https://ark.cn-beijing.volces.com/api/v3',
maxTokens: 2000,
temperature: 0.7,
timeout: 30000,
},
gateway: {
provider: 'gateway',
apiBase: '/api/llm',
maxTokens: 2000,
temperature: 0.7,
timeout: 60000,
},
mock: {
provider: 'mock',
maxTokens: 100,
temperature: 0,
timeout: 100,
},
};
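// Note: each adapter shallow-merges per-call options over these defaults, so e.g.
//   adapter.complete(messages, { temperature: 0.2, maxTokens: 500 })
// tightens sampling for one call without mutating the adapter's stored config.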
// === Storage ===
const LLM_CONFIG_KEY = 'zclaw-llm-config';
// === Mock Adapter (for testing) ===
class MockLLMAdapter implements LLMServiceAdapter {
constructor(_config: LLMConfig) {
// Config is accepted for interface parity but is currently unused;
// future mock behavior could be derived from it.
}
async complete(messages: LLMMessage[]): Promise<LLMResponse> {
// Simulate latency
await new Promise((resolve) => setTimeout(resolve, 50));
const lastMessage = messages[messages.length - 1];
const content = lastMessage?.content || '';
// Generate mock response based on content type
let response = '[Mock LLM Response] ';
if (content.includes('reflect') || content.includes('反思')) {
response += JSON.stringify({
patterns: [
{
observation: '用户经常询问代码优化相关问题',
frequency: 5,
sentiment: 'positive',
evidence: ['多次讨论性能优化', '关注代码质量'],
},
],
improvements: [
{
area: '代码解释',
suggestion: '可以提供更详细的代码注释',
priority: 'medium',
},
],
identityProposals: [],
});
} else if (content.includes('summarize') || content.includes('摘要')) {
response += '这是一个关于对话内容的摘要,包含了主要讨论的要点和结论。';
} else if (content.includes('importance') || content.includes('重要性')) {
response += JSON.stringify({
memories: [
{ content: '用户偏好简洁的回答', importance: 7, type: 'preference' },
],
});
} else {
response += 'Processed: ' + content.slice(0, 50);
}
return {
content: response,
tokensUsed: { input: Math.ceil(content.length / 4), output: Math.ceil(response.length / 4) },
model: 'mock-model',
latencyMs: 50,
};
}
isAvailable(): boolean {
return true;
}
getProvider(): LLMProvider {
return 'mock';
}
}
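// Illustrative test usage (a sketch; assumes a Vitest/Jest-style `expect`):
//
//   const mock = createLLMAdapter({ provider: 'mock' });
//   const res = await mock.complete([{ role: 'user', content: 'reflect on recent chats' }]);
//   // Mock replies are prefixed with '[Mock LLM Response] '; strip it before parsing.
//   const body = res.content.replace('[Mock LLM Response] ', '');
//   expect(JSON.parse(body).patterns).toBeDefined();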
// === OpenAI Adapter ===
class OpenAILLMAdapter implements LLMServiceAdapter {
private config: LLMConfig;
constructor(config: LLMConfig) {
this.config = { ...DEFAULT_CONFIGS.openai, ...config };
}
async complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse> {
const config = { ...this.config, ...options };
const startTime = Date.now();
if (!config.apiKey) {
throw new Error('[OpenAI] API key not configured');
}
const response = await fetch(`${config.apiBase}/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${config.apiKey}`,
},
body: JSON.stringify({
model: config.model,
messages,
max_tokens: config.maxTokens,
temperature: config.temperature,
}),
signal: AbortSignal.timeout(config.timeout || 30000),
});
if (!response.ok) {
const errorBody = await response.text();
// Log full error in development only
if (import.meta.env.DEV) {
console.error('[OpenAI] API error:', errorBody);
}
// Return sanitized error to caller
throw new Error(`[OpenAI] API error: ${response.status} - Request failed`);
}
const data = await response.json();
const latencyMs = Date.now() - startTime;
return {
content: data.choices?.[0]?.message?.content || '',
tokensUsed: {
input: data.usage?.prompt_tokens || 0,
output: data.usage?.completion_tokens || 0,
},
model: data.model,
latencyMs,
};
}
isAvailable(): boolean {
return !!this.config.apiKey;
}
getProvider(): LLMProvider {
return 'openai';
}
}
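// Illustrative configuration (a sketch; the key value is a placeholder, supplied
// at runtime and never persisted — see saveConfig below):
//
//   const openai = createLLMAdapter({
//     provider: 'openai',
//     apiKey: '<runtime-injected-key>',
//     model: DEFAULT_MODEL_ID,
//   });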
// === Volcengine Adapter ===
class VolcengineLLMAdapter implements LLMServiceAdapter {
private config: LLMConfig;
constructor(config: LLMConfig) {
this.config = { ...DEFAULT_CONFIGS.volcengine, ...config };
}
async complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse> {
const config = { ...this.config, ...options };
const startTime = Date.now();
if (!config.apiKey) {
throw new Error('[Volcengine] API key not configured');
}
const response = await fetch(`${config.apiBase}/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${config.apiKey}`,
},
body: JSON.stringify({
model: config.model,
messages,
max_tokens: config.maxTokens,
temperature: config.temperature,
}),
signal: AbortSignal.timeout(config.timeout || 30000),
});
if (!response.ok) {
const errorBody = await response.text();
// Log full error in development only
if (import.meta.env.DEV) {
console.error('[Volcengine] API error:', errorBody);
}
// Return sanitized error to caller
throw new Error(`[Volcengine] API error: ${response.status} - Request failed`);
}
const data = await response.json();
const latencyMs = Date.now() - startTime;
return {
content: data.choices?.[0]?.message?.content || '',
tokensUsed: {
input: data.usage?.prompt_tokens || 0,
output: data.usage?.completion_tokens || 0,
},
model: data.model,
latencyMs,
};
}
isAvailable(): boolean {
return !!this.config.apiKey;
}
getProvider(): LLMProvider {
return 'volcengine';
}
}
// === Gateway Adapter (pass through to ZCLAW or internal Kernel) ===
class GatewayLLMAdapter implements LLMServiceAdapter {
private config: LLMConfig;
constructor(config: LLMConfig) {
this.config = { ...DEFAULT_CONFIGS.gateway, ...config };
}
async complete(messages: LLMMessage[], options?: Partial<LLMConfig>): Promise<LLMResponse> {
const config = { ...this.config, ...options };
const startTime = Date.now();
// Build a single prompt from the first system and user messages (assistant turns are dropped)
const systemMessage = messages.find(m => m.role === 'system')?.content || '';
const userMessage = messages.find(m => m.role === 'user')?.content || '';
// Combine system and user messages into a single prompt
const fullPrompt = systemMessage
? `${systemMessage}\n\n${userMessage}`
: userMessage;
// Check if running in Tauri with internal kernel
// Use the same detection as kernel-client.ts
const isTauri = typeof window !== 'undefined' &&
'__TAURI_INTERNALS__' in window;
if (isTauri) {
// Use internal Kernel via Tauri invoke
try {
const { invoke } = await import('@tauri-apps/api/core');
// Read the default agent ID from localStorage; null lets the kernel pick its default agent
const agentId = localStorage.getItem('zclaw-default-agent-id');
const response = await invoke<{ content: string; input_tokens: number; output_tokens: number }>('agent_chat', {
request: {
agentId: agentId || null, // null will use default agent
message: fullPrompt,
},
});
const latencyMs = Date.now() - startTime;
return {
content: response.content || '',
tokensUsed: {
input: response.input_tokens || 0,
output: response.output_tokens || 0,
},
latencyMs,
};
} catch (err) {
console.warn('[LLMService] Kernel chat failed, returning empty response:', err);
// Return empty response instead of throwing
return {
content: '',
tokensUsed: { input: 0, output: 0 },
latencyMs: Date.now() - startTime,
};
}
}
// External Gateway mode: Use ZCLAW's chat endpoint
const agentId = localStorage.getItem('zclaw-default-agent-id') || 'default';
const response = await fetch(`/api/agents/${agentId}/message`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
message: fullPrompt,
max_tokens: config.maxTokens,
temperature: config.temperature ?? 0.3, // Lower temperature for extraction tasks
}),
signal: AbortSignal.timeout(config.timeout || 60000),
});
if (!response.ok) {
const error = await response.text();
// If agent not found, try without agent ID (direct /api/chat)
if (response.status === 404) {
const fallbackResponse = await fetch('/api/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
message: fullPrompt,
max_tokens: config.maxTokens,
temperature: config.temperature ?? 0.3,
}),
signal: AbortSignal.timeout(config.timeout || 60000),
});
if (!fallbackResponse.ok) {
throw new Error(`[Gateway] Both endpoints failed: ${fallbackResponse.status}`);
}
const data = await fallbackResponse.json();
const latencyMs = Date.now() - startTime;
return {
content: data.response || data.content || '',
tokensUsed: { input: data.input_tokens || 0, output: data.output_tokens || 0 },
latencyMs,
};
}
throw new Error(`[Gateway] API error: ${response.status} - ${error}`);
}
const data = await response.json();
const latencyMs = Date.now() - startTime;
return {
content: data.response || data.content || '',
tokensUsed: { input: data.input_tokens || 0, output: data.output_tokens || 0 },
latencyMs,
};
}
isAvailable(): boolean {
// Gateway is available if we're in browser (can connect to ZCLAW)
return typeof window !== 'undefined';
}
getProvider(): LLMProvider {
return 'gateway';
}
}
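// Illustrative: forcing gateway mode regardless of the saved config (a sketch):
//
//   const gw = createLLMAdapter({ provider: 'gateway' });
//   // In Tauri this routes through the internal kernel via the `agent_chat` command;
//   // in a plain browser it POSTs to /api/agents/:id/message (with /api/chat as fallback).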
// === Factory ===
let cachedAdapter: LLMServiceAdapter | null = null;
export function createLLMAdapter(config?: Partial<LLMConfig>): LLMServiceAdapter {
const savedConfig = loadConfig();
const finalConfig = { ...savedConfig, ...config };
switch (finalConfig.provider) {
case 'openai':
return new OpenAILLMAdapter(finalConfig);
case 'volcengine':
return new VolcengineLLMAdapter(finalConfig);
case 'gateway':
return new GatewayLLMAdapter(finalConfig);
case 'mock':
default:
return new MockLLMAdapter(finalConfig);
}
}
export function getLLMAdapter(): LLMServiceAdapter {
if (!cachedAdapter) {
cachedAdapter = createLLMAdapter();
}
return cachedAdapter;
}
export function resetLLMAdapter(): void {
cachedAdapter = null;
}
// === Config Management ===
export function loadConfig(): LLMConfig {
if (typeof window === 'undefined') {
return DEFAULT_CONFIGS.mock;
}
try {
const saved = localStorage.getItem(LLM_CONFIG_KEY);
if (saved) {
return JSON.parse(saved);
}
} catch {
// Ignore parse errors
}
// Default to gateway (ZCLAW passthrough) for L4 self-evolution
return DEFAULT_CONFIGS.gateway;
}
export function saveConfig(config: LLMConfig): void {
if (typeof window === 'undefined') return;
// Don't save API key to localStorage for security
const safeConfig = { ...config };
delete safeConfig.apiKey;
localStorage.setItem(LLM_CONFIG_KEY, JSON.stringify(safeConfig));
resetLLMAdapter();
}
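// Illustrative: switching the persisted provider (a sketch). Note that saveConfig
// strips the apiKey, so key-based providers must re-supply it each session:
//
//   saveConfig({ ...loadConfig(), provider: 'volcengine', model: 'doubao-pro-32k' });
//   const adapter = getLLMAdapter(); // re-created lazily after resetLLMAdapter()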
// === Prompt Templates ===
export const LLM_PROMPTS = {
reflection: {
system: `你是一个 AI Agent 的自我反思引擎。分析最近的对话历史,识别行为模式,并生成改进建议。
输出 JSON 格式:
{
"patterns": [
{
"observation": "观察到的模式描述",
"frequency": 数字,
"sentiment": "positive/negative/neutral",
"evidence": ["证据1", "证据2"]
}
],
"improvements": [
{
"area": "改进领域",
"suggestion": "具体建议",
"priority": "high/medium/low"
}
],
"identityProposals": []
}`,
user: (context: string) => `分析以下对话历史,进行自我反思:
${context}
请识别行为模式(积极和消极),并提供具体的改进建议。`,
},
compaction: {
system: `你是一个对话摘要专家。将长对话压缩为简洁的摘要,保留关键信息。
要求:
1. 保留所有重要决策和结论
2. 保留用户偏好和约束
3. 保留未完成的任务
4. 保持时间顺序
5. 摘要应能在后续对话中替代原始内容`,
user: (messages: string) => `请将以下对话压缩为简洁摘要,保留关键信息:
${messages}`,
},
extraction: {
system: `你是一个记忆提取专家。从对话中提取值得长期记住的信息。
提取类型:
- fact: 用户告知的事实(如"我的公司叫XXX")
- preference: 用户的偏好(如"我喜欢简洁的回答")
- lesson: 本次对话的经验教训
- task: 未完成的任务或承诺
输出 JSON 数组:
[
{
"content": "记忆内容",
"type": "fact/preference/lesson/task",
"importance": 1-10,
"tags": ["标签1", "标签2"]
}
]`,
user: (conversation: string) => `从以下对话中提取值得长期记住的信息:
${conversation}
如果没有值得记忆的内容,返回空数组 []。`,
},
};
// === Helper Functions ===
export async function llmReflect(context: string, adapter?: LLMServiceAdapter): Promise<string> {
const llm = adapter || getLLMAdapter();
const response = await llm.complete([
{ role: 'system', content: LLM_PROMPTS.reflection.system },
{ role: 'user', content: LLM_PROMPTS.reflection.user(context) },
]);
return response.content;
}
export async function llmCompact(messages: string, adapter?: LLMServiceAdapter): Promise<string> {
const llm = adapter || getLLMAdapter();
const response = await llm.complete([
{ role: 'system', content: LLM_PROMPTS.compaction.system },
{ role: 'user', content: LLM_PROMPTS.compaction.user(messages) },
]);
return response.content;
}
export async function llmExtract(
conversation: string,
adapter?: LLMServiceAdapter
): Promise<string> {
const llm = adapter || getLLMAdapter();
const response = await llm.complete([
{ role: 'system', content: LLM_PROMPTS.extraction.system },
{ role: 'user', content: LLM_PROMPTS.extraction.user(conversation) },
]);
return response.content;
}
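// Illustrative end-to-end usage (a sketch; the conversation text is a placeholder).
// The helpers return raw strings, so callers should guard JSON.parse for the
// reflection/extraction prompts, which request JSON output:
//
//   const raw = await llmExtract('user: 我的公司叫 ZCLAW\nassistant: 好的,已记住。');
//   let memories: unknown[] = [];
//   try {
//     memories = JSON.parse(raw);
//   } catch {
//     // Non-JSON reply (e.g. mock prefix or model chatter); treat as no memories.
//   }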