feat(chat): LLM 动态对话建议 — 替换硬编码关键词匹配
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

AI 回复结束后,将最近对话发给 LLM 生成 3 个上下文相关的后续问题,
替换原有的"继续深入分析"等泛泛默认建议。

变更:
- llm-service.ts: 添加 suggestions 提示模板 + llmSuggest() 辅助函数
- streamStore.ts: SSE 流式请求 via SaaS relay,response.text() 一次性
  读取避免 Tauri WebView2 ReadableStream 兼容问题,失败降级到关键词
- chatStore.ts: suggestionsLoading 状态镜像
- SuggestionChips.tsx: loading 骨架动画
- ChatArea.tsx: 传递 loading prop
This commit is contained in:
iven
2026-04-23 11:41:50 +08:00
parent 3e78dacef3
commit b56d1a4c34
5 changed files with 113 additions and 34 deletions

View File

@@ -53,7 +53,7 @@ export function ChatArea({ compact, onOpenDetail }: { compact?: boolean; onOpenD
const { const {
messages, isStreaming, isLoading, messages, isStreaming, isLoading,
sendMessage: sendToGateway, initStreamListener, sendMessage: sendToGateway, initStreamListener,
chatMode, setChatMode, suggestions, chatMode, setChatMode, suggestions, suggestionsLoading,
totalInputTokens, totalOutputTokens, totalInputTokens, totalOutputTokens,
cancelStream, cancelStream,
} = useChatStore(); } = useChatStore();
@@ -505,9 +505,10 @@ export function ChatArea({ compact, onOpenDetail }: { compact?: boolean; onOpenD
<div className="flex-shrink-0 p-4 bg-white dark:bg-gray-900"> <div className="flex-shrink-0 p-4 bg-white dark:bg-gray-900">
<div className="max-w-4xl mx-auto"> <div className="max-w-4xl mx-auto">
{/* Suggestion chips */} {/* Suggestion chips */}
{!isStreaming && suggestions.length > 0 && !messages.some(m => m.error) && ( {!isStreaming && !messages.some(m => m.error) && (suggestions.length > 0 || suggestionsLoading) && (
<SuggestionChips <SuggestionChips
suggestions={suggestions} suggestions={suggestions}
loading={suggestionsLoading}
onSelect={(text) => { setInput(text); textareaRef.current?.focus(); setTimeout(() => handleSend(), 0); }} onSelect={(text) => { setInput(text); textareaRef.current?.focus(); setTimeout(() => handleSend(), 0); }}
className="mb-3" className="mb-3"
/> />

View File

@@ -7,15 +7,30 @@ import { motion } from 'framer-motion';
* - Horizontal scrollable chip list * - Horizontal scrollable chip list
* - Click to fill input * - Click to fill input
* - Animated entrance * - Animated entrance
* - Loading skeleton while LLM generates suggestions
*/ */
interface SuggestionChipsProps { interface SuggestionChipsProps {
suggestions: string[]; suggestions: string[];
loading?: boolean;
onSelect: (text: string) => void; onSelect: (text: string) => void;
className?: string; className?: string;
} }
export function SuggestionChips({ suggestions, onSelect, className = '' }: SuggestionChipsProps) { export function SuggestionChips({ suggestions, loading, onSelect, className = '' }: SuggestionChipsProps) {
if (loading && suggestions.length === 0) {
return (
<div className={`flex flex-wrap gap-2 ${className}`}>
{[0, 1, 2].map((i) => (
<div
key={i}
className="h-7 w-28 rounded-full bg-gray-100 dark:bg-gray-800 animate-pulse"
/>
))}
</div>
);
}
if (suggestions.length === 0) return null; if (suggestions.length === 0) return null;
return ( return (

View File

@@ -644,6 +644,21 @@ const HARDCODED_PROMPTS: Record<string, { system: string; user: (arg: string) =>
]`, ]`,
user: (conversation: string) => `从以下对话中提取值得长期记住的信息:\n\n${conversation}\n\n如果没有值得记忆的内容返回空数组 []。`, user: (conversation: string) => `从以下对话中提取值得长期记住的信息:\n\n${conversation}\n\n如果没有值得记忆的内容返回空数组 []。`,
}, },
// Prompt pair for generating follow-up question suggestions after an AI reply.
// The system prompt instructs the model to emit ONLY a JSON array of exactly
// 3 strings (callers parse it); the user template injects the recent
// conversation context. Prompt text is in Chinese by design — it asks the
// model to answer in the user's own language.
suggestions: {
system: `你是对话分析助手。根据最近的对话内容,生成 3 个用户可能想继续探讨的问题。
要求:
- 每个问题必须与对话内容直接相关,具体且有针对性
- 帮助用户深入理解、实际操作或拓展思路
- 每个问题不超过 30 个中文字符
- 不要重复对话中已讨论过的内容
- 使用与用户相同的语言
只输出 JSON 数组,包含恰好 3 个字符串。不要输出任何其他内容。
示例:["如何在生产环境中部署?", "这个方案的成本如何?", "有没有更简单的替代方案?"]`,
// Template function: receives the pre-formatted recent messages as one string.
user: (context: string) => `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。`,
},
}; };
// === Prompt Cache (SaaS OTA) === // === Prompt Cache (SaaS OTA) ===
@@ -806,6 +821,7 @@ export const LLM_PROMPTS = {
get reflection() { return { system: getSystemPrompt('reflection'), user: getUserPromptTemplate('reflection')! }; }, get reflection() { return { system: getSystemPrompt('reflection'), user: getUserPromptTemplate('reflection')! }; },
get compaction() { return { system: getSystemPrompt('compaction'), user: getUserPromptTemplate('compaction')! }; }, get compaction() { return { system: getSystemPrompt('compaction'), user: getUserPromptTemplate('compaction')! }; },
get extraction() { return { system: getSystemPrompt('extraction'), user: getUserPromptTemplate('extraction')! }; }, get extraction() { return { system: getSystemPrompt('extraction'), user: getUserPromptTemplate('extraction')! }; },
get suggestions() { return { system: getSystemPrompt('suggestions'), user: getUserPromptTemplate('suggestions')! }; },
}; };
// === Telemetry Integration === // === Telemetry Integration ===
@@ -876,3 +892,18 @@ export async function llmExtract(
trackLLMCall(llm, response); trackLLMCall(llm, response);
return response.content; return response.content;
} }
/**
 * Generate follow-up question suggestions from recent conversation context.
 *
 * Sends the `suggestions` prompt pair (system + user template) to the LLM and
 * returns the raw completion text. The prompt asks for a JSON array of exactly
 * 3 strings; callers are responsible for parsing/validating that output.
 *
 * @param conversationContext - Recent conversation messages, pre-formatted as text.
 * @param adapter - Optional LLM adapter override; defaults to the shared adapter.
 * @returns The raw LLM completion content (expected: a JSON array of 3 strings).
 */
export async function llmSuggest(
  conversationContext: string,
  adapter?: LLMServiceAdapter,
): Promise<string> {
  // `??` rather than `||`: fall back only when no adapter was actually passed.
  const llm = adapter ?? getLLMAdapter();
  // The user prompt is normally a template function (see LLM_PROMPTS getter);
  // defensively tolerate a plain string, though that path drops the context —
  // NOTE(review): confirm the string branch is actually reachable via OTA prompts.
  const template = LLM_PROMPTS.suggestions.user;
  const userContent =
    typeof template === 'function' ? template(conversationContext) : template;
  const response = await llm.complete([
    { role: 'system', content: LLM_PROMPTS.suggestions.system },
    { role: 'user', content: userContent },
  ]);
  trackLLMCall(llm, response);
  return response.content;
}

View File

@@ -573,10 +573,8 @@ async function generateLLMSuggestions(
let raw: string; let raw: string;
if (connectionMode === 'saas') { if (connectionMode === 'saas') {
// SaaS relay: use saasClient directly for reliable auth
raw = await llmSuggestViaSaaS(context); raw = await llmSuggestViaSaaS(context);
} else { } else {
// Local kernel: use llm-service adapter (GatewayLLMAdapter → agent_chat)
raw = await llmSuggest(context); raw = await llmSuggest(context);
} }
@@ -596,32 +594,40 @@ async function generateLLMSuggestions(
} }
/** /**
* Generate suggestions via SaaS relay, using saasStore auth directly. * Generate suggestions via SaaS relay using SSE streaming.
* Uses the same streaming path as the main chat to avoid relay timeout issues
* with non-streaming requests. Collects the full response from SSE deltas,
* then parses the suggestion JSON from the accumulated text.
*/ */
async function llmSuggestViaSaaS(context: string): Promise<string> { async function llmSuggestViaSaaS(context: string): Promise<string> {
const { useSaaSStore } = await import('../saasStore');
const { saasUrl, authToken } = useSaaSStore.getState();
if (!saasUrl || !authToken) {
throw new Error('SaaS not authenticated');
}
const { saasClient } = await import('../../lib/saas-client'); const { saasClient } = await import('../../lib/saas-client');
saasClient.setBaseUrl(saasUrl); const { useConversationStore } = await import('./conversationStore');
saasClient.setToken(authToken); const { useSaaSStore } = await import('../saasStore');
const currentModel = useConversationStore.getState().currentModel;
const availableModels = useSaaSStore.getState().availableModels;
const model = currentModel || (availableModels.length > 0 ? availableModels[0]?.id : undefined);
if (!model) throw new Error('No model available for suggestions');
// Delay to avoid concurrent relay requests with memory extraction
await new Promise(r => setTimeout(r, 2000));
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 60000);
try {
const response = await saasClient.chatCompletion( const response = await saasClient.chatCompletion(
{ {
model: 'default', model,
messages: [ messages: [
{ role: 'system', content: LLM_PROMPTS_SYSTEM }, { role: 'system', content: LLM_PROMPTS_SYSTEM },
{ role: 'user', content: `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。` }, { role: 'user', content: `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。` },
], ],
max_tokens: 500, max_tokens: 500,
temperature: 0.7, temperature: 0.7,
stream: false, stream: true,
}, },
AbortSignal.timeout(15000), controller.signal,
); );
if (!response.ok) { if (!response.ok) {
@@ -629,8 +635,31 @@ async function llmSuggestViaSaaS(context: string): Promise<string> {
throw new Error(`SaaS relay error ${response.status}: ${errText.substring(0, 100)}`); throw new Error(`SaaS relay error ${response.status}: ${errText.substring(0, 100)}`);
} }
const data = await response.json(); // Read full response as text — suggestion responses are small (max 500 tokens),
return data?.choices?.[0]?.message?.content || ''; // so streaming is unnecessary. This avoids ReadableStream compatibility issues
// in Tauri WebView2 where body.getReader() may not yield SSE chunks correctly.
const rawText = await response.text();
log.debug('[Suggest] Raw response length:', rawText.length);
// Parse SSE "data:" lines from accumulated text
let accumulated = '';
for (const line of rawText.split('\n')) {
const trimmed = line.trim();
if (!trimmed.startsWith('data: ')) continue;
const payload = trimmed.slice(6).trim();
if (payload === '[DONE]') continue;
try {
const parsed = JSON.parse(payload);
const delta = parsed.choices?.[0]?.delta;
if (delta?.content) accumulated += delta.content;
} catch { /* skip malformed lines */ }
}
log.debug('[Suggest] Accumulated length:', accumulated.length);
return accumulated;
} finally {
clearTimeout(timeoutId);
}
} }
const LLM_PROMPTS_SYSTEM = `你是对话分析助手。根据最近的对话内容,生成 3 个用户可能想继续探讨的问题。 const LLM_PROMPTS_SYSTEM = `你是对话分析助手。根据最近的对话内容,生成 3 个用户可能想继续探讨的问题。

View File

@@ -79,6 +79,7 @@ interface ChatState {
totalOutputTokens: number; totalOutputTokens: number;
chatMode: ChatModeType; chatMode: ChatModeType;
suggestions: string[]; suggestions: string[];
suggestionsLoading: boolean;
addMessage: (message: Message) => void; addMessage: (message: Message) => void;
updateMessage: (id: string, updates: Partial<Message>) => void; updateMessage: (id: string, updates: Partial<Message>) => void;
@@ -111,6 +112,7 @@ export const useChatStore = create<ChatState>()(
isLoading: false, isLoading: false,
chatMode: 'thinking' as ChatModeType, chatMode: 'thinking' as ChatModeType,
suggestions: [], suggestions: [],
suggestionsLoading: false,
totalInputTokens: 0, totalInputTokens: 0,
totalOutputTokens: 0, totalOutputTokens: 0,
@@ -367,6 +369,7 @@ const unsubStream = useStreamStore.subscribe((state) => {
if (chat.isLoading !== state.isLoading) updates.isLoading = state.isLoading; if (chat.isLoading !== state.isLoading) updates.isLoading = state.isLoading;
if (chat.chatMode !== state.chatMode) updates.chatMode = state.chatMode; if (chat.chatMode !== state.chatMode) updates.chatMode = state.chatMode;
if (chat.suggestions !== state.suggestions) updates.suggestions = state.suggestions; if (chat.suggestions !== state.suggestions) updates.suggestions = state.suggestions;
if (chat.suggestionsLoading !== state.suggestionsLoading) updates.suggestionsLoading = state.suggestionsLoading;
if (Object.keys(updates).length > 0) { if (Object.keys(updates).length > 0) {
useChatStore.setState(updates); useChatStore.setState(updates);
} }