feat(chat): LLM-generated dynamic conversation suggestions, replacing hardcoded keyword matching

After an AI reply finishes, the most recent conversation turns are sent to the LLM to generate 3 context-relevant follow-up questions,
replacing the old generic default suggestions such as "继续深入分析" ("dig deeper into the analysis").

Changes:
- llm-service.ts: add the suggestions prompt template + an llmSuggest() helper
- streamStore.ts: SSE streaming request via the SaaS relay; read the response in one
  pass with response.text() to avoid Tauri WebView2 ReadableStream compatibility issues;
  fall back to keyword matching on failure (see the sketch below)
- chatStore.ts: mirror the suggestionsLoading state
- SuggestionChips.tsx: loading skeleton animation
- ChatArea.tsx: pass the loading prop
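
The keyword fallback itself is not part of the diff shown below. A minimal sketch of the degradation path, assuming a hypothetical keywordSuggestions() helper and a parseSuggestions() parser (sketched after the first hunk); none of these names are confirmed by the commit:

// Sketch, not commit code: getSuggestions, keywordSuggestions and parseSuggestions
// are assumed names illustrating the documented fallback behavior.
async function getSuggestions(context: string): Promise<string[]> {
  try {
    const raw = await generateLLMSuggestions(context); // LLM path, see diff below
    const parsed = parseSuggestions(raw);              // parser sketched further down
    if (parsed.length > 0) return parsed;
    throw new Error('LLM returned no usable suggestions');
  } catch (err) {
    console.warn('[Suggest] LLM suggestions failed, falling back to keywords:', err);
    return keywordSuggestions(context); // the old hardcoded keyword matching
  }
}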
Author: iven
Date: 2026-04-23 11:41:50 +08:00
Parent: 3e78dacef3
Commit: b56d1a4c34
5 changed files with 113 additions and 34 deletions


@@ -573,10 +573,8 @@ async function generateLLMSuggestions(
   let raw: string;
   if (connectionMode === 'saas') {
     // SaaS relay: use saasClient directly for reliable auth
     raw = await llmSuggestViaSaaS(context);
   } else {
     // Local kernel: use llm-service adapter (GatewayLLMAdapter → agent_chat)
     raw = await llmSuggest(context);
   }
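
The hunk above only produces raw, the model's text output; per the docstring in the next hunk, the suggestion JSON is then parsed from the accumulated text, but that parser is outside this diff. A minimal sketch, assuming the model answers with a JSON array of up to three strings that may be wrapped in a markdown code fence (parseSuggestions is an assumed name, not commit code):

// Sketch, not commit code: parseSuggestions is an assumed helper name.
function parseSuggestions(raw: string): string[] {
  // Strip an optional ```json ... ``` fence that models often add.
  const body = raw.replace(/^\s*```(?:json)?\s*/, '').replace(/\s*```\s*$/, '');
  // Take the outermost JSON array in case the model adds surrounding prose.
  const start = body.indexOf('[');
  const end = body.lastIndexOf(']');
  if (start === -1 || end <= start) return [];
  try {
    const arr = JSON.parse(body.slice(start, end + 1));
    return Array.isArray(arr)
      ? arr.filter((s): s is string => typeof s === 'string').slice(0, 3)
      : [];
  } catch {
    return []; // caller falls back to keyword suggestions
  }
}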
@@ -596,41 +594,72 @@ async function generateLLMSuggestions(
 }
 
 /**
- * Generate suggestions via SaaS relay, using saasStore auth directly.
+ * Generate suggestions via SaaS relay using SSE streaming.
+ * Uses the same streaming path as the main chat to avoid relay timeout issues
+ * with non-streaming requests. Collects the full response from SSE deltas,
+ * then parses the suggestion JSON from the accumulated text.
  */
 async function llmSuggestViaSaaS(context: string): Promise<string> {
   const { useSaaSStore } = await import('../saasStore');
   const { saasUrl, authToken } = useSaaSStore.getState();
   if (!saasUrl || !authToken) {
     throw new Error('SaaS not authenticated');
   }
   const { saasClient } = await import('../../lib/saas-client');
   saasClient.setBaseUrl(saasUrl);
   saasClient.setToken(authToken);
+  const { useConversationStore } = await import('./conversationStore');
+  const currentModel = useConversationStore.getState().currentModel;
+  const availableModels = useSaaSStore.getState().availableModels;
+  const model = currentModel || (availableModels.length > 0 ? availableModels[0]?.id : undefined);
+  if (!model) throw new Error('No model available for suggestions');
-  const response = await saasClient.chatCompletion(
-    {
-      model: 'default',
-      messages: [
-        { role: 'system', content: LLM_PROMPTS_SYSTEM },
-        { role: 'user', content: `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。` },
-      ],
-      max_tokens: 500,
-      temperature: 0.7,
-      stream: false,
-    },
-    AbortSignal.timeout(15000),
-  );
-  if (!response.ok) {
-    const errText = await response.text().catch(() => 'unknown error');
-    throw new Error(`SaaS relay error ${response.status}: ${errText.substring(0, 100)}`);
-  }
-  const data = await response.json();
-  return data?.choices?.[0]?.message?.content || '';
+  // Delay to avoid concurrent relay requests with memory extraction
+  await new Promise(r => setTimeout(r, 2000));
+  const controller = new AbortController();
+  const timeoutId = setTimeout(() => controller.abort(), 60000);
+  try {
+    const response = await saasClient.chatCompletion(
+      {
+        model,
+        messages: [
+          { role: 'system', content: LLM_PROMPTS_SYSTEM },
+          { role: 'user', content: `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。` },
+        ],
+        max_tokens: 500,
+        temperature: 0.7,
+        stream: true,
+      },
+      controller.signal,
+    );
+    if (!response.ok) {
+      const errText = await response.text().catch(() => 'unknown error');
+      throw new Error(`SaaS relay error ${response.status}: ${errText.substring(0, 100)}`);
+    }
+    // Read full response as text — suggestion responses are small (max 500 tokens),
+    // so streaming is unnecessary. This avoids ReadableStream compatibility issues
+    // in Tauri WebView2 where body.getReader() may not yield SSE chunks correctly.
+    const rawText = await response.text();
+    log.debug('[Suggest] Raw response length:', rawText.length);
+    // Parse SSE "data:" lines from accumulated text
+    let accumulated = '';
+    for (const line of rawText.split('\n')) {
+      const trimmed = line.trim();
+      if (!trimmed.startsWith('data: ')) continue;
+      const payload = trimmed.slice(6).trim();
+      if (payload === '[DONE]') continue;
+      try {
+        const parsed = JSON.parse(payload);
+        const delta = parsed.choices?.[0]?.delta;
+        if (delta?.content) accumulated += delta.content;
+      } catch { /* skip malformed lines */ }
+    }
+    log.debug('[Suggest] Accumulated length:', accumulated.length);
+    return accumulated;
+  } finally {
+    clearTimeout(timeoutId);
+  }
 }
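
// Illustration, not commit code: the SSE loop above run against a made-up relay
// payload, to show what ends up in the accumulated string. The chunk shape mirrors
// the OpenAI-style fields the loop already reads (choices[0].delta.content).
const sample = [
  'data: {"choices":[{"delta":{"content":"[\\"问"}}]}',
  'data: {"choices":[{"delta":{"content":"题1\\"]"}}]}',
  'data: [DONE]',
].join('\n');
let acc = '';
for (const line of sample.split('\n')) {
  const t = line.trim();
  if (!t.startsWith('data: ')) continue;
  const p = t.slice(6).trim();
  if (p === '[DONE]') continue;
  try {
    const chunk = JSON.parse(p);
    if (chunk.choices?.[0]?.delta?.content) acc += chunk.choices[0].delta.content;
  } catch { /* ignore malformed lines */ }
}
// acc is now '["问题1"]', which is then handed to the suggestion JSON parser.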
const LLM_PROMPTS_SYSTEM = `你是对话分析助手。根据最近的对话内容,生成 3 个用户可能想继续探讨的问题。