diff --git a/desktop/src/components/ChatArea.tsx b/desktop/src/components/ChatArea.tsx
index 6af5475..0b7fe69 100644
--- a/desktop/src/components/ChatArea.tsx
+++ b/desktop/src/components/ChatArea.tsx
@@ -53,7 +53,7 @@ export function ChatArea({ compact, onOpenDetail }: { compact?: boolean; onOpenD
const {
messages, isStreaming, isLoading,
sendMessage: sendToGateway, initStreamListener,
- chatMode, setChatMode, suggestions,
+ chatMode, setChatMode, suggestions, suggestionsLoading,
totalInputTokens, totalOutputTokens,
cancelStream,
} = useChatStore();
@@ -505,9 +505,10 @@ export function ChatArea({ compact, onOpenDetail }: { compact?: boolean; onOpenD
{/* Suggestion chips */}
- {!isStreaming && suggestions.length > 0 && !messages.some(m => m.error) && (
+ {!isStreaming && !messages.some(m => m.error) && (suggestions.length > 0 || suggestionsLoading) && (
+              <SuggestionChips suggestions={suggestions} loading={suggestionsLoading} onSelect={(text) => { setInput(text); textareaRef.current?.focus(); setTimeout(() => handleSend(), 0); }}
className="mb-3"
/>
diff --git a/desktop/src/components/ai/SuggestionChips.tsx b/desktop/src/components/ai/SuggestionChips.tsx
index 99fa9ba..5c2f143 100644
--- a/desktop/src/components/ai/SuggestionChips.tsx
+++ b/desktop/src/components/ai/SuggestionChips.tsx
@@ -7,15 +7,30 @@ import { motion } from 'framer-motion';
* - Horizontal scrollable chip list
* - Click to fill input
* - Animated entrance
+ * - Loading skeleton while LLM generates suggestions
*/
interface SuggestionChipsProps {
suggestions: string[];
+ loading?: boolean;
onSelect: (text: string) => void;
className?: string;
}
-export function SuggestionChips({ suggestions, onSelect, className = '' }: SuggestionChipsProps) {
+export function SuggestionChips({ suggestions, loading, onSelect, className = '' }: SuggestionChipsProps) {
+ if (loading && suggestions.length === 0) {
+    return (
+      <div className={`flex gap-2 overflow-x-auto ${className}`}>
+        {[0, 1, 2].map((i) => (
+          <div key={i} className="h-8 w-36 shrink-0 animate-pulse rounded-full bg-muted/60" />
+        ))}
+      </div>
+    );
+ }
+
if (suggestions.length === 0) return null;
return (
diff --git a/desktop/src/lib/llm-service.ts b/desktop/src/lib/llm-service.ts
index 4c60ce5..2439efe 100644
--- a/desktop/src/lib/llm-service.ts
+++ b/desktop/src/lib/llm-service.ts
@@ -644,6 +644,21 @@ const HARDCODED_PROMPTS: Record
]`,
user: (conversation: string) => `从以下对话中提取值得长期记住的信息:\n\n${conversation}\n\n如果没有值得记忆的内容,返回空数组 []。`,
},
+
+ suggestions: {
+ system: `你是对话分析助手。根据最近的对话内容,生成 3 个用户可能想继续探讨的问题。
+
+要求:
+- 每个问题必须与对话内容直接相关,具体且有针对性
+- 帮助用户深入理解、实际操作或拓展思路
+- 每个问题不超过 30 个中文字符
+- 不要重复对话中已讨论过的内容
+- 使用与用户相同的语言
+
+只输出 JSON 数组,包含恰好 3 个字符串。不要输出任何其他内容。
+示例:["如何在生产环境中部署?", "这个方案的成本如何?", "有没有更简单的替代方案?"]`,
+ user: (context: string) => `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。`,
+ },
};
// === Prompt Cache (SaaS OTA) ===
@@ -806,6 +821,7 @@ export const LLM_PROMPTS = {
get reflection() { return { system: getSystemPrompt('reflection'), user: getUserPromptTemplate('reflection')! }; },
get compaction() { return { system: getSystemPrompt('compaction'), user: getUserPromptTemplate('compaction')! }; },
get extraction() { return { system: getSystemPrompt('extraction'), user: getUserPromptTemplate('extraction')! }; },
+ get suggestions() { return { system: getSystemPrompt('suggestions'), user: getUserPromptTemplate('suggestions')! }; },
};
// === Telemetry Integration ===
@@ -876,3 +892,18 @@ export async function llmExtract(
trackLLMCall(llm, response);
return response.content;
}
+
+export async function llmSuggest(
+ conversationContext: string,
+ adapter?: LLMServiceAdapter,
+): Promise<string> {
+ const llm = adapter || getLLMAdapter();
+
+ const response = await llm.complete([
+ { role: 'system', content: LLM_PROMPTS.suggestions.system },
+ { role: 'user', content: typeof LLM_PROMPTS.suggestions.user === 'function' ? LLM_PROMPTS.suggestions.user(conversationContext) : LLM_PROMPTS.suggestions.user },
+ ]);
+
+ trackLLMCall(llm, response);
+ return response.content;
+}
diff --git a/desktop/src/store/chat/streamStore.ts b/desktop/src/store/chat/streamStore.ts
index f6d9c9d..7de8482 100644
--- a/desktop/src/store/chat/streamStore.ts
+++ b/desktop/src/store/chat/streamStore.ts
@@ -573,10 +573,8 @@ async function generateLLMSuggestions(
let raw: string;
if (connectionMode === 'saas') {
- // SaaS relay: use saasClient directly for reliable auth
raw = await llmSuggestViaSaaS(context);
} else {
- // Local kernel: use llm-service adapter (GatewayLLMAdapter → agent_chat)
raw = await llmSuggest(context);
}
@@ -596,41 +594,72 @@ async function generateLLMSuggestions(
}
/**
- * Generate suggestions via SaaS relay, using saasStore auth directly.
+ * Generate suggestions via SaaS relay using SSE streaming.
+ * Requests a streamed (SSE) completion to avoid relay timeout issues with
+ * non-streaming requests, reads the response body as text once complete,
+ * then parses the suggestion JSON from the accumulated SSE deltas.
*/
async function llmSuggestViaSaaS(context: string): Promise<string> {
- const { useSaaSStore } = await import('../saasStore');
- const { saasUrl, authToken } = useSaaSStore.getState();
-
- if (!saasUrl || !authToken) {
- throw new Error('SaaS not authenticated');
- }
-
const { saasClient } = await import('../../lib/saas-client');
- saasClient.setBaseUrl(saasUrl);
- saasClient.setToken(authToken);
+ const { useConversationStore } = await import('./conversationStore');
+ const { useSaaSStore } = await import('../saasStore');
- const response = await saasClient.chatCompletion(
- {
- model: 'default',
- messages: [
- { role: 'system', content: LLM_PROMPTS_SYSTEM },
- { role: 'user', content: `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。` },
- ],
- max_tokens: 500,
- temperature: 0.7,
- stream: false,
- },
- AbortSignal.timeout(15000),
- );
+ const currentModel = useConversationStore.getState().currentModel;
+ const availableModels = useSaaSStore.getState().availableModels;
+ const model = currentModel || (availableModels.length > 0 ? availableModels[0]?.id : undefined);
+ if (!model) throw new Error('No model available for suggestions');
- if (!response.ok) {
- const errText = await response.text().catch(() => 'unknown error');
- throw new Error(`SaaS relay error ${response.status}: ${errText.substring(0, 100)}`);
+ // Delay to avoid concurrent relay requests with memory extraction
+ await new Promise(r => setTimeout(r, 2000));
+
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), 60000);
+
+ try {
+ const response = await saasClient.chatCompletion(
+ {
+ model,
+ messages: [
+ { role: 'system', content: LLM_PROMPTS_SYSTEM },
+ { role: 'user', content: `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。` },
+ ],
+ max_tokens: 500,
+ temperature: 0.7,
+ stream: true,
+ },
+ controller.signal,
+ );
+
+ if (!response.ok) {
+ const errText = await response.text().catch(() => 'unknown error');
+ throw new Error(`SaaS relay error ${response.status}: ${errText.substring(0, 100)}`);
+ }
+
+ // Read full response as text — suggestion responses are small (max 500 tokens),
+ // so streaming is unnecessary. This avoids ReadableStream compatibility issues
+ // in Tauri WebView2 where body.getReader() may not yield SSE chunks correctly.
+ const rawText = await response.text();
+ log.debug('[Suggest] Raw response length:', rawText.length);
+
+ // Parse SSE "data:" lines from accumulated text
+ let accumulated = '';
+ for (const line of rawText.split('\n')) {
+ const trimmed = line.trim();
+ if (!trimmed.startsWith('data: ')) continue;
+ const payload = trimmed.slice(6).trim();
+ if (payload === '[DONE]') continue;
+ try {
+ const parsed = JSON.parse(payload);
+ const delta = parsed.choices?.[0]?.delta;
+ if (delta?.content) accumulated += delta.content;
+ } catch { /* skip malformed lines */ }
+ }
+
+ log.debug('[Suggest] Accumulated length:', accumulated.length);
+ return accumulated;
+ } finally {
+ clearTimeout(timeoutId);
}
-
- const data = await response.json();
- return data?.choices?.[0]?.message?.content || '';
}
const LLM_PROMPTS_SYSTEM = `你是对话分析助手。根据最近的对话内容,生成 3 个用户可能想继续探讨的问题。
diff --git a/desktop/src/store/chatStore.ts b/desktop/src/store/chatStore.ts
index fba6047..06a7409 100644
--- a/desktop/src/store/chatStore.ts
+++ b/desktop/src/store/chatStore.ts
@@ -79,6 +79,7 @@ interface ChatState {
totalOutputTokens: number;
chatMode: ChatModeType;
suggestions: string[];
+ suggestionsLoading: boolean;
addMessage: (message: Message) => void;
updateMessage: (id: string, updates: Partial<Message>) => void;
@@ -111,6 +112,7 @@ export const useChatStore = create<ChatState>()(
isLoading: false,
chatMode: 'thinking' as ChatModeType,
suggestions: [],
+ suggestionsLoading: false,
totalInputTokens: 0,
totalOutputTokens: 0,
@@ -367,6 +369,7 @@ const unsubStream = useStreamStore.subscribe((state) => {
if (chat.isLoading !== state.isLoading) updates.isLoading = state.isLoading;
if (chat.chatMode !== state.chatMode) updates.chatMode = state.chatMode;
if (chat.suggestions !== state.suggestions) updates.suggestions = state.suggestions;
+ if (chat.suggestionsLoading !== state.suggestionsLoading) updates.suggestionsLoading = state.suggestionsLoading;
if (Object.keys(updates).length > 0) {
useChatStore.setState(updates);
}