diff --git a/desktop/src/components/ChatArea.tsx b/desktop/src/components/ChatArea.tsx index 6af5475..0b7fe69 100644 --- a/desktop/src/components/ChatArea.tsx +++ b/desktop/src/components/ChatArea.tsx @@ -53,7 +53,7 @@ export function ChatArea({ compact, onOpenDetail }: { compact?: boolean; onOpenD const { messages, isStreaming, isLoading, sendMessage: sendToGateway, initStreamListener, - chatMode, setChatMode, suggestions, + chatMode, setChatMode, suggestions, suggestionsLoading, totalInputTokens, totalOutputTokens, cancelStream, } = useChatStore(); @@ -505,9 +505,10 @@ export function ChatArea({ compact, onOpenDetail }: { compact?: boolean; onOpenD
               {/* Suggestion chips */}
-              {!isStreaming && suggestions.length > 0 && !messages.some(m => m.error) && (
+              {!isStreaming && !messages.some(m => m.error) && (suggestions.length > 0 || suggestionsLoading) && (
                 <SuggestionChips
                   suggestions={suggestions}
+                  loading={suggestionsLoading}
                   onSelect={(text) => { setInput(text); textareaRef.current?.focus(); setTimeout(() => handleSend(), 0); }}
                   className="mb-3"
                 />
diff --git a/desktop/src/components/ai/SuggestionChips.tsx b/desktop/src/components/ai/SuggestionChips.tsx
index 99fa9ba..5c2f143 100644
--- a/desktop/src/components/ai/SuggestionChips.tsx
+++ b/desktop/src/components/ai/SuggestionChips.tsx
@@ -7,15 +7,30 @@ import { motion } from 'framer-motion';
  * - Horizontal scrollable chip list
  * - Click to fill input
  * - Animated entrance
+ * - Loading skeleton while LLM generates suggestions
  */
 interface SuggestionChipsProps {
   suggestions: string[];
+  loading?: boolean;
   onSelect: (text: string) => void;
   className?: string;
 }
 
-export function SuggestionChips({ suggestions, onSelect, className = '' }: SuggestionChipsProps) {
+export function SuggestionChips({ suggestions, loading, onSelect, className = '' }: SuggestionChipsProps) {
+  // NOTE(review): skeleton markup reconstructed — original JSX was stripped at extraction; confirm classNames against the real file
+  if (loading && suggestions.length === 0) {
+    return (
+      <div className={`flex gap-2 overflow-x-auto ${className}`}>
+        {[0, 1, 2].map((i) => (
+          <div key={i} className="h-8 w-28 shrink-0 animate-pulse rounded-full bg-muted" />
+        ))}
+      </div>
+ ); + } + if (suggestions.length === 0) return null; return ( diff --git a/desktop/src/lib/llm-service.ts b/desktop/src/lib/llm-service.ts index 4c60ce5..2439efe 100644 --- a/desktop/src/lib/llm-service.ts +++ b/desktop/src/lib/llm-service.ts @@ -644,6 +644,21 @@ const HARDCODED_PROMPTS: Record ]`, user: (conversation: string) => `从以下对话中提取值得长期记住的信息:\n\n${conversation}\n\n如果没有值得记忆的内容,返回空数组 []。`, }, + + suggestions: { + system: `你是对话分析助手。根据最近的对话内容,生成 3 个用户可能想继续探讨的问题。 + +要求: +- 每个问题必须与对话内容直接相关,具体且有针对性 +- 帮助用户深入理解、实际操作或拓展思路 +- 每个问题不超过 30 个中文字符 +- 不要重复对话中已讨论过的内容 +- 使用与用户相同的语言 + +只输出 JSON 数组,包含恰好 3 个字符串。不要输出任何其他内容。 +示例:["如何在生产环境中部署?", "这个方案的成本如何?", "有没有更简单的替代方案?"]`, + user: (context: string) => `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。`, + }, }; // === Prompt Cache (SaaS OTA) === @@ -806,6 +821,7 @@ export const LLM_PROMPTS = { get reflection() { return { system: getSystemPrompt('reflection'), user: getUserPromptTemplate('reflection')! }; }, get compaction() { return { system: getSystemPrompt('compaction'), user: getUserPromptTemplate('compaction')! }; }, get extraction() { return { system: getSystemPrompt('extraction'), user: getUserPromptTemplate('extraction')! }; }, + get suggestions() { return { system: getSystemPrompt('suggestions'), user: getUserPromptTemplate('suggestions')! }; }, }; // === Telemetry Integration === @@ -876,3 +892,18 @@ export async function llmExtract( trackLLMCall(llm, response); return response.content; } + +export async function llmSuggest( + conversationContext: string, + adapter?: LLMServiceAdapter, +): Promise { + const llm = adapter || getLLMAdapter(); + + const response = await llm.complete([ + { role: 'system', content: LLM_PROMPTS.suggestions.system }, + { role: 'user', content: typeof LLM_PROMPTS.suggestions.user === 'function' ? 
LLM_PROMPTS.suggestions.user(conversationContext) : LLM_PROMPTS.suggestions.user },
+  ]);
+
+  trackLLMCall(llm, response);
+  return response.content;
+}
diff --git a/desktop/src/store/chat/streamStore.ts b/desktop/src/store/chat/streamStore.ts
index f6d9c9d..7de8482 100644
--- a/desktop/src/store/chat/streamStore.ts
+++ b/desktop/src/store/chat/streamStore.ts
@@ -573,10 +573,8 @@ async function generateLLMSuggestions(
   let raw: string;
   if (connectionMode === 'saas') {
-    // SaaS relay: use saasClient directly for reliable auth
     raw = await llmSuggestViaSaaS(context);
   } else {
-    // Local kernel: use llm-service adapter (GatewayLLMAdapter → agent_chat)
     raw = await llmSuggest(context);
   }
@@ -596,41 +594,72 @@
 /**
- * Generate suggestions via SaaS relay, using saasStore auth directly.
+ * Generate suggestions via SaaS relay using SSE streaming.
+ * Uses the same streaming path as the main chat to avoid relay timeout issues
+ * with non-streaming requests. Collects the full response from SSE deltas,
+ * then parses the suggestion JSON from the accumulated text.
 */
 async function llmSuggestViaSaaS(context: string): Promise<string> {
-  const { useSaaSStore } = await import('../saasStore');
-  const { saasUrl, authToken } = useSaaSStore.getState();
-
-  if (!saasUrl || !authToken) {
-    throw new Error('SaaS not authenticated');
-  }
-  const { saasClient } = await import('../../lib/saas-client');
-  saasClient.setBaseUrl(saasUrl);
-  saasClient.setToken(authToken);
+  const { saasClient } = await import('../../lib/saas-client');
+  const { useConversationStore } = await import('./conversationStore');
+  const { useSaaSStore } = await import('../saasStore');
 
-  const response = await saasClient.chatCompletion(
-    {
-      model: 'default',
-      messages: [
-        { role: 'system', content: LLM_PROMPTS_SYSTEM },
-        { role: 'user', content: `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。` },
-      ],
-      max_tokens: 500,
-      temperature: 0.7,
-      stream: false,
-    },
-    AbortSignal.timeout(15000),
-  );
+  const currentModel = useConversationStore.getState().currentModel;
+  const availableModels = useSaaSStore.getState().availableModels;
+  const model = currentModel || (availableModels.length > 0 ?
availableModels[0]?.id : undefined); + if (!model) throw new Error('No model available for suggestions'); - if (!response.ok) { - const errText = await response.text().catch(() => 'unknown error'); - throw new Error(`SaaS relay error ${response.status}: ${errText.substring(0, 100)}`); + // Delay to avoid concurrent relay requests with memory extraction + await new Promise(r => setTimeout(r, 2000)); + + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 60000); + + try { + const response = await saasClient.chatCompletion( + { + model, + messages: [ + { role: 'system', content: LLM_PROMPTS_SYSTEM }, + { role: 'user', content: `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。` }, + ], + max_tokens: 500, + temperature: 0.7, + stream: true, + }, + controller.signal, + ); + + if (!response.ok) { + const errText = await response.text().catch(() => 'unknown error'); + throw new Error(`SaaS relay error ${response.status}: ${errText.substring(0, 100)}`); + } + + // Read full response as text — suggestion responses are small (max 500 tokens), + // so streaming is unnecessary. This avoids ReadableStream compatibility issues + // in Tauri WebView2 where body.getReader() may not yield SSE chunks correctly. 
+ const rawText = await response.text(); + log.debug('[Suggest] Raw response length:', rawText.length); + + // Parse SSE "data:" lines from accumulated text + let accumulated = ''; + for (const line of rawText.split('\n')) { + const trimmed = line.trim(); + if (!trimmed.startsWith('data: ')) continue; + const payload = trimmed.slice(6).trim(); + if (payload === '[DONE]') continue; + try { + const parsed = JSON.parse(payload); + const delta = parsed.choices?.[0]?.delta; + if (delta?.content) accumulated += delta.content; + } catch { /* skip malformed lines */ } + } + + log.debug('[Suggest] Accumulated length:', accumulated.length); + return accumulated; + } finally { + clearTimeout(timeoutId); } - - const data = await response.json(); - return data?.choices?.[0]?.message?.content || ''; } const LLM_PROMPTS_SYSTEM = `你是对话分析助手。根据最近的对话内容,生成 3 个用户可能想继续探讨的问题。 diff --git a/desktop/src/store/chatStore.ts b/desktop/src/store/chatStore.ts index fba6047..06a7409 100644 --- a/desktop/src/store/chatStore.ts +++ b/desktop/src/store/chatStore.ts @@ -79,6 +79,7 @@ interface ChatState { totalOutputTokens: number; chatMode: ChatModeType; suggestions: string[]; + suggestionsLoading: boolean; addMessage: (message: Message) => void; updateMessage: (id: string, updates: Partial) => void; @@ -111,6 +112,7 @@ export const useChatStore = create()( isLoading: false, chatMode: 'thinking' as ChatModeType, suggestions: [], + suggestionsLoading: false, totalInputTokens: 0, totalOutputTokens: 0, @@ -367,6 +369,7 @@ const unsubStream = useStreamStore.subscribe((state) => { if (chat.isLoading !== state.isLoading) updates.isLoading = state.isLoading; if (chat.chatMode !== state.chatMode) updates.chatMode = state.chatMode; if (chat.suggestions !== state.suggestions) updates.suggestions = state.suggestions; + if (chat.suggestionsLoading !== state.suggestionsLoading) updates.suggestionsLoading = state.suggestionsLoading; if (Object.keys(updates).length > 0) { useChatStore.setState(updates); }