diff --git a/desktop/src/components/ChatArea.tsx b/desktop/src/components/ChatArea.tsx
index 6af5475..0b7fe69 100644
--- a/desktop/src/components/ChatArea.tsx
+++ b/desktop/src/components/ChatArea.tsx
@@ -53,7 +53,7 @@ export function ChatArea({ compact, onOpenDetail }: { compact?: boolean; onOpenD
const {
messages, isStreaming, isLoading,
sendMessage: sendToGateway, initStreamListener,
- chatMode, setChatMode, suggestions,
+ chatMode, setChatMode, suggestions, suggestionsLoading,
totalInputTokens, totalOutputTokens,
cancelStream,
} = useChatStore();
@@ -505,9 +505,10 @@ export function ChatArea({ compact, onOpenDetail }: { compact?: boolean; onOpenD
{/* Suggestion chips */}
- {!isStreaming && suggestions.length > 0 && !messages.some(m => m.error) && (
+ {!isStreaming && !messages.some(m => m.error) && (suggestions.length > 0 || suggestionsLoading) && (
+              <SuggestionChips suggestions={suggestions} loading={suggestionsLoading} onSelect={(text) => { setInput(text); textareaRef.current?.focus(); setTimeout(() => handleSend(), 0); }}
className="mb-3"
/>
diff --git a/desktop/src/components/ai/SuggestionChips.tsx b/desktop/src/components/ai/SuggestionChips.tsx
index 99fa9ba..5c2f143 100644
--- a/desktop/src/components/ai/SuggestionChips.tsx
+++ b/desktop/src/components/ai/SuggestionChips.tsx
@@ -7,15 +7,30 @@ import { motion } from 'framer-motion';
* - Horizontal scrollable chip list
* - Click to fill input
* - Animated entrance
+ * - Loading skeleton while LLM generates suggestions
*/
interface SuggestionChipsProps {
suggestions: string[];
+ loading?: boolean;
onSelect: (text: string) => void;
className?: string;
}
-export function SuggestionChips({ suggestions, onSelect, className = '' }: SuggestionChipsProps) {
+export function SuggestionChips({ suggestions, loading, onSelect, className = '' }: SuggestionChipsProps) {
+ if (loading && suggestions.length === 0) {
+    return (
+      <div className={`flex gap-2 overflow-x-auto ${className}`}>
+        {[0, 1, 2].map((i) => (
+          <div key={i} className="h-8 w-36 shrink-0 animate-pulse rounded-full bg-muted/60" />
+        ))}
+      </div>
+    );
+ }
+
if (suggestions.length === 0) return null;
return (
diff --git a/desktop/src/lib/llm-service.ts b/desktop/src/lib/llm-service.ts
index 4c60ce5..2439efe 100644
--- a/desktop/src/lib/llm-service.ts
+++ b/desktop/src/lib/llm-service.ts
@@ -644,6 +644,21 @@ const HARDCODED_PROMPTS: Record
]`,
user: (conversation: string) => `从以下对话中提取值得长期记住的信息:\n\n${conversation}\n\n如果没有值得记忆的内容,返回空数组 []。`,
},
+
+ suggestions: {
+ system: `你是对话分析助手。根据最近的对话内容,生成 3 个用户可能想继续探讨的问题。
+
+要求:
+- 每个问题必须与对话内容直接相关,具体且有针对性
+- 帮助用户深入理解、实际操作或拓展思路
+- 每个问题不超过 30 个中文字符
+- 不要重复对话中已讨论过的内容
+- 使用与用户相同的语言
+
+只输出 JSON 数组,包含恰好 3 个字符串。不要输出任何其他内容。
+示例:["如何在生产环境中部署?", "这个方案的成本如何?", "有没有更简单的替代方案?"]`,
+ user: (context: string) => `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。`,
+ },
};
// === Prompt Cache (SaaS OTA) ===
@@ -806,6 +821,7 @@ export const LLM_PROMPTS = {
get reflection() { return { system: getSystemPrompt('reflection'), user: getUserPromptTemplate('reflection')! }; },
get compaction() { return { system: getSystemPrompt('compaction'), user: getUserPromptTemplate('compaction')! }; },
get extraction() { return { system: getSystemPrompt('extraction'), user: getUserPromptTemplate('extraction')! }; },
+ get suggestions() { return { system: getSystemPrompt('suggestions'), user: getUserPromptTemplate('suggestions')! }; },
};
// === Telemetry Integration ===
@@ -876,3 +892,18 @@ export async function llmExtract(
trackLLMCall(llm, response);
return response.content;
}
+
+export async function llmSuggest(
+ conversationContext: string,
+ adapter?: LLMServiceAdapter,
+): Promise<string> {
+ const llm = adapter || getLLMAdapter();
+
+ const response = await llm.complete([
+ { role: 'system', content: LLM_PROMPTS.suggestions.system },
+ { role: 'user', content: typeof LLM_PROMPTS.suggestions.user === 'function' ? LLM_PROMPTS.suggestions.user(conversationContext) : LLM_PROMPTS.suggestions.user },
+ ]);
+
+ trackLLMCall(llm, response);
+ return response.content;
+}
diff --git a/desktop/src/store/chat/streamStore.ts b/desktop/src/store/chat/streamStore.ts
index f6d9c9d..7de8482 100644
--- a/desktop/src/store/chat/streamStore.ts
+++ b/desktop/src/store/chat/streamStore.ts
@@ -573,10 +573,8 @@ async function generateLLMSuggestions(
let raw: string;
if (connectionMode === 'saas') {
- // SaaS relay: use saasClient directly for reliable auth
raw = await llmSuggestViaSaaS(context);
} else {
- // Local kernel: use llm-service adapter (GatewayLLMAdapter → agent_chat)
raw = await llmSuggest(context);
}
@@ -596,41 +594,72 @@ async function generateLLMSuggestions(
}
/**
- * Generate suggestions via SaaS relay, using saasStore auth directly.
+ * Generate suggestions via SaaS relay using SSE streaming.
+ * Requests a streamed (SSE) completion to avoid relay timeout issues with
+ * non-streaming requests, reads the response body as text once complete,
+ * then parses the suggestion JSON from the accumulated SSE deltas.
*/
async function llmSuggestViaSaaS(context: string): Promise<string> {
- const { useSaaSStore } = await import('../saasStore');
- const { saasUrl, authToken } = useSaaSStore.getState();
-
- if (!saasUrl || !authToken) {
- throw new Error('SaaS not authenticated');
- }
-
const { saasClient } = await import('../../lib/saas-client');
- saasClient.setBaseUrl(saasUrl);
- saasClient.setToken(authToken);
+ const { useConversationStore } = await import('./conversationStore');
+ const { useSaaSStore } = await import('../saasStore');
- const response = await saasClient.chatCompletion(
- {
- model: 'default',
- messages: [
- { role: 'system', content: LLM_PROMPTS_SYSTEM },
- { role: 'user', content: `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。` },
- ],
- max_tokens: 500,
- temperature: 0.7,
- stream: false,
- },
- AbortSignal.timeout(15000),
- );
+ const currentModel = useConversationStore.getState().currentModel;
+ const availableModels = useSaaSStore.getState().availableModels;
+ const model = currentModel || (availableModels.length > 0 ? availableModels[0]?.id : undefined);
+ if (!model) throw new Error('No model available for suggestions');
- if (!response.ok) {
- const errText = await response.text().catch(() => 'unknown error');
- throw new Error(`SaaS relay error ${response.status}: ${errText.substring(0, 100)}`);
+ // Delay to avoid concurrent relay requests with memory extraction
+ await new Promise(r => setTimeout(r, 2000));
+
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), 60000);
+
+ try {
+ const response = await saasClient.chatCompletion(
+ {
+ model,
+ messages: [
+ { role: 'system', content: LLM_PROMPTS_SYSTEM },
+ { role: 'user', content: `以下是对话中最近的消息:\n\n${context}\n\n请生成 3 个后续问题。` },
+ ],
+ max_tokens: 500,
+ temperature: 0.7,
+ stream: true,
+ },
+ controller.signal,
+ );
+
+ if (!response.ok) {
+ const errText = await response.text().catch(() => 'unknown error');
+ throw new Error(`SaaS relay error ${response.status}: ${errText.substring(0, 100)}`);
+ }
+
+ // Read full response as text — suggestion responses are small (max 500 tokens),
+ // so streaming is unnecessary. This avoids ReadableStream compatibility issues
+ // in Tauri WebView2 where body.getReader() may not yield SSE chunks correctly.
+ const rawText = await response.text();
+ log.debug('[Suggest] Raw response length:', rawText.length);
+
+ // Parse SSE "data:" lines from accumulated text
+ let accumulated = '';
+ for (const line of rawText.split('\n')) {
+ const trimmed = line.trim();
+ if (!trimmed.startsWith('data: ')) continue;
+ const payload = trimmed.slice(6).trim();
+ if (payload === '[DONE]') continue;
+ try {
+ const parsed = JSON.parse(payload);
+ const delta = parsed.choices?.[0]?.delta;
+ if (delta?.content) accumulated += delta.content;
+ } catch { /* skip malformed lines */ }
+ }
+
+ log.debug('[Suggest] Accumulated length:', accumulated.length);
+ return accumulated;
+ } finally {
+ clearTimeout(timeoutId);
}
-
- const data = await response.json();
- return data?.choices?.[0]?.message?.content || '';
}
const LLM_PROMPTS_SYSTEM = `你是对话分析助手。根据最近的对话内容,生成 3 个用户可能想继续探讨的问题。
diff --git a/desktop/src/store/chatStore.ts b/desktop/src/store/chatStore.ts
index fba6047..06a7409 100644
--- a/desktop/src/store/chatStore.ts
+++ b/desktop/src/store/chatStore.ts
@@ -79,6 +79,7 @@ interface ChatState {
totalOutputTokens: number;
chatMode: ChatModeType;
suggestions: string[];
+ suggestionsLoading: boolean;
addMessage: (message: Message) => void;
updateMessage: (id: string, updates: Partial<Message>) => void;
@@ -111,6 +112,7 @@ export const useChatStore = create<ChatState>()(
isLoading: false,
chatMode: 'thinking' as ChatModeType,
suggestions: [],
+ suggestionsLoading: false,
totalInputTokens: 0,
totalOutputTokens: 0,
@@ -367,6 +369,7 @@ const unsubStream = useStreamStore.subscribe((state) => {
if (chat.isLoading !== state.isLoading) updates.isLoading = state.isLoading;
if (chat.chatMode !== state.chatMode) updates.chatMode = state.chatMode;
if (chat.suggestions !== state.suggestions) updates.suggestions = state.suggestions;
+ if (chat.suggestionsLoading !== state.suggestionsLoading) updates.suggestionsLoading = state.suggestionsLoading;
if (Object.keys(updates).length > 0) {
useChatStore.setState(updates);
}