perf(relay): full-chain optimization — key pool, model sync, SSE stream

Phase 1 (Key Pool correctness):
- RPM: fixed-minute window → sliding 60s aggregation (prevents 2x burst)
- Remove fallback-to-provider-key bypass when all keys are rate-limited
- SSE semaphore: 16→64 permits, cleanup delay 60s→5s
- Default 429 cooldown: 5min→60s (better for Coding Plan quotas)
- Expire old key_usage_window rows on record

Phase 2 (Frontend model sync):
- currentModel empty-string fallback to glm-4-flash-250414 in relay client
- Merge duplicate listModels() calls in connectionStore SaaS path
- Show ModelSelector in Tauri mode when models are available
- Clear currentModel on SaaS logout

Phase 3 (Relay performance):
- Key Pool: DashMap in-memory cache (TTL 5s) for select_best_key
- Cache invalidation on 429 marking

Phase 4 (SSE stream):
- AbortController integration for user-cancelled streams
- SSE parsing: split by event boundaries (\n\n) instead of per-line
- streamStore cancelStream adapts to 0-arg and 1-arg cancel fns
2026-04-09 19:34:02 +08:00
parent 5c6964f52a
commit e6eb97dcaa
7 changed files with 191 additions and 105 deletions
--- a/desktop/src/components/ChatArea.tsx
+++ b/desktop/src/components/ChatArea.tsx
@@ -31,7 +31,6 @@ import { ReasoningBlock } from './ai/ReasoningBlock';
 import { StreamingText } from './ai/StreamingText';
 import { ChatMode } from './ai/ChatMode';
 import { ModelSelector } from './ai/ModelSelector';
-import { isTauriRuntime } from '../lib/tauri-gateway';
 import { SuggestionChips } from './ai/SuggestionChips';
 import { PipelineResultPreview } from './pipeline/PipelineResultPreview';
 import { PresentationContainer } from './presentation/PresentationContainer';
@@ -563,7 +562,7 @@ export function ChatArea({ compact, onOpenDetail }: { compact?: boolean; onOpenD
                }
              </div>
              <div className="flex items-center gap-2">
-                {!isTauriRuntime() && (
+                {models.length > 0 && (
                  <ModelSelector
                    models={models.map(m => ({ id: m.id, name: m.name, provider: m.provider }))}
                    currentModel={currentModel}
--- a/desktop/src/lib/saas-relay-client.ts
+++ b/desktop/src/lib/saas-relay-client.ts
@@ -92,6 +92,9 @@ export function createSaaSRelayGatewayClient(
  // -----------------------------------------------------------------------
  // Helper: OpenAI SSE streaming via SaaS relay
  // -----------------------------------------------------------------------
+  // AbortController for cancelling active streams
+  let activeAbortController: AbortController | null = null;
+
  async function chatStream(
    message: string,
    callbacks: {
@@ -112,10 +115,13 @@ export function createSaaSRelayGatewayClient(
    },
  ): Promise<{ runId: string }> {
    const runId = `run_${Date.now()}`;
+    const abortController = new AbortController();
+    activeAbortController = abortController;
+    const aborted = () => abortController.signal.aborted;

    try {
      const body: Record<string, unknown> = {
-        model: getModel(),
+        model: getModel() || 'glm-4-flash-250414',
        messages: [{ role: 'user', content: message }],
        stream: true,
      };
@@ -148,67 +154,88 @@ export function createSaaSRelayGatewayClient(
      const decoder = new TextDecoder();
      let buffer = '';

-      while (true) {
+      while (!aborted()) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
-        const lines = buffer.split('\n');
-        buffer = lines.pop() || ''; // keep incomplete last line

-        for (const line of lines) {
-          if (!line.startsWith('data: ')) continue;
-          const data = line.slice(6).trim();
-          if (data === '[DONE]') continue;
+        // Optimized SSE parsing: split by double-newline (event boundaries)
+        let boundary: number;
+        while ((boundary = buffer.indexOf('\n\n')) !== -1) {
+          const eventBlock = buffer.slice(0, boundary);
+          buffer = buffer.slice(boundary + 2);

-          try {
-            const parsed = JSON.parse(data);
+          // Process each line in the event block
+          const lines = eventBlock.split('\n');
+          for (const line of lines) {
+            if (!line.startsWith('data: ')) continue;
+            const data = line.slice(6).trim();
+            if (data === '[DONE]') continue;

-            // Handle SSE error events from relay (e.g. stream_timeout)
-            if (parsed.error) {
-              const errMsg = parsed.message || parsed.error || 'Unknown stream error';
-              log.warn('SSE stream error:', errMsg);
-              callbacks.onError(errMsg);
-              callbacks.onComplete();
-              return { runId };
+            try {
+              const parsed = JSON.parse(data);
+
+              // Handle SSE error events from relay (e.g. stream_timeout)
+              if (parsed.error) {
+                const errMsg = parsed.message || parsed.error || 'Unknown stream error';
+                log.warn('SSE stream error:', errMsg);
+                callbacks.onError(errMsg);
+                callbacks.onComplete();
+                return { runId };
+              }
+
+              const choices = parsed.choices?.[0];
+              if (!choices) continue;
+
+              const delta = choices.delta;
+
+              // Handle thinking/reasoning content
+              if (delta?.reasoning_content) {
+                callbacks.onThinkingDelta?.(delta.reasoning_content);
+              }
+
+              // Handle regular content
+              if (delta?.content) {
+                callbacks.onDelta(delta.content);
+              }
+
+              // Check for completion
+              if (choices.finish_reason) {
+                const usage = parsed.usage;
+                callbacks.onComplete(
+                  usage?.prompt_tokens,
+                  usage?.completion_tokens,
+                );
+                return { runId };
+              }
+            } catch {
+              // Skip malformed SSE lines
            }
-
-            const choices = parsed.choices?.[0];
-            if (!choices) continue;
-
-            const delta = choices.delta;
-
-            // Handle thinking/reasoning content
-            if (delta?.reasoning_content) {
-              callbacks.onThinkingDelta?.(delta.reasoning_content);
-            }
-
-            // Handle regular content
-            if (delta?.content) {
-              callbacks.onDelta(delta.content);
-            }
-
-            // Check for completion
-            if (choices.finish_reason) {
-              const usage = parsed.usage;
-              callbacks.onComplete(
-                usage?.prompt_tokens,
-                usage?.completion_tokens,
-              );
-              return { runId };
-            }
-          } catch {
-            // Skip malformed SSE lines
          }
        }
      }

+      // If aborted, cancel the reader
+      if (aborted()) {
+        try { reader.cancel(); } catch { /* already closed */ }
+      }
+
      // Stream ended without explicit finish_reason
      callbacks.onComplete();
    } catch (err) {
+      if (aborted()) {
+        // Cancelled by user — don't report as error
+        callbacks.onComplete();
+        return { runId };
+      }
      const msg = err instanceof Error ? err.message : String(err);
      callbacks.onError(msg);
      callbacks.onComplete();
+    } finally {
+      if (activeAbortController === abortController) {
+        activeAbortController = null;
+      }
    }

    return { runId };
@@ -256,6 +283,13 @@ export function createSaaSRelayGatewayClient(

    // --- Chat ---
    chatStream,
+    cancelStream: () => {
+      if (activeAbortController) {
+        activeAbortController.abort();
+        activeAbortController = null;
+        log.info('SSE stream cancelled by user');
+      }
+    },

    // --- Hands ---
    listHands: async () => ({ hands: [] }),
--- a/desktop/src/store/chat/streamStore.ts
+++ b/desktop/src/store/chat/streamStore.ts
@@ -581,11 +581,20 @@ export const useStreamStore = create<StreamState>()(
    if (!isStreaming) return;

    // 1. Tell backend to abort — use sessionKey (which is the sessionId in Tauri)
+    //    Also abort the frontend SSE fetch via cancelStream()
    try {
-      const client = getClient();
+      const client = getClient() as unknown as Record<string, unknown>;
      if ('cancelStream' in client) {
-        const sessionId = useConversationStore.getState().sessionKey || activeRunId || '';
-        (client as { cancelStream: (id: string) => void }).cancelStream(sessionId);
+        const fn = client.cancelStream;
+        if (typeof fn === 'function') {
+          // Call with or without sessionId depending on arity
+          if (fn.length > 0) {
+            const sessionId = useConversationStore.getState().sessionKey || activeRunId || '';
+            (fn as (id: string) => void)(sessionId);
+          } else {
+            (fn as () => void)();
+          }
+        }
      }
    } catch {
      // Backend cancel is best-effort; proceed with local cleanup
--- a/desktop/src/store/connectionStore.ts
+++ b/desktop/src/store/connectionStore.ts
@@ -441,9 +441,10 @@ export const useConnectionStore = create<ConnectionStore>((set, get) => {
          // Configure the singleton client (cookie auth — no token needed)
          saasClient.setBaseUrl(session.saasUrl);

-          // Health check via GET /api/v1/relay/models
+          // Health check + model list: merged single listModels() call
+          let relayModels: Array<{ id: string; alias?: string }> | null = null;
          try {
-            await saasClient.listModels();
+            relayModels = await saasClient.listModels();
          } catch (err: unknown) {
            // Handle expired session — clear auth and trigger re-login
            const status = (err as { status?: number })?.status;
@@ -473,15 +474,8 @@ export const useConnectionStore = create<ConnectionStore>((set, get) => {
            // baseUrl = saasUrl + /api/v1/relay → kernel appends /chat/completions
            // apiKey = SaaS JWT token → sent as Authorization: Bearer <jwt>

-            // Fetch available models from SaaS relay (shared by both branches)
-            let relayModels: Array<{ id: string }>;
-            try {
-              relayModels = await saasClient.listModels();
-            } catch {
-              throw new Error('无法获取可用模型列表，请确认管理后台已配置 Provider 和模型');
-            }
-
-            if (relayModels.length === 0) {
+            // Models already fetched during health check above
+            if (!relayModels || relayModels.length === 0) {
              throw new Error('SaaS 平台没有可用模型，请先在管理后台配置 Provider 和模型');
            }

--- a/desktop/src/store/saasStore.ts
+++ b/desktop/src/store/saasStore.ts
@@ -425,6 +425,12 @@ export const useSaaSStore = create<SaaSStore>((set, get) => {
      stopTelemetryCollector();
      stopPromptOTASync();

+      // Clear currentModel so next connection uses fresh model resolution
+      try {
+        const { useConversationStore } = require('./chat/conversationStore');
+        useConversationStore.getState().setCurrentModel('');
+      } catch { /* non-critical */ }
+
      set({
        isLoggedIn: false,
        account: null,