perf(relay): full-chain optimization — key pool, model sync, SSE stream
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Phase 1 (Key Pool correctness):
- RPM: fixed-minute window → sliding 60s aggregation (prevents 2x burst)
- Remove fallback-to-provider-key bypass when all keys are rate-limited
- SSE semaphore: 16→64 permits, cleanup delay 60s→5s
- Default 429 cooldown: 5min→60s (better for Coding Plan quotas)
- Expire old key_usage_window rows on record

Phase 2 (Frontend model sync):
- currentModel empty-string fallback to glm-4-flash-250414 in relay client
- Merge duplicate listModels() calls in connectionStore SaaS path
- Show ModelSelector in Tauri mode when models are available
- Clear currentModel on SaaS logout

Phase 3 (Relay performance):
- Key Pool: DashMap in-memory cache (TTL 5s) for select_best_key
- Cache invalidation on 429 marking

Phase 4 (SSE stream):
- AbortController integration for user-cancelled streams
- SSE parsing: split by event boundaries (\n\n) instead of per-line
- streamStore cancelStream adapts to 0-arg and 1-arg cancel fns
This commit is contained in:
@@ -31,7 +31,6 @@ import { ReasoningBlock } from './ai/ReasoningBlock';
|
||||
import { StreamingText } from './ai/StreamingText';
|
||||
import { ChatMode } from './ai/ChatMode';
|
||||
import { ModelSelector } from './ai/ModelSelector';
|
||||
import { isTauriRuntime } from '../lib/tauri-gateway';
|
||||
import { SuggestionChips } from './ai/SuggestionChips';
|
||||
import { PipelineResultPreview } from './pipeline/PipelineResultPreview';
|
||||
import { PresentationContainer } from './presentation/PresentationContainer';
|
||||
@@ -563,7 +562,7 @@ export function ChatArea({ compact, onOpenDetail }: { compact?: boolean; onOpenD
|
||||
}
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
{!isTauriRuntime() && (
|
||||
{models.length > 0 && (
|
||||
<ModelSelector
|
||||
models={models.map(m => ({ id: m.id, name: m.name, provider: m.provider }))}
|
||||
currentModel={currentModel}
|
||||
|
||||
@@ -92,6 +92,9 @@ export function createSaaSRelayGatewayClient(
|
||||
// -----------------------------------------------------------------------
|
||||
// Helper: OpenAI SSE streaming via SaaS relay
|
||||
// -----------------------------------------------------------------------
|
||||
// AbortController for cancelling active streams
|
||||
let activeAbortController: AbortController | null = null;
|
||||
|
||||
async function chatStream(
|
||||
message: string,
|
||||
callbacks: {
|
||||
@@ -112,10 +115,13 @@ export function createSaaSRelayGatewayClient(
|
||||
},
|
||||
): Promise<{ runId: string }> {
|
||||
const runId = `run_${Date.now()}`;
|
||||
const abortController = new AbortController();
|
||||
activeAbortController = abortController;
|
||||
const aborted = () => abortController.signal.aborted;
|
||||
|
||||
try {
|
||||
const body: Record<string, unknown> = {
|
||||
model: getModel(),
|
||||
model: getModel() || 'glm-4-flash-250414',
|
||||
messages: [{ role: 'user', content: message }],
|
||||
stream: true,
|
||||
};
|
||||
@@ -148,67 +154,88 @@ export function createSaaSRelayGatewayClient(
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
|
||||
while (true) {
|
||||
while (!aborted()) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
const lines = buffer.split('\n');
|
||||
buffer = lines.pop() || ''; // keep incomplete last line
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data: ')) continue;
|
||||
const data = line.slice(6).trim();
|
||||
if (data === '[DONE]') continue;
|
||||
// Optimized SSE parsing: split by double-newline (event boundaries)
|
||||
let boundary: number;
|
||||
while ((boundary = buffer.indexOf('\n\n')) !== -1) {
|
||||
const eventBlock = buffer.slice(0, boundary);
|
||||
buffer = buffer.slice(boundary + 2);
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
// Process each line in the event block
|
||||
const lines = eventBlock.split('\n');
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data: ')) continue;
|
||||
const data = line.slice(6).trim();
|
||||
if (data === '[DONE]') continue;
|
||||
|
||||
// Handle SSE error events from relay (e.g. stream_timeout)
|
||||
if (parsed.error) {
|
||||
const errMsg = parsed.message || parsed.error || 'Unknown stream error';
|
||||
log.warn('SSE stream error:', errMsg);
|
||||
callbacks.onError(errMsg);
|
||||
callbacks.onComplete();
|
||||
return { runId };
|
||||
try {
|
||||
const parsed = JSON.parse(data);
|
||||
|
||||
// Handle SSE error events from relay (e.g. stream_timeout)
|
||||
if (parsed.error) {
|
||||
const errMsg = parsed.message || parsed.error || 'Unknown stream error';
|
||||
log.warn('SSE stream error:', errMsg);
|
||||
callbacks.onError(errMsg);
|
||||
callbacks.onComplete();
|
||||
return { runId };
|
||||
}
|
||||
|
||||
const choices = parsed.choices?.[0];
|
||||
if (!choices) continue;
|
||||
|
||||
const delta = choices.delta;
|
||||
|
||||
// Handle thinking/reasoning content
|
||||
if (delta?.reasoning_content) {
|
||||
callbacks.onThinkingDelta?.(delta.reasoning_content);
|
||||
}
|
||||
|
||||
// Handle regular content
|
||||
if (delta?.content) {
|
||||
callbacks.onDelta(delta.content);
|
||||
}
|
||||
|
||||
// Check for completion
|
||||
if (choices.finish_reason) {
|
||||
const usage = parsed.usage;
|
||||
callbacks.onComplete(
|
||||
usage?.prompt_tokens,
|
||||
usage?.completion_tokens,
|
||||
);
|
||||
return { runId };
|
||||
}
|
||||
} catch {
|
||||
// Skip malformed SSE lines
|
||||
}
|
||||
|
||||
const choices = parsed.choices?.[0];
|
||||
if (!choices) continue;
|
||||
|
||||
const delta = choices.delta;
|
||||
|
||||
// Handle thinking/reasoning content
|
||||
if (delta?.reasoning_content) {
|
||||
callbacks.onThinkingDelta?.(delta.reasoning_content);
|
||||
}
|
||||
|
||||
// Handle regular content
|
||||
if (delta?.content) {
|
||||
callbacks.onDelta(delta.content);
|
||||
}
|
||||
|
||||
// Check for completion
|
||||
if (choices.finish_reason) {
|
||||
const usage = parsed.usage;
|
||||
callbacks.onComplete(
|
||||
usage?.prompt_tokens,
|
||||
usage?.completion_tokens,
|
||||
);
|
||||
return { runId };
|
||||
}
|
||||
} catch {
|
||||
// Skip malformed SSE lines
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If aborted, cancel the reader
|
||||
if (aborted()) {
|
||||
try { reader.cancel(); } catch { /* already closed */ }
|
||||
}
|
||||
|
||||
// Stream ended without explicit finish_reason
|
||||
callbacks.onComplete();
|
||||
} catch (err) {
|
||||
if (aborted()) {
|
||||
// Cancelled by user — don't report as error
|
||||
callbacks.onComplete();
|
||||
return { runId };
|
||||
}
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
callbacks.onError(msg);
|
||||
callbacks.onComplete();
|
||||
} finally {
|
||||
if (activeAbortController === abortController) {
|
||||
activeAbortController = null;
|
||||
}
|
||||
}
|
||||
|
||||
return { runId };
|
||||
@@ -256,6 +283,13 @@ export function createSaaSRelayGatewayClient(
|
||||
|
||||
// --- Chat ---
|
||||
chatStream,
|
||||
cancelStream: () => {
|
||||
if (activeAbortController) {
|
||||
activeAbortController.abort();
|
||||
activeAbortController = null;
|
||||
log.info('SSE stream cancelled by user');
|
||||
}
|
||||
},
|
||||
|
||||
// --- Hands ---
|
||||
listHands: async () => ({ hands: [] }),
|
||||
|
||||
@@ -581,11 +581,20 @@ export const useStreamStore = create<StreamState>()(
|
||||
if (!isStreaming) return;
|
||||
|
||||
// 1. Tell backend to abort — use sessionKey (which is the sessionId in Tauri)
|
||||
// Also abort the frontend SSE fetch via cancelStream()
|
||||
try {
|
||||
const client = getClient();
|
||||
const client = getClient() as unknown as Record<string, unknown>;
|
||||
if ('cancelStream' in client) {
|
||||
const sessionId = useConversationStore.getState().sessionKey || activeRunId || '';
|
||||
(client as { cancelStream: (id: string) => void }).cancelStream(sessionId);
|
||||
const fn = client.cancelStream;
|
||||
if (typeof fn === 'function') {
|
||||
// Call with or without sessionId depending on arity
|
||||
if (fn.length > 0) {
|
||||
const sessionId = useConversationStore.getState().sessionKey || activeRunId || '';
|
||||
(fn as (id: string) => void)(sessionId);
|
||||
} else {
|
||||
(fn as () => void)();
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Backend cancel is best-effort; proceed with local cleanup
|
||||
|
||||
@@ -441,9 +441,10 @@ export const useConnectionStore = create<ConnectionStore>((set, get) => {
|
||||
// Configure the singleton client (cookie auth — no token needed)
|
||||
saasClient.setBaseUrl(session.saasUrl);
|
||||
|
||||
// Health check via GET /api/v1/relay/models
|
||||
// Health check + model list: merged single listModels() call
|
||||
let relayModels: Array<{ id: string; alias?: string }> | null = null;
|
||||
try {
|
||||
await saasClient.listModels();
|
||||
relayModels = await saasClient.listModels();
|
||||
} catch (err: unknown) {
|
||||
// Handle expired session — clear auth and trigger re-login
|
||||
const status = (err as { status?: number })?.status;
|
||||
@@ -473,15 +474,8 @@ export const useConnectionStore = create<ConnectionStore>((set, get) => {
|
||||
// baseUrl = saasUrl + /api/v1/relay → kernel appends /chat/completions
|
||||
// apiKey = SaaS JWT token → sent as Authorization: Bearer <jwt>
|
||||
|
||||
// Fetch available models from SaaS relay (shared by both branches)
|
||||
let relayModels: Array<{ id: string }>;
|
||||
try {
|
||||
relayModels = await saasClient.listModels();
|
||||
} catch {
|
||||
throw new Error('无法获取可用模型列表,请确认管理后台已配置 Provider 和模型');
|
||||
}
|
||||
|
||||
if (relayModels.length === 0) {
|
||||
// Models already fetched during health check above
|
||||
if (!relayModels || relayModels.length === 0) {
|
||||
throw new Error('SaaS 平台没有可用模型,请先在管理后台配置 Provider 和模型');
|
||||
}
|
||||
|
||||
|
||||
@@ -425,6 +425,12 @@ export const useSaaSStore = create<SaaSStore>((set, get) => {
|
||||
stopTelemetryCollector();
|
||||
stopPromptOTASync();
|
||||
|
||||
// Clear currentModel so next connection uses fresh model resolution
|
||||
try {
|
||||
const { useConversationStore } = require('./chat/conversationStore');
|
||||
useConversationStore.getState().setCurrentModel('');
|
||||
} catch { /* non-critical */ }
|
||||
|
||||
set({
|
||||
isLoggedIn: false,
|
||||
account: null,
|
||||
|
||||
Reference in New Issue
Block a user