perf(relay): full-chain optimization — key pool, model sync, SSE stream
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

Phase 1 (Key Pool correctness):
- RPM: fixed-minute window → sliding 60s aggregation (prevents 2x burst)
- Remove fallback-to-provider-key bypass when all keys rate-limited
- SSE semaphore: 16→64 permits, cleanup delay 60s→5s
- Default 429 cooldown: 5min→60s (better for Coding Plan quotas)
- Expire old key_usage_window rows on record

Phase 2 (Frontend model sync):
- currentModel empty-string fallback to glm-4-flash-250414 in relay client
- Merge duplicate listModels() calls in connectionStore SaaS path
- Show ModelSelector in Tauri mode when models available
- Clear currentModel on SaaS logout

Phase 3 (Relay performance):
- Key Pool: DashMap in-memory cache (TTL 5s) for select_best_key
- Cache invalidation on 429 marking

Phase 4 (SSE stream):
- AbortController integration for user-cancelled streams
- SSE parsing: split by event boundaries (\n\n) instead of per-line
- streamStore cancelStream adapts to 0-arg and 1-arg cancel fns
This commit is contained in:
iven
2026-04-09 19:34:02 +08:00
parent 5c6964f52a
commit e6eb97dcaa
7 changed files with 191 additions and 105 deletions

View File

@@ -92,6 +92,9 @@ export function createSaaSRelayGatewayClient(
// -----------------------------------------------------------------------
// Helper: OpenAI SSE streaming via SaaS relay
// -----------------------------------------------------------------------
// AbortController for cancelling active streams
let activeAbortController: AbortController | null = null;
async function chatStream(
message: string,
callbacks: {
@@ -112,10 +115,13 @@ export function createSaaSRelayGatewayClient(
},
): Promise<{ runId: string }> {
const runId = `run_${Date.now()}`;
const abortController = new AbortController();
activeAbortController = abortController;
const aborted = () => abortController.signal.aborted;
try {
const body: Record<string, unknown> = {
model: getModel(),
model: getModel() || 'glm-4-flash-250414',
messages: [{ role: 'user', content: message }],
stream: true,
};
@@ -148,67 +154,88 @@ export function createSaaSRelayGatewayClient(
const decoder = new TextDecoder();
let buffer = '';
while (true) {
while (!aborted()) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || ''; // keep incomplete last line
for (const line of lines) {
if (!line.startsWith('data: ')) continue;
const data = line.slice(6).trim();
if (data === '[DONE]') continue;
// Optimized SSE parsing: split by double-newline (event boundaries)
let boundary: number;
while ((boundary = buffer.indexOf('\n\n')) !== -1) {
const eventBlock = buffer.slice(0, boundary);
buffer = buffer.slice(boundary + 2);
try {
const parsed = JSON.parse(data);
// Process each line in the event block
const lines = eventBlock.split('\n');
for (const line of lines) {
if (!line.startsWith('data: ')) continue;
const data = line.slice(6).trim();
if (data === '[DONE]') continue;
// Handle SSE error events from relay (e.g. stream_timeout)
if (parsed.error) {
const errMsg = parsed.message || parsed.error || 'Unknown stream error';
log.warn('SSE stream error:', errMsg);
callbacks.onError(errMsg);
callbacks.onComplete();
return { runId };
try {
const parsed = JSON.parse(data);
// Handle SSE error events from relay (e.g. stream_timeout)
if (parsed.error) {
const errMsg = parsed.message || parsed.error || 'Unknown stream error';
log.warn('SSE stream error:', errMsg);
callbacks.onError(errMsg);
callbacks.onComplete();
return { runId };
}
const choices = parsed.choices?.[0];
if (!choices) continue;
const delta = choices.delta;
// Handle thinking/reasoning content
if (delta?.reasoning_content) {
callbacks.onThinkingDelta?.(delta.reasoning_content);
}
// Handle regular content
if (delta?.content) {
callbacks.onDelta(delta.content);
}
// Check for completion
if (choices.finish_reason) {
const usage = parsed.usage;
callbacks.onComplete(
usage?.prompt_tokens,
usage?.completion_tokens,
);
return { runId };
}
} catch {
// Skip malformed SSE lines
}
const choices = parsed.choices?.[0];
if (!choices) continue;
const delta = choices.delta;
// Handle thinking/reasoning content
if (delta?.reasoning_content) {
callbacks.onThinkingDelta?.(delta.reasoning_content);
}
// Handle regular content
if (delta?.content) {
callbacks.onDelta(delta.content);
}
// Check for completion
if (choices.finish_reason) {
const usage = parsed.usage;
callbacks.onComplete(
usage?.prompt_tokens,
usage?.completion_tokens,
);
return { runId };
}
} catch {
// Skip malformed SSE lines
}
}
}
// If aborted, cancel the reader
if (aborted()) {
try { reader.cancel(); } catch { /* already closed */ }
}
// Stream ended without explicit finish_reason
callbacks.onComplete();
} catch (err) {
if (aborted()) {
// Cancelled by user — don't report as error
callbacks.onComplete();
return { runId };
}
const msg = err instanceof Error ? err.message : String(err);
callbacks.onError(msg);
callbacks.onComplete();
} finally {
if (activeAbortController === abortController) {
activeAbortController = null;
}
}
return { runId };
@@ -256,6 +283,13 @@ export function createSaaSRelayGatewayClient(
// --- Chat ---
chatStream,
cancelStream: () => {
  // Abort the in-flight SSE stream, if any; no-op when nothing is active.
  const controller = activeAbortController;
  if (controller === null) return;
  controller.abort();
  activeAbortController = null;
  log.info('SSE stream cancelled by user');
},
// --- Hands ---
listHands: async () => ({ hands: [] }),