fix: 三端联调测试 2 P0 + 6 P1 + 2 P2 修复
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

P0-1: SaaS relay 模型别名解析 — "glm-4-flash" → "glm-4-flash-250414" (resolve_model)
P0-2: config.rs interpolate_env_vars UTF-8 修复 (chars 迭代器替代 bytes as char)
      + DB 启动编码检查 + docker-compose UTF-8 编码参数

P1-3: UI 模型选择器覆盖 Agent 默认模型 (model_override 全链路: TS→Tauri→Rust kernel)
P1-6: 知识搜索管道修复 — seed_knowledge 创建 chunks + 默认分类 (seed/uploaded/distillation)
P1-7: 用量限额从当前 Plan 读取 (非 stale usage 表)
P1-8: relay 双维度配额检查 (relay_requests + input_tokens)

P2-9: SSE 路径 token 计数修复 — 流结束检测替代固定 500ms sleep + billing increment
This commit is contained in:
iven
2026-04-14 00:17:08 +08:00
parent 0903a0d652
commit 4c3136890b
13 changed files with 234 additions and 49 deletions

View File

@@ -152,8 +152,8 @@ pub async fn chat_completions(
}
ModelResolution::Group(candidates)
} else {
// 向后兼容:直接模型查找
let target_model = state.cache.get_model(model_name)
// 向后兼容:直接模型查找 + 别名解析(如 "glm-4-flash" → "glm-4-flash-250414")
let target_model = state.cache.resolve_model(model_name)
.ok_or_else(|| SaasError::NotFound(format!("模型 {} 不存在或未启用", model_name)))?;
// 获取 provider 信息 — 使用内存缓存消除 DB 查询
@@ -218,7 +218,7 @@ pub async fn chat_completions(
ModelResolution::Direct(ref candidate) => {
// 单 Provider 直接路由(向后兼容)
match service::execute_relay(
&state.db, &task.id, &candidate.provider_id,
&state.db, &task.id, &ctx.account_id, &candidate.provider_id,
&candidate.base_url, &request_body, stream,
max_attempts, retry_delay_ms, &enc_key,
true, // 独立调用,管理 task 状态
@@ -233,7 +233,7 @@ pub async fn chat_completions(
// SSE 一旦开始流式传输,中途上游断连不会触发 failover(SSE 协议固有限制)。
service::sort_candidates_by_quota(&state.db, candidates).await;
service::execute_relay_with_failover(
&state.db, &task.id, candidates,
&state.db, &task.id, &ctx.account_id, candidates,
&request_body, stream,
max_attempts, retry_delay_ms, &enc_key
).await
@@ -553,11 +553,12 @@ pub async fn retry_task(
// 异步执行重试 — 根据解析结果选择执行路径
let db = state.db.clone();
let task_id = id.clone();
let account_id_for_spawn = task.account_id.clone();
let handle = tokio::spawn(async move {
let result = match model_resolution {
ModelResolution::Direct(ref candidate) => {
service::execute_relay(
&db, &task_id, &candidate.provider_id,
&db, &task_id, &account_id_for_spawn, &candidate.provider_id,
&candidate.base_url, &body, stream,
max_attempts, base_delay_ms, &enc_key,
true,
@@ -566,7 +567,7 @@ pub async fn retry_task(
ModelResolution::Group(ref mut candidates) => {
service::sort_candidates_by_quota(&db, candidates).await;
service::execute_relay_with_failover(
&db, &task_id, candidates,
&db, &task_id, &account_id_for_spawn, candidates,
&body, stream,
max_attempts, base_delay_ms, &enc_key,
).await

View File

@@ -217,6 +217,7 @@ impl SseUsageCapture {
pub async fn execute_relay(
db: &PgPool,
task_id: &str,
account_id: &str,
provider_id: &str,
provider_base_url: &str,
request_body: &str,
@@ -313,6 +314,7 @@ pub async fn execute_relay(
let db_clone = db.clone();
let task_id_clone = task_id.to_string();
let key_id_for_spawn = key_id.clone();
let account_id_clone = account_id.to_string();
// Bounded channel for backpressure: 128 chunks (~128KB) buffer.
// If the client reads slowly, the upstream is signaled via
@@ -369,20 +371,53 @@ pub async fn execute_relay(
tokio::spawn(async move {
let _permit = permit; // 持有 permit 直到任务完成
// Brief delay to allow SSE stream to settle before recording
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
let capture = usage_capture.lock().await;
let (input, output) = (
if capture.input_tokens > 0 { Some(capture.input_tokens) } else { None },
if capture.output_tokens > 0 { Some(capture.output_tokens) } else { None },
);
// Record task status with timeout to avoid holding DB connections
// 等待 SSE 流结束 — 等待 capture 稳定tokens 不再增长)
// 替代原来固定 500ms 的 race condition
let max_wait = std::time::Duration::from_secs(120);
let poll_interval = std::time::Duration::from_millis(500);
let start = tokio::time::Instant::now();
let mut last_tokens: i64 = 0;
let mut stable_count = 0;
let (input, output) = loop {
tokio::time::sleep(poll_interval).await;
let capture = usage_capture.lock().await;
let total = capture.input_tokens + capture.output_tokens;
if total == last_tokens && total > 0 {
stable_count += 1;
if stable_count >= 3 {
// 连续 3 次稳定(1.5s),认为流结束
break (capture.input_tokens, capture.output_tokens);
}
} else {
stable_count = 0;
last_tokens = total;
}
drop(capture);
if start.elapsed() >= max_wait {
let capture = usage_capture.lock().await;
break (capture.input_tokens, capture.output_tokens);
}
};
let input_opt = if input > 0 { Some(input) } else { None };
let output_opt = if output > 0 { Some(output) } else { None };
// Record task status + billing usage + key usage
let db_op = async {
if let Err(e) = update_task_status(&db_clone, &task_id_clone, "completed", input, output, None).await {
if let Err(e) = update_task_status(&db_clone, &task_id_clone, "completed", input_opt, output_opt, None).await {
tracing::warn!("Failed to update task status after SSE stream: {}", e);
}
// Record key usage (now 2 queries instead of 3)
let total_tokens = input.unwrap_or(0) + output.unwrap_or(0);
// P2-9 修复: SSE 路径也更新 billing_usage_quotas
if input > 0 || output > 0 {
if let Err(e) = crate::billing::service::increment_usage(
&db_clone, &account_id_clone,
input, output,
).await {
tracing::warn!("Failed to increment billing usage for SSE task {}: {}", task_id_clone, e);
}
}
// Record key usage
let total_tokens = input + output;
if let Err(e) = super::key_pool::record_key_usage(&db_clone, &key_id_for_spawn, Some(total_tokens)).await {
tracing::warn!("Failed to record key usage: {}", e);
}
@@ -503,6 +538,7 @@ pub async fn execute_relay(
pub async fn execute_relay_with_failover(
db: &PgPool,
task_id: &str,
account_id: &str,
candidates: &[CandidateModel],
request_body: &str,
stream: bool,
@@ -533,6 +569,7 @@ pub async fn execute_relay_with_failover(
match execute_relay(
db,
task_id,
account_id,
&candidate.provider_id,
&candidate.base_url,
&patched_body,