fix: 三端联调测试 2 P0 + 6 P1 + 2 P2 修复
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
P0-1: SaaS relay 模型别名解析 — "glm-4-flash" → "glm-4-flash-250414" (resolve_model)
P0-2: config.rs interpolate_env_vars UTF-8 修复 (chars 迭代器替代 bytes as char),并新增 DB 启动编码检查与 docker-compose UTF-8 编码参数
P1-3: UI 模型选择器覆盖 Agent 默认模型 (model_override 全链路: TS→Tauri→Rust kernel)
P1-6: 知识搜索管道修复 — seed_knowledge 创建 chunks + 默认分类 (seed/uploaded/distillation)
P1-7: 用量限额从当前 Plan 读取 (非 stale usage 表)
P1-8: relay 双维度配额检查 (relay_requests + input_tokens)
P2-9: SSE 路径 token 计数修复 — 以 token 计数稳定性轮询(每 500ms 检查一次,非零计数连续 3 次不变即视为流结束,最长等待 120s)替代固定 500ms sleep,并在流结束后调用 billing increment_usage
This commit is contained in:
@@ -152,8 +152,8 @@ pub async fn chat_completions(
|
||||
}
|
||||
ModelResolution::Group(candidates)
|
||||
} else {
|
||||
// 向后兼容:直接模型查找
|
||||
let target_model = state.cache.get_model(model_name)
|
||||
// 向后兼容:直接模型查找 + 别名解析(如 "glm-4-flash" → "glm-4-flash-250414")
|
||||
let target_model = state.cache.resolve_model(model_name)
|
||||
.ok_or_else(|| SaasError::NotFound(format!("模型 {} 不存在或未启用", model_name)))?;
|
||||
|
||||
// 获取 provider 信息 — 使用内存缓存消除 DB 查询
|
||||
@@ -218,7 +218,7 @@ pub async fn chat_completions(
|
||||
ModelResolution::Direct(ref candidate) => {
|
||||
// 单 Provider 直接路由(向后兼容)
|
||||
match service::execute_relay(
|
||||
&state.db, &task.id, &candidate.provider_id,
|
||||
&state.db, &task.id, &ctx.account_id, &candidate.provider_id,
|
||||
&candidate.base_url, &request_body, stream,
|
||||
max_attempts, retry_delay_ms, &enc_key,
|
||||
true, // 独立调用,管理 task 状态
|
||||
@@ -233,7 +233,7 @@ pub async fn chat_completions(
|
||||
// SSE 一旦开始流式传输,中途上游断连不会触发 failover(SSE 协议固有限制)。
|
||||
service::sort_candidates_by_quota(&state.db, candidates).await;
|
||||
service::execute_relay_with_failover(
|
||||
&state.db, &task.id, candidates,
|
||||
&state.db, &task.id, &ctx.account_id, candidates,
|
||||
&request_body, stream,
|
||||
max_attempts, retry_delay_ms, &enc_key
|
||||
).await
|
||||
@@ -553,11 +553,12 @@ pub async fn retry_task(
|
||||
// 异步执行重试 — 根据解析结果选择执行路径
|
||||
let db = state.db.clone();
|
||||
let task_id = id.clone();
|
||||
let account_id_for_spawn = task.account_id.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
let result = match model_resolution {
|
||||
ModelResolution::Direct(ref candidate) => {
|
||||
service::execute_relay(
|
||||
&db, &task_id, &candidate.provider_id,
|
||||
&db, &task_id, &account_id_for_spawn, &candidate.provider_id,
|
||||
&candidate.base_url, &body, stream,
|
||||
max_attempts, base_delay_ms, &enc_key,
|
||||
true,
|
||||
@@ -566,7 +567,7 @@ pub async fn retry_task(
|
||||
ModelResolution::Group(ref mut candidates) => {
|
||||
service::sort_candidates_by_quota(&db, candidates).await;
|
||||
service::execute_relay_with_failover(
|
||||
&db, &task_id, candidates,
|
||||
&db, &task_id, &account_id_for_spawn, candidates,
|
||||
&body, stream,
|
||||
max_attempts, base_delay_ms, &enc_key,
|
||||
).await
|
||||
|
||||
@@ -217,6 +217,7 @@ impl SseUsageCapture {
|
||||
pub async fn execute_relay(
|
||||
db: &PgPool,
|
||||
task_id: &str,
|
||||
account_id: &str,
|
||||
provider_id: &str,
|
||||
provider_base_url: &str,
|
||||
request_body: &str,
|
||||
@@ -313,6 +314,7 @@ pub async fn execute_relay(
|
||||
let db_clone = db.clone();
|
||||
let task_id_clone = task_id.to_string();
|
||||
let key_id_for_spawn = key_id.clone();
|
||||
let account_id_clone = account_id.to_string();
|
||||
|
||||
// Bounded channel for backpressure: 128 chunks (~128KB) buffer.
|
||||
// If the client reads slowly, the upstream is signaled via
|
||||
@@ -369,20 +371,53 @@ pub async fn execute_relay(
|
||||
|
||||
tokio::spawn(async move {
|
||||
let _permit = permit; // 持有 permit 直到任务完成
|
||||
// Brief delay to allow SSE stream to settle before recording
|
||||
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
||||
let capture = usage_capture.lock().await;
|
||||
let (input, output) = (
|
||||
if capture.input_tokens > 0 { Some(capture.input_tokens) } else { None },
|
||||
if capture.output_tokens > 0 { Some(capture.output_tokens) } else { None },
|
||||
);
|
||||
// Record task status with timeout to avoid holding DB connections
|
||||
// 等待 SSE 流结束 — 等待 capture 稳定(tokens 不再增长)
|
||||
// 替代原来固定 500ms 的 race condition
|
||||
let max_wait = std::time::Duration::from_secs(120);
|
||||
let poll_interval = std::time::Duration::from_millis(500);
|
||||
let start = tokio::time::Instant::now();
|
||||
let mut last_tokens: i64 = 0;
|
||||
let mut stable_count = 0;
|
||||
let (input, output) = loop {
|
||||
tokio::time::sleep(poll_interval).await;
|
||||
let capture = usage_capture.lock().await;
|
||||
let total = capture.input_tokens + capture.output_tokens;
|
||||
if total == last_tokens && total > 0 {
|
||||
stable_count += 1;
|
||||
if stable_count >= 3 {
|
||||
// 连续 3 次稳定(1.5s),认为流结束
|
||||
break (capture.input_tokens, capture.output_tokens);
|
||||
}
|
||||
} else {
|
||||
stable_count = 0;
|
||||
last_tokens = total;
|
||||
}
|
||||
drop(capture);
|
||||
if start.elapsed() >= max_wait {
|
||||
let capture = usage_capture.lock().await;
|
||||
break (capture.input_tokens, capture.output_tokens);
|
||||
}
|
||||
};
|
||||
|
||||
let input_opt = if input > 0 { Some(input) } else { None };
|
||||
let output_opt = if output > 0 { Some(output) } else { None };
|
||||
|
||||
// Record task status + billing usage + key usage
|
||||
let db_op = async {
|
||||
if let Err(e) = update_task_status(&db_clone, &task_id_clone, "completed", input, output, None).await {
|
||||
if let Err(e) = update_task_status(&db_clone, &task_id_clone, "completed", input_opt, output_opt, None).await {
|
||||
tracing::warn!("Failed to update task status after SSE stream: {}", e);
|
||||
}
|
||||
// Record key usage (now 2 queries instead of 3)
|
||||
let total_tokens = input.unwrap_or(0) + output.unwrap_or(0);
|
||||
// P2-9 修复: SSE 路径也更新 billing_usage_quotas
|
||||
if input > 0 || output > 0 {
|
||||
if let Err(e) = crate::billing::service::increment_usage(
|
||||
&db_clone, &account_id_clone,
|
||||
input, output,
|
||||
).await {
|
||||
tracing::warn!("Failed to increment billing usage for SSE task {}: {}", task_id_clone, e);
|
||||
}
|
||||
}
|
||||
// Record key usage
|
||||
let total_tokens = input + output;
|
||||
if let Err(e) = super::key_pool::record_key_usage(&db_clone, &key_id_for_spawn, Some(total_tokens)).await {
|
||||
tracing::warn!("Failed to record key usage: {}", e);
|
||||
}
|
||||
@@ -503,6 +538,7 @@ pub async fn execute_relay(
|
||||
pub async fn execute_relay_with_failover(
|
||||
db: &PgPool,
|
||||
task_id: &str,
|
||||
account_id: &str,
|
||||
candidates: &[CandidateModel],
|
||||
request_body: &str,
|
||||
stream: bool,
|
||||
@@ -533,6 +569,7 @@ pub async fn execute_relay_with_failover(
|
||||
match execute_relay(
|
||||
db,
|
||||
task_id,
|
||||
account_id,
|
||||
&candidate.provider_id,
|
||||
&candidate.base_url,
|
||||
&patched_body,
|
||||
|
||||
Reference in New Issue
Block a user