chore: 提交所有工作进度 — SaaS 后端增强、Admin UI、桌面端集成

包含大量 SaaS 平台改进、Admin 管理后台更新、桌面端集成完善、
文档同步、测试文件重构等内容。为 QA 测试准备干净工作树。
This commit is contained in:
iven
2026-03-29 10:46:26 +08:00
parent 9a5fad2b59
commit 5fdf96c3f5
268 changed files with 22011 additions and 3886 deletions

View File

@@ -13,6 +13,7 @@
//! Optionally flushes old messages to the growth/memory system before discarding.
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use zclaw_types::{AgentId, Message, SessionId};
use crate::driver::{CompletionRequest, ContentBlock, LlmDriver};
@@ -40,9 +41,18 @@ pub fn estimate_tokens(text: &str) -> usize {
{
// CJK ideographs — ~1.5 tokens
tokens += 1.5;
} else if (0xAC00..=0xD7AF).contains(&code) || (0x1100..=0x11FF).contains(&code) {
// Korean Hangul syllables + Jamo — ~1.5 tokens
tokens += 1.5;
} else if (0x3040..=0x309F).contains(&code) || (0x30A0..=0x30FF).contains(&code) {
// Japanese Hiragana + Katakana — ~1.5 tokens
tokens += 1.5;
} else if (0x3000..=0x303F).contains(&code) || (0xFF00..=0xFFEF).contains(&code) {
// CJK / fullwidth punctuation — ~1.0 token
tokens += 1.0;
} else if (0x1F000..=0x1FAFF).contains(&code) || (0x2600..=0x27BF).contains(&code) {
// Emoji & Symbols — ~2.0 tokens
tokens += 2.0;
} else if char == ' ' || char == '\n' || char == '\t' {
// whitespace
tokens += 0.25;
@@ -88,6 +98,54 @@ pub fn estimate_messages_tokens(messages: &[Message]) -> usize {
total
}
// ============================================================
// Calibration: adjust heuristic estimates using API feedback
// ============================================================
/// Bit pattern of `1.0_f64` (the same value as `1.0f64.to_bits()`), spelled in hex so the
/// exponent/mantissa split is visible at a glance.
const F64_1_0_BITS: u64 = 0x3FF0_0000_0000_0000;

/// Global calibration factor for token estimation, stored as the raw bits of an `f64`
/// inside an atomic integer.
///
/// Starts at 1.0 (no adjustment) and is moved by an exponential moving average whenever
/// actual token counts are fed back.
static CALIBRATION_FACTOR_BITS: AtomicU64 = AtomicU64::new(F64_1_0_BITS);
/// Return the calibration factor currently in effect.
///
/// Loads the stored bit pattern from the global atomic (relaxed ordering) and
/// reinterprets it as an `f64`.
pub fn get_calibration_factor() -> f64 {
    let bits = CALIBRATION_FACTOR_BITS.load(Ordering::Relaxed);
    f64::from_bits(bits)
}
/// Update the calibration factor using an exponential moving average.
///
/// Compares the heuristic estimate with the actual token count:
/// - `ratio = actual / estimated`, so underestimates push the factor UP
/// - EMA: `new = current * 0.7 + ratio * 0.3`
/// - The result is clamped to `[0.5, 2.0]` to prevent runaway values
///
/// The call is a no-op when either `estimated` or `actual` is zero.
///
/// The read-modify-write is performed as one atomic `fetch_update`, so concurrent
/// callers cannot overwrite each other's sample (a separate `load` followed by
/// `store` could lose an update).
pub fn update_calibration(estimated: usize, actual: u32) {
    if actual == 0 || estimated == 0 {
        return;
    }

    let ratio = f64::from(actual) / estimated as f64;
    let blend = |current: f64| (current * 0.7 + ratio * 0.3).clamp(0.5, 2.0);

    // The closure always returns `Some`, so `fetch_update` only hands back the previous
    // bits; both arms are matched to avoid an unwrap.
    let previous_bits = match CALIBRATION_FACTOR_BITS.fetch_update(
        Ordering::Relaxed,
        Ordering::Relaxed,
        |bits| Some(blend(f64::from_bits(bits)).to_bits()),
    ) {
        Ok(bits) | Err(bits) => bits,
    };

    // Recompute from the value that was actually replaced so the log line matches
    // what was stored.
    let current = f64::from_bits(previous_bits);
    let new_factor = blend(current);

    tracing::debug!(
        "[Compaction] Calibration: estimated={}, actual={}, ratio={:.2}, factor {:.2} → {:.2}",
        estimated, actual, ratio, current, new_factor
    );
}
/// Estimate the total token count of `messages`, scaled by the calibration factor.
///
/// When the factor is within `f64::EPSILON` of 1.0 the raw heuristic estimate is
/// returned unchanged; otherwise the estimate is multiplied by the factor and
/// rounded up.
fn estimate_messages_tokens_calibrated(messages: &[Message]) -> usize {
    let uncalibrated = estimate_messages_tokens(messages);
    let scale = get_calibration_factor();

    let is_neutral = (scale - 1.0).abs() < f64::EPSILON;
    if is_neutral {
        return uncalibrated;
    }

    (uncalibrated as f64 * scale).ceil() as usize
}
/// Compact a message list by summarizing old messages and keeping recent ones.
///
/// When `messages.len() > keep_recent`, the oldest messages are summarized
@@ -134,7 +192,7 @@ pub fn compact_messages(messages: Vec<Message>, keep_recent: usize) -> (Vec<Mess
///
/// Returns the (possibly compacted) message list.
pub fn maybe_compact(messages: Vec<Message>, threshold: usize) -> Vec<Message> {
let tokens = estimate_messages_tokens(&messages);
let tokens = estimate_messages_tokens_calibrated(&messages);
if tokens < threshold {
return messages;
}
@@ -208,7 +266,7 @@ pub async fn maybe_compact_with_config(
driver: Option<&Arc<dyn LlmDriver>>,
growth: Option<&GrowthIntegration>,
) -> CompactionOutcome {
let tokens = estimate_messages_tokens(&messages);
let tokens = estimate_messages_tokens_calibrated(&messages);
if tokens < threshold {
return CompactionOutcome {
messages,
@@ -475,10 +533,11 @@ fn generate_summary(messages: &[Message]) -> String {
let summary = sections.join("\n");
// Enforce max length
// Enforce max length (char-safe for CJK)
let max_chars = 800;
if summary.len() > max_chars {
format!("{}...\n(摘要已截断)", &summary[..max_chars])
if summary.chars().count() > max_chars {
let truncated: String = summary.chars().take(max_chars).collect();
format!("{}...\n(摘要已截断)", truncated)
} else {
summary
}