chore: 提交所有工作进度 — SaaS 后端增强、Admin UI、桌面端集成
包含大量 SaaS 平台改进、Admin 管理后台更新、桌面端集成完善、文档同步、测试文件重构等内容。为 QA 测试准备干净工作树。
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
//! Optionally flushes old messages to the growth/memory system before discarding.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use zclaw_types::{AgentId, Message, SessionId};
|
||||
|
||||
use crate::driver::{CompletionRequest, ContentBlock, LlmDriver};
|
||||
@@ -40,9 +41,18 @@ pub fn estimate_tokens(text: &str) -> usize {
|
||||
{
|
||||
// CJK ideographs — ~1.5 tokens
|
||||
tokens += 1.5;
|
||||
} else if (0xAC00..=0xD7AF).contains(&code) || (0x1100..=0x11FF).contains(&code) {
|
||||
// Korean Hangul syllables + Jamo — ~1.5 tokens
|
||||
tokens += 1.5;
|
||||
} else if (0x3040..=0x309F).contains(&code) || (0x30A0..=0x30FF).contains(&code) {
|
||||
// Japanese Hiragana + Katakana — ~1.5 tokens
|
||||
tokens += 1.5;
|
||||
} else if (0x3000..=0x303F).contains(&code) || (0xFF00..=0xFFEF).contains(&code) {
|
||||
// CJK / fullwidth punctuation — ~1.0 token
|
||||
tokens += 1.0;
|
||||
} else if (0x1F000..=0x1FAFF).contains(&code) || (0x2600..=0x27BF).contains(&code) {
|
||||
// Emoji & Symbols — ~2.0 tokens
|
||||
tokens += 2.0;
|
||||
} else if char == ' ' || char == '\n' || char == '\t' {
|
||||
// whitespace
|
||||
tokens += 0.25;
|
||||
@@ -88,6 +98,54 @@ pub fn estimate_messages_tokens(messages: &[Message]) -> usize {
|
||||
total
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Calibration: adjust heuristic estimates using API feedback
|
||||
// ============================================================
|
||||
|
||||
/// IEEE-754 bit pattern of `1.0_f64` (same value as `1.0f64.to_bits()`).
const F64_1_0_BITS: u64 = 0x3FF0_0000_0000_0000;
|
||||
|
||||
/// Global calibration factor for token estimation (stored as f64 bits).
|
||||
///
|
||||
/// Updated via exponential moving average when API returns actual token counts.
|
||||
/// Initial value is 1.0 (no adjustment).
|
||||
static CALIBRATION_FACTOR_BITS: AtomicU64 = AtomicU64::new(F64_1_0_BITS);
|
||||
|
||||
/// Get the current calibration factor.
|
||||
pub fn get_calibration_factor() -> f64 {
|
||||
f64::from_bits(CALIBRATION_FACTOR_BITS.load(Ordering::Relaxed))
|
||||
}
|
||||
|
||||
/// Update calibration factor using exponential moving average.
|
||||
///
|
||||
/// Compares estimated tokens with actual tokens from API response:
|
||||
/// - `ratio = actual / estimated` so underestimates push factor UP
|
||||
/// - EMA: `new = current * 0.7 + ratio * 0.3`
|
||||
/// - Clamped to [0.5, 2.0] to prevent runaway values
|
||||
pub fn update_calibration(estimated: usize, actual: u32) {
|
||||
if actual == 0 || estimated == 0 {
|
||||
return;
|
||||
}
|
||||
let ratio = actual as f64 / estimated as f64;
|
||||
let current = get_calibration_factor();
|
||||
let new_factor = (current * 0.7 + ratio * 0.3).clamp(0.5, 2.0);
|
||||
CALIBRATION_FACTOR_BITS.store(new_factor.to_bits(), Ordering::Relaxed);
|
||||
tracing::debug!(
|
||||
"[Compaction] Calibration: estimated={}, actual={}, ratio={:.2}, factor {:.2} → {:.2}",
|
||||
estimated, actual, ratio, current, new_factor
|
||||
);
|
||||
}
|
||||
|
||||
/// Estimate total tokens for messages with calibration applied.
|
||||
fn estimate_messages_tokens_calibrated(messages: &[Message]) -> usize {
|
||||
let raw = estimate_messages_tokens(messages);
|
||||
let factor = get_calibration_factor();
|
||||
if (factor - 1.0).abs() < f64::EPSILON {
|
||||
raw
|
||||
} else {
|
||||
((raw as f64 * factor).ceil()) as usize
|
||||
}
|
||||
}
|
||||
|
||||
/// Compact a message list by summarizing old messages and keeping recent ones.
|
||||
///
|
||||
/// When `messages.len() > keep_recent`, the oldest messages are summarized
|
||||
@@ -134,7 +192,7 @@ pub fn compact_messages(messages: Vec<Message>, keep_recent: usize) -> (Vec<Mess
|
||||
///
|
||||
/// Returns the (possibly compacted) message list.
|
||||
pub fn maybe_compact(messages: Vec<Message>, threshold: usize) -> Vec<Message> {
|
||||
let tokens = estimate_messages_tokens(&messages);
|
||||
let tokens = estimate_messages_tokens_calibrated(&messages);
|
||||
if tokens < threshold {
|
||||
return messages;
|
||||
}
|
||||
@@ -208,7 +266,7 @@ pub async fn maybe_compact_with_config(
|
||||
driver: Option<&Arc<dyn LlmDriver>>,
|
||||
growth: Option<&GrowthIntegration>,
|
||||
) -> CompactionOutcome {
|
||||
let tokens = estimate_messages_tokens(&messages);
|
||||
let tokens = estimate_messages_tokens_calibrated(&messages);
|
||||
if tokens < threshold {
|
||||
return CompactionOutcome {
|
||||
messages,
|
||||
@@ -475,10 +533,11 @@ fn generate_summary(messages: &[Message]) -> String {
|
||||
|
||||
let summary = sections.join("\n");
|
||||
|
||||
// Enforce max length
|
||||
// Enforce max length (char-safe for CJK)
|
||||
let max_chars = 800;
|
||||
if summary.len() > max_chars {
|
||||
format!("{}...\n(摘要已截断)", &summary[..max_chars])
|
||||
if summary.chars().count() > max_chars {
|
||||
let truncated: String = summary.chars().take(max_chars).collect();
|
||||
format!("{}...\n(摘要已截断)", truncated)
|
||||
} else {
|
||||
summary
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user