chore: 提交所有工作进度 — SaaS 后端增强、Admin UI、桌面端集成

包含大量 SaaS 平台改进、Admin 管理后台更新、桌面端集成完善、文档同步、测试文件重构等内容。为 QA 测试准备干净工作树。
2026-03-29 10:46:26 +08:00
parent 9a5fad2b59
commit 5fdf96c3f5
268 changed files with 22011 additions and 3886 deletions
--- a/crates/zclaw-runtime/src/compaction.rs
+++ b/crates/zclaw-runtime/src/compaction.rs
@@ -13,6 +13,7 @@
 //! Optionally flushes old messages to the growth/memory system before discarding.

 use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
 use zclaw_types::{AgentId, Message, SessionId};

 use crate::driver::{CompletionRequest, ContentBlock, LlmDriver};
@@ -40,9 +41,18 @@ pub fn estimate_tokens(text: &str) -> usize {
        {
            // CJK ideographs — ~1.5 tokens
            tokens += 1.5;
+        } else if (0xAC00..=0xD7AF).contains(&code) || (0x1100..=0x11FF).contains(&code) {
+            // Korean Hangul syllables + Jamo — ~1.5 tokens
+            tokens += 1.5;
+        } else if (0x3040..=0x309F).contains(&code) || (0x30A0..=0x30FF).contains(&code) {
+            // Japanese Hiragana + Katakana — ~1.5 tokens
+            tokens += 1.5;
        } else if (0x3000..=0x303F).contains(&code) || (0xFF00..=0xFFEF).contains(&code) {
            // CJK / fullwidth punctuation — ~1.0 token
            tokens += 1.0;
+        } else if (0x1F000..=0x1FAFF).contains(&code) || (0x2600..=0x27BF).contains(&code) {
+            // Emoji & Symbols — ~2.0 tokens
+            tokens += 2.0;
        } else if char == ' ' || char == '\n' || char == '\t' {
            // whitespace
            tokens += 0.25;
@@ -88,6 +98,54 @@ pub fn estimate_messages_tokens(messages: &[Message]) -> usize {
    total
 }

+// ============================================================
+// Calibration: adjust heuristic estimates using API feedback
+// ============================================================
+
+const F64_1_0_BITS: u64 = 4607182418800017408u64; // 1.0f64.to_bits() == 0x3FF0_0000_0000_0000
+
+/// Global calibration factor for token estimation, kept as the raw bit pattern of an `f64`
+/// inside an `AtomicU64` (decode with `f64::from_bits`).
+///
+/// `update_calibration` rewrites it with an EMA of actual/estimated; it starts at 1.0 (no adjustment).
+static CALIBRATION_FACTOR_BITS: AtomicU64 = AtomicU64::new(F64_1_0_BITS);
+
+/// Return the current calibration factor, decoded from the bits held in `CALIBRATION_FACTOR_BITS`.
+pub fn get_calibration_factor() -> f64 {
+    f64::from_bits(CALIBRATION_FACTOR_BITS.load(Ordering::Relaxed))
+}
+
+/// Fold one API-reported token count into the global calibration factor.
+///
+/// `ratio = actual / estimated`, so underestimates push the factor up. The factor moves by an
+/// EMA, `new = current * 0.7 + ratio * 0.3`, clamped to [0.5, 2.0]; zero inputs are ignored.
+/// The update is a single atomic `fetch_update`, so concurrent callers cannot lose updates.
+pub fn update_calibration(estimated: usize, actual: u32) {
+    if actual == 0 || estimated == 0 {
+        return;
+    }
+    let ratio = actual as f64 / estimated as f64;
+    let blend = |bits: u64| (f64::from_bits(bits) * 0.7 + ratio * 0.3).clamp(0.5, 2.0).to_bits();
+    let prev = CALIBRATION_FACTOR_BITS
+        .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |bits| Some(blend(bits)))
+        .unwrap_or_else(|bits| bits); // closure never returns None, so Err is unreachable
+    tracing::debug!(
+        "[Compaction] Calibration: estimated={}, actual={}, ratio={:.2}, factor {:.2} → {:.2}",
+        estimated, actual, ratio, f64::from_bits(prev), f64::from_bits(blend(prev))
+    );
+}
+
+/// Estimate tokens for `messages`; scale by the calibration factor (rounding up) unless it is ~1.0.
+fn estimate_messages_tokens_calibrated(messages: &[Message]) -> usize {
+    let unscaled = estimate_messages_tokens(messages);
+    let scale = get_calibration_factor();
+    let is_identity = (scale - 1.0).abs() < f64::EPSILON;
+    if is_identity {
+        return unscaled;
+    }
+    (unscaled as f64 * scale).ceil() as usize
+}
+
 /// Compact a message list by summarizing old messages and keeping recent ones.
 ///
 /// When `messages.len() > keep_recent`, the oldest messages are summarized
@@ -134,7 +192,7 @@ pub fn compact_messages(messages: Vec<Message>, keep_recent: usize) -> (Vec<Mess
 ///
 /// Returns the (possibly compacted) message list.
 pub fn maybe_compact(messages: Vec<Message>, threshold: usize) -> Vec<Message> {
-    let tokens = estimate_messages_tokens(&messages);
+    let tokens = estimate_messages_tokens_calibrated(&messages);
    if tokens < threshold {
        return messages;
    }
@@ -208,7 +266,7 @@ pub async fn maybe_compact_with_config(
    driver: Option<&Arc<dyn LlmDriver>>,
    growth: Option<&GrowthIntegration>,
 ) -> CompactionOutcome {
-    let tokens = estimate_messages_tokens(&messages);
+    let tokens = estimate_messages_tokens_calibrated(&messages);
    if tokens < threshold {
        return CompactionOutcome {
            messages,
@@ -475,10 +533,11 @@ fn generate_summary(messages: &[Message]) -> String {

    let summary = sections.join("\n");

-    // Enforce max length
+    // Enforce max length (char-safe for CJK)
    let max_chars = 800;
-    if summary.len() > max_chars {
-        format!("{}...\n(摘要已截断)", &summary[..max_chars])
+    if summary.chars().count() > max_chars {
+        let truncated: String = summary.chars().take(max_chars).collect();
+        format!("{}...\n(摘要已截断)", truncated)
    } else {
        summary
    }