refactor: 统一项目名称从OpenFang到ZCLAW
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

重构所有代码和文档中的项目名称,将OpenFang统一更新为ZCLAW。包括:
- 配置文件中的项目名称
- 代码注释和文档引用
- 环境变量和路径
- 类型定义和接口名称
- 测试用例和模拟数据

同时优化部分代码结构,移除未使用的模块,并更新相关依赖项。
This commit is contained in:
iven
2026-03-27 07:36:03 +08:00
parent 4b08804aa9
commit 0d4fa96b82
226 changed files with 7288 additions and 5788 deletions

View File

@@ -0,0 +1,365 @@
//! Context compaction for the agent loop.
//!
//! Provides rule-based token estimation and message compaction to prevent
//! conversations from exceeding LLM context windows. When the estimated
//! token count exceeds the configured threshold, older messages are
//! summarized into a single system message and only recent messages are
//! retained.
use zclaw_types::Message;
/// Number of recent messages to preserve after compaction.
/// Used as the `keep_recent` argument when `maybe_compact` calls
/// `compact_messages`.
const DEFAULT_KEEP_RECENT: usize = 6;
/// Heuristic token count estimation.
///
/// CJK characters ≈ 1.5 tokens each, English words ≈ 1.3 tokens each.
/// Intentionally conservative (overestimates) to avoid hitting real limits.
pub fn estimate_tokens(text: &str) -> usize {
    if text.is_empty() {
        return 0;
    }
    // Assign a fractional token weight per character, sum left-to-right,
    // and round up at the end.
    let total: f64 = text
        .chars()
        .map(|ch| {
            let code = ch as u32;
            let is_cjk_ideograph = (0x4E00..=0x9FFF).contains(&code)
                || (0x3400..=0x4DBF).contains(&code)
                || (0x20000..=0x2A6DF).contains(&code)
                || (0xF900..=0xFAFF).contains(&code);
            if is_cjk_ideograph {
                // CJK ideographs — ~1.5 tokens
                1.5
            } else if (0x3000..=0x303F).contains(&code) || (0xFF00..=0xFFEF).contains(&code) {
                // CJK / fullwidth punctuation — ~1.0 token
                1.0
            } else if matches!(ch, ' ' | '\n' | '\t') {
                // whitespace
                0.25
            } else {
                // ASCII / Latin characters — roughly 4 chars per token
                0.3
            }
        })
        .sum();
    total.ceil() as usize
}
/// Estimate total tokens for a list of messages (including framing overhead).
pub fn estimate_messages_tokens(messages: &[Message]) -> usize {
let mut total = 0;
for msg in messages {
match msg {
Message::User { content } => {
total += estimate_tokens(content);
total += 4;
}
Message::Assistant { content, thinking } => {
total += estimate_tokens(content);
if let Some(th) = thinking {
total += estimate_tokens(th);
}
total += 4;
}
Message::System { content } => {
total += estimate_tokens(content);
total += 4;
}
Message::ToolUse { input, .. } => {
total += estimate_tokens(&input.to_string());
total += 4;
}
Message::ToolResult { output, .. } => {
total += estimate_tokens(&output.to_string());
total += 4;
}
}
}
total
}
/// Compact a message list by summarizing old messages and keeping recent ones.
///
/// When `messages.len() > keep_recent`, the oldest messages are summarized
/// into a single system message. System messages at the beginning of the
/// conversation are always preserved.
///
/// Returns the compacted message list and the number of original messages removed.
pub fn compact_messages(messages: Vec<Message>, keep_recent: usize) -> (Vec<Message>, usize) {
    if messages.len() <= keep_recent {
        return (messages, 0);
    }
    // Leading system messages (system prompt, compaction summaries from prior
    // runs) are carried over verbatim rather than folded into the summary.
    // Previously they were summarized away, contradicting the doc above and
    // silently dropping prior summaries.
    let leading_system_count = messages
        .iter()
        .take_while(|m| matches!(m, Message::System { .. }))
        .count();
    // Calculate split point: keep leading system + recent messages.
    let keep_from_end = keep_recent.min(messages.len().saturating_sub(leading_system_count));
    let split_index = messages.len().saturating_sub(keep_from_end);
    // Ensure the split never cuts into the leading system block.
    let split_index = split_index.max(leading_system_count);
    if split_index <= leading_system_count {
        // Nothing between the leading system block and the recent window
        // to summarize — leave the conversation untouched.
        return (messages, 0);
    }
    let old_messages = &messages[leading_system_count..split_index];
    let recent_messages = &messages[split_index..];
    let summary = generate_summary(old_messages);
    let removed_count = old_messages.len();
    let mut compacted = Vec::with_capacity(leading_system_count + 1 + recent_messages.len());
    compacted.extend(messages[..leading_system_count].iter().cloned());
    compacted.push(Message::system(summary));
    compacted.extend(recent_messages.iter().cloned());
    (compacted, removed_count)
}
/// Check if compaction should be triggered and perform it if needed.
///
/// Returns the (possibly compacted) message list.
pub fn maybe_compact(messages: Vec<Message>, threshold: usize) -> Vec<Message> {
    let tokens = estimate_messages_tokens(&messages);
    if tokens >= threshold {
        tracing::info!(
            "[Compaction] Triggered: {} tokens > {} threshold, {} messages",
            tokens,
            threshold,
            messages.len(),
        );
        let (compacted, removed) = compact_messages(messages, DEFAULT_KEEP_RECENT);
        tracing::info!(
            "[Compaction] Removed {} messages, {} remain",
            removed,
            compacted.len(),
        );
        compacted
    } else {
        // Under the threshold: pass the conversation through untouched.
        messages
    }
}
/// Generate a rule-based summary of old messages.
///
/// Counts user/assistant messages, extracts per-user-message topics, records
/// non-summary system prompts and tool invocations, then joins everything
/// into a single string capped at a character budget.
fn generate_summary(messages: &[Message]) -> String {
    if messages.is_empty() {
        return "[对话开始]".to_string();
    }
    let mut sections: Vec<String> = vec!["[以下是之前对话的摘要]".to_string()];
    let mut user_count = 0;
    let mut assistant_count = 0;
    let mut topics: Vec<String> = Vec::new();
    for msg in messages {
        match msg {
            Message::User { content } => {
                user_count += 1;
                let topic = extract_topic(content);
                if let Some(t) = topic {
                    topics.push(t);
                }
            }
            Message::Assistant { .. } => {
                assistant_count += 1;
            }
            Message::System { content } => {
                // Skip system messages that are previous compaction summaries
                if !content.starts_with("[以下是之前对话的摘要]") {
                    sections.push(format!("系统提示: {}", truncate(content, 60)));
                }
            }
            Message::ToolUse { tool, .. } => {
                sections.push(format!("工具调用: {}", tool.as_str()));
            }
            Message::ToolResult { .. } => {
                // Skip tool results in summary
            }
        }
    }
    if !topics.is_empty() {
        let topic_list: Vec<String> = topics.iter().take(8).cloned().collect();
        sections.push(format!("讨论主题: {}", topic_list.join("; ")));
    }
    sections.push(format!(
        "(已压缩 {} 条消息,其中用户 {} 条,助手 {} 条)",
        messages.len(),
        user_count,
        assistant_count,
    ));
    let summary = sections.join("\n");
    // Enforce max length. Truncate on a char boundary: the summary is largely
    // multi-byte CJK text, so the previous byte-index slice
    // (`&summary[..max_chars]`) could land mid-character and panic.
    let max_chars = 800;
    if summary.chars().count() > max_chars {
        let head: String = summary.chars().take(max_chars).collect();
        format!("{}...\n(摘要已截断)", head)
    } else {
        summary
    }
}
/// Extract the main topic from a user message (first sentence or first 50 chars).
///
/// Returns `None` for whitespace-only input.
fn extract_topic(content: &str) -> Option<String> {
    let trimmed = content.trim();
    if trimmed.is_empty() {
        return None;
    }
    // Stop at the first sentence-ending marker. The original source carried
    // two empty char literals here (garbled fullwidth punctuation, which does
    // not compile); restored as fullwidth ? / ! alongside 。 and newline.
    for (i, ch) in trimmed.char_indices() {
        if matches!(ch, '。' | '?' | '!' | '\n') {
            let end = i + ch.len_utf8();
            // Only use the first sentence if it ends within 80 bytes;
            // otherwise fall through to the length-based fallback below.
            if end <= 80 {
                return Some(trimmed[..end].trim().to_string());
            }
            break;
        }
    }
    // No usable sentence boundary: short messages are returned whole,
    // long ones are cut at 50 chars with an ellipsis.
    if trimmed.chars().count() <= 50 {
        return Some(trimmed.to_string());
    }
    Some(format!("{}...", trimmed.chars().take(50).collect::<String>()))
}
/// Truncate text to max_chars at char boundary.
fn truncate(text: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the byte offset of the first character past
    // the budget; `None` means the text already fits and is returned whole.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((cut, _)) => format!("{}...", &text[..cut]),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // --- estimate_tokens ---
    #[test]
    fn test_estimate_tokens_empty() {
        assert_eq!(estimate_tokens(""), 0);
    }
    #[test]
    fn test_estimate_tokens_english() {
        let tokens = estimate_tokens("Hello world");
        assert!(tokens > 0);
    }
    #[test]
    fn test_estimate_tokens_cjk() {
        let tokens = estimate_tokens("你好世界");
        assert!(tokens > 3); // CJK chars are ~1.5 tokens each
    }
    // --- estimate_messages_tokens ---
    #[test]
    fn test_estimate_messages_tokens() {
        // Each message adds content tokens plus framing overhead.
        let messages = vec![
            Message::user("Hello"),
            Message::assistant("Hi there"),
        ];
        let tokens = estimate_messages_tokens(&messages);
        assert!(tokens > 0);
    }
    // --- compact_messages ---
    #[test]
    fn test_compact_messages_under_threshold() {
        // len (2) <= keep_recent (6): no compaction performed.
        let messages = vec![
            Message::user("Hello"),
            Message::assistant("Hi"),
        ];
        let (result, removed) = compact_messages(messages, 6);
        assert_eq!(removed, 0);
        assert_eq!(result.len(), 2);
    }
    #[test]
    fn test_compact_messages_over_threshold() {
        // 20 messages, keep 4: the 16 oldest collapse into one summary.
        let messages: Vec<Message> = (0..10)
            .flat_map(|i| {
                vec![
                    Message::user(format!("Question {}", i)),
                    Message::assistant(format!("Answer {}", i)),
                ]
            })
            .collect();
        let (result, removed) = compact_messages(messages, 4);
        assert!(removed > 0);
        // Should have: 1 summary + 4 recent messages
        assert_eq!(result.len(), 5);
        // First message should be a system summary
        assert!(matches!(&result[0], Message::System { .. }));
    }
    #[test]
    fn test_compact_preserves_leading_system() {
        // NOTE(review): this only asserts result[0] is *some* system message,
        // which the compaction summary satisfies on its own — it does not
        // verify the original "You are helpful" prompt survives. Consider
        // asserting on the preserved content as well.
        let messages = vec![
            Message::system("You are helpful"),
            Message::user("Q1"),
            Message::assistant("A1"),
            Message::user("Q2"),
            Message::assistant("A2"),
            Message::user("Q3"),
            Message::assistant("A3"),
        ];
        let (result, removed) = compact_messages(messages, 4);
        assert!(removed > 0);
        // Should start with compaction summary, then recent messages
        assert!(matches!(&result[0], Message::System { .. }));
    }
    // --- maybe_compact ---
    #[test]
    fn test_maybe_compact_under_threshold() {
        // Token estimate is far below the threshold: list passes through.
        let messages = vec![
            Message::user("Short message"),
            Message::assistant("Short reply"),
        ];
        let result = maybe_compact(messages, 100_000);
        assert_eq!(result.len(), 2);
    }
    // --- extract_topic ---
    #[test]
    fn test_extract_topic_sentence() {
        let topic = extract_topic("什么是Rust的所有权系统").unwrap();
        assert!(topic.contains("所有权"));
    }
    #[test]
    fn test_extract_topic_short() {
        // <= 50 chars and no sentence marker: returned verbatim.
        let topic = extract_topic("Hello").unwrap();
        assert_eq!(topic, "Hello");
    }
    #[test]
    fn test_extract_topic_long() {
        // > 50 chars and no sentence marker: truncated with an ellipsis.
        let long = "This is a very long message that exceeds fifty characters in total length";
        let topic = extract_topic(long).unwrap();
        assert!(topic.ends_with("..."));
    }
    // --- generate_summary ---
    #[test]
    fn test_generate_summary() {
        let messages = vec![
            Message::user("What is Rust?"),
            Message::assistant("Rust is a systems programming language"),
            Message::user("How does ownership work?"),
            Message::assistant("Ownership is Rust's memory management system"),
        ];
        let summary = generate_summary(&messages);
        assert!(summary.contains("摘要"));
        assert!(summary.contains("2"));
    }
}