chore: 提交所有工作进度 — SaaS 后端增强、Admin UI、桌面端集成
包含大量 SaaS 平台改进、Admin 管理后台更新、桌面端集成完善、文档同步、测试文件重构等内容。为 QA 测试准备干净工作树。
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
//! Optionally flushes old messages to the growth/memory system before discarding.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use zclaw_types::{AgentId, Message, SessionId};
|
||||
|
||||
use crate::driver::{CompletionRequest, ContentBlock, LlmDriver};
|
||||
@@ -40,9 +41,18 @@ pub fn estimate_tokens(text: &str) -> usize {
|
||||
{
|
||||
// CJK ideographs — ~1.5 tokens
|
||||
tokens += 1.5;
|
||||
} else if (0xAC00..=0xD7AF).contains(&code) || (0x1100..=0x11FF).contains(&code) {
|
||||
// Korean Hangul syllables + Jamo — ~1.5 tokens
|
||||
tokens += 1.5;
|
||||
} else if (0x3040..=0x309F).contains(&code) || (0x30A0..=0x30FF).contains(&code) {
|
||||
// Japanese Hiragana + Katakana — ~1.5 tokens
|
||||
tokens += 1.5;
|
||||
} else if (0x3000..=0x303F).contains(&code) || (0xFF00..=0xFFEF).contains(&code) {
|
||||
// CJK / fullwidth punctuation — ~1.0 token
|
||||
tokens += 1.0;
|
||||
} else if (0x1F000..=0x1FAFF).contains(&code) || (0x2600..=0x27BF).contains(&code) {
|
||||
// Emoji & Symbols — ~2.0 tokens
|
||||
tokens += 2.0;
|
||||
} else if char == ' ' || char == '\n' || char == '\t' {
|
||||
// whitespace
|
||||
tokens += 0.25;
|
||||
@@ -88,6 +98,54 @@ pub fn estimate_messages_tokens(messages: &[Message]) -> usize {
|
||||
total
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Calibration: adjust heuristic estimates using API feedback
|
||||
// ============================================================
|
||||
|
||||
/// Bit pattern of `1.0_f64` (the value of `1.0f64.to_bits()`), used to seed the calibration factor.
const F64_1_0_BITS: u64 = 0x3FF0_0000_0000_0000;

/// Global calibration factor for token estimation, kept as the raw bits of an `f64`
/// so it can live in an atomic.
///
/// Updated via exponential moving average when the API returns actual token counts.
/// The initial value is 1.0 (no adjustment).
static CALIBRATION_FACTOR_BITS: AtomicU64 = AtomicU64::new(F64_1_0_BITS);

/// Read the current calibration factor.
pub fn get_calibration_factor() -> f64 {
    let bits = CALIBRATION_FACTOR_BITS.load(Ordering::Relaxed);
    f64::from_bits(bits)
}
|
||||
|
||||
/// Update calibration factor using exponential moving average.
|
||||
///
|
||||
/// Compares estimated tokens with actual tokens from API response:
|
||||
/// - `ratio = actual / estimated` so underestimates push factor UP
|
||||
/// - EMA: `new = current * 0.7 + ratio * 0.3`
|
||||
/// - Clamped to [0.5, 2.0] to prevent runaway values
|
||||
pub fn update_calibration(estimated: usize, actual: u32) {
|
||||
if actual == 0 || estimated == 0 {
|
||||
return;
|
||||
}
|
||||
let ratio = actual as f64 / estimated as f64;
|
||||
let current = get_calibration_factor();
|
||||
let new_factor = (current * 0.7 + ratio * 0.3).clamp(0.5, 2.0);
|
||||
CALIBRATION_FACTOR_BITS.store(new_factor.to_bits(), Ordering::Relaxed);
|
||||
tracing::debug!(
|
||||
"[Compaction] Calibration: estimated={}, actual={}, ratio={:.2}, factor {:.2} → {:.2}",
|
||||
estimated, actual, ratio, current, new_factor
|
||||
);
|
||||
}
|
||||
|
||||
/// Estimate total tokens for messages with calibration applied.
|
||||
fn estimate_messages_tokens_calibrated(messages: &[Message]) -> usize {
|
||||
let raw = estimate_messages_tokens(messages);
|
||||
let factor = get_calibration_factor();
|
||||
if (factor - 1.0).abs() < f64::EPSILON {
|
||||
raw
|
||||
} else {
|
||||
((raw as f64 * factor).ceil()) as usize
|
||||
}
|
||||
}
|
||||
|
||||
/// Compact a message list by summarizing old messages and keeping recent ones.
|
||||
///
|
||||
/// When `messages.len() > keep_recent`, the oldest messages are summarized
|
||||
@@ -134,7 +192,7 @@ pub fn compact_messages(messages: Vec<Message>, keep_recent: usize) -> (Vec<Mess
|
||||
///
|
||||
/// Returns the (possibly compacted) message list.
|
||||
pub fn maybe_compact(messages: Vec<Message>, threshold: usize) -> Vec<Message> {
|
||||
let tokens = estimate_messages_tokens(&messages);
|
||||
let tokens = estimate_messages_tokens_calibrated(&messages);
|
||||
if tokens < threshold {
|
||||
return messages;
|
||||
}
|
||||
@@ -208,7 +266,7 @@ pub async fn maybe_compact_with_config(
|
||||
driver: Option<&Arc<dyn LlmDriver>>,
|
||||
growth: Option<&GrowthIntegration>,
|
||||
) -> CompactionOutcome {
|
||||
let tokens = estimate_messages_tokens(&messages);
|
||||
let tokens = estimate_messages_tokens_calibrated(&messages);
|
||||
if tokens < threshold {
|
||||
return CompactionOutcome {
|
||||
messages,
|
||||
@@ -475,10 +533,11 @@ fn generate_summary(messages: &[Message]) -> String {
|
||||
|
||||
let summary = sections.join("\n");
|
||||
|
||||
// Enforce max length
|
||||
// Enforce max length (char-safe for CJK)
|
||||
let max_chars = 800;
|
||||
if summary.len() > max_chars {
|
||||
format!("{}...\n(摘要已截断)", &summary[..max_chars])
|
||||
if summary.chars().count() > max_chars {
|
||||
let truncated: String = summary.chars().take(max_chars).collect();
|
||||
format!("{}...\n(摘要已截断)", truncated)
|
||||
} else {
|
||||
summary
|
||||
}
|
||||
|
||||
@@ -130,7 +130,8 @@ impl LlmDriver for OpenAiDriver {
|
||||
let api_key = self.api_key.expose_secret().to_string();
|
||||
|
||||
Box::pin(stream! {
|
||||
tracing::debug!("[OpenAiDriver:stream] Starting HTTP request...");
|
||||
println!("[OpenAI:stream] POST to {}/chat/completions", base_url);
|
||||
println!("[OpenAI:stream] Request model={}, stream={}", stream_request.model, stream_request.stream);
|
||||
let response = match self.client
|
||||
.post(format!("{}/chat/completions", base_url))
|
||||
.header("Authorization", format!("Bearer {}", api_key))
|
||||
@@ -141,11 +142,11 @@ impl LlmDriver for OpenAiDriver {
|
||||
.await
|
||||
{
|
||||
Ok(r) => {
|
||||
tracing::debug!("[OpenAiDriver:stream] Got response, status: {}", r.status());
|
||||
println!("[OpenAI:stream] Response status: {}, content-type: {:?}", r.status(), r.headers().get("content-type"));
|
||||
r
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::error!("[OpenAiDriver:stream] HTTP request failed: {:?}", e);
|
||||
println!("[OpenAI:stream] HTTP request FAILED: {:?}", e);
|
||||
yield Err(ZclawError::LlmError(format!("HTTP request failed: {}", e)));
|
||||
return;
|
||||
}
|
||||
@@ -154,6 +155,7 @@ impl LlmDriver for OpenAiDriver {
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
println!("[OpenAI:stream] API error {}: {}", status, &body[..body.len().min(500)]);
|
||||
yield Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
|
||||
return;
|
||||
}
|
||||
@@ -161,21 +163,45 @@ impl LlmDriver for OpenAiDriver {
|
||||
let mut byte_stream = response.bytes_stream();
|
||||
let mut accumulated_tool_calls: std::collections::HashMap<String, (String, String)> = std::collections::HashMap::new();
|
||||
let mut current_tool_id: Option<String> = None;
|
||||
let mut sse_event_count: usize = 0;
|
||||
let mut raw_bytes_total: usize = 0;
|
||||
|
||||
while let Some(chunk_result) = byte_stream.next().await {
|
||||
let chunk = match chunk_result {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
println!("[OpenAI:stream] Byte stream error: {:?}", e);
|
||||
yield Err(ZclawError::LlmError(format!("Stream error: {}", e)));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
raw_bytes_total += chunk.len();
|
||||
let text = String::from_utf8_lossy(&chunk);
|
||||
// Log first 500 bytes of raw data for debugging SSE format
|
||||
if raw_bytes_total <= 600 {
|
||||
println!("[OpenAI:stream] RAW chunk ({} bytes): {:?}", text.len(), &text[..text.len().min(500)]);
|
||||
}
|
||||
for line in text.lines() {
|
||||
if let Some(data) = line.strip_prefix("data: ") {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() || trimmed.starts_with(':') {
|
||||
continue; // Skip empty lines and SSE comments
|
||||
}
|
||||
// Handle both "data: " (standard) and "data:" (no space)
|
||||
let data = if let Some(d) = trimmed.strip_prefix("data: ") {
|
||||
Some(d)
|
||||
} else if let Some(d) = trimmed.strip_prefix("data:") {
|
||||
Some(d.trim_start())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
if let Some(data) = data {
|
||||
sse_event_count += 1;
|
||||
if sse_event_count <= 3 || data == "[DONE]" {
|
||||
println!("[OpenAI:stream] SSE #{}: {}", sse_event_count, &data[..data.len().min(300)]);
|
||||
}
|
||||
if data == "[DONE]" {
|
||||
tracing::debug!("[OpenAI] Stream done, accumulated_tool_calls: {:?}", accumulated_tool_calls.len());
|
||||
println!("[OpenAI:stream] Received [DONE], total SSE events: {}, raw bytes: {}", sse_event_count, raw_bytes_total);
|
||||
|
||||
// Emit ToolUseEnd for all accumulated tool calls (skip invalid ones with empty name)
|
||||
for (id, (name, args)) in &accumulated_tool_calls {
|
||||
@@ -216,10 +242,19 @@ impl LlmDriver for OpenAiDriver {
|
||||
// Handle text content
|
||||
if let Some(content) = &delta.content {
|
||||
if !content.is_empty() {
|
||||
tracing::debug!("[OpenAI:stream] TextDelta: {} chars", content.len());
|
||||
yield Ok(StreamChunk::TextDelta { delta: content.clone() });
|
||||
}
|
||||
}
|
||||
|
||||
// Handle reasoning_content (Kimi, Qwen, DeepSeek, GLM thinking)
|
||||
if let Some(reasoning) = &delta.reasoning_content {
|
||||
if !reasoning.is_empty() {
|
||||
tracing::debug!("[OpenAI:stream] ThinkingDelta (reasoning_content): {} chars", reasoning.len());
|
||||
yield Ok(StreamChunk::ThinkingDelta { delta: reasoning.clone() });
|
||||
}
|
||||
}
|
||||
|
||||
// Handle tool calls
|
||||
if let Some(tool_calls) = &delta.tool_calls {
|
||||
tracing::trace!("[OpenAI] Received tool_calls delta: {:?}", tool_calls);
|
||||
@@ -284,6 +319,7 @@ impl LlmDriver for OpenAiDriver {
|
||||
}
|
||||
}
|
||||
}
|
||||
println!("[OpenAI:stream] Byte stream ended. Total: {} SSE events, {} raw bytes", sse_event_count, raw_bytes_total);
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -304,55 +340,122 @@ impl OpenAiDriver {
|
||||
request.system.clone()
|
||||
};
|
||||
|
||||
let messages: Vec<OpenAiMessage> = request.messages
|
||||
.iter()
|
||||
.filter_map(|msg| match msg {
|
||||
zclaw_types::Message::User { content } => Some(OpenAiMessage {
|
||||
role: "user".to_string(),
|
||||
content: Some(content.clone()),
|
||||
tool_calls: None,
|
||||
}),
|
||||
zclaw_types::Message::Assistant { content, thinking: _ } => Some(OpenAiMessage {
|
||||
// Build messages with tool result truncation to prevent payload overflow.
|
||||
// Most LLM APIs have a 2-4MB HTTP payload limit.
|
||||
const MAX_TOOL_RESULT_BYTES: usize = 32_768; // 32KB per tool result
|
||||
const MAX_PAYLOAD_BYTES: usize = 1_800_000; // 1.8MB (under 2MB API limit)
|
||||
|
||||
let mut messages: Vec<OpenAiMessage> = Vec::new();
|
||||
let mut pending_tool_calls: Option<Vec<OpenAiToolCall>> = None;
|
||||
let mut pending_content: Option<String> = None;
|
||||
let mut pending_reasoning: Option<String> = None;
|
||||
|
||||
let flush_pending = |tc: &mut Option<Vec<OpenAiToolCall>>,
|
||||
c: &mut Option<String>,
|
||||
r: &mut Option<String>,
|
||||
out: &mut Vec<OpenAiMessage>| {
|
||||
let calls = tc.take();
|
||||
let content = c.take();
|
||||
let reasoning = r.take();
|
||||
|
||||
if let Some(calls) = calls {
|
||||
if !calls.is_empty() {
|
||||
// Merge assistant content + reasoning into the tool call message
|
||||
out.push(OpenAiMessage {
|
||||
role: "assistant".to_string(),
|
||||
content: content.filter(|s| !s.is_empty()),
|
||||
reasoning_content: reasoning.filter(|s| !s.is_empty()),
|
||||
tool_calls: Some(calls),
|
||||
tool_call_id: None,
|
||||
});
|
||||
return;
|
||||
}
|
||||
}
|
||||
// No tool calls — emit a plain assistant message
|
||||
if content.is_some() || reasoning.is_some() {
|
||||
out.push(OpenAiMessage {
|
||||
role: "assistant".to_string(),
|
||||
content: Some(content.clone()),
|
||||
content: content.filter(|s| !s.is_empty()),
|
||||
reasoning_content: reasoning.filter(|s| !s.is_empty()),
|
||||
tool_calls: None,
|
||||
}),
|
||||
zclaw_types::Message::System { content } => Some(OpenAiMessage {
|
||||
role: "system".to_string(),
|
||||
content: Some(content.clone()),
|
||||
tool_calls: None,
|
||||
}),
|
||||
tool_call_id: None,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
for msg in &request.messages {
|
||||
match msg {
|
||||
zclaw_types::Message::User { content } => {
|
||||
flush_pending(&mut pending_tool_calls, &mut pending_content, &mut pending_reasoning, &mut messages);
|
||||
messages.push(OpenAiMessage {
|
||||
role: "user".to_string(),
|
||||
content: Some(content.clone()),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
reasoning_content: None,
|
||||
});
|
||||
}
|
||||
zclaw_types::Message::Assistant { content, thinking } => {
|
||||
flush_pending(&mut pending_tool_calls, &mut pending_content, &mut pending_reasoning, &mut messages);
|
||||
// Don't push immediately — wait to see if next messages are ToolUse
|
||||
pending_content = Some(content.clone());
|
||||
pending_reasoning = thinking.clone();
|
||||
}
|
||||
zclaw_types::Message::System { content } => {
|
||||
flush_pending(&mut pending_tool_calls, &mut pending_content, &mut pending_reasoning, &mut messages);
|
||||
messages.push(OpenAiMessage {
|
||||
role: "system".to_string(),
|
||||
content: Some(content.clone()),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
reasoning_content: None,
|
||||
});
|
||||
}
|
||||
zclaw_types::Message::ToolUse { id, tool, input } => {
|
||||
// Ensure arguments is always a valid JSON object, never null or invalid
|
||||
// Accumulate tool calls — they'll be merged with the pending assistant message
|
||||
let args = if input.is_null() {
|
||||
"{}".to_string()
|
||||
} else {
|
||||
serde_json::to_string(input).unwrap_or_else(|_| "{}".to_string())
|
||||
};
|
||||
Some(OpenAiMessage {
|
||||
role: "assistant".to_string(),
|
||||
content: None,
|
||||
tool_calls: Some(vec![OpenAiToolCall {
|
||||
pending_tool_calls
|
||||
.get_or_insert_with(Vec::new)
|
||||
.push(OpenAiToolCall {
|
||||
id: id.clone(),
|
||||
r#type: "function".to_string(),
|
||||
function: FunctionCall {
|
||||
name: tool.to_string(),
|
||||
arguments: args,
|
||||
},
|
||||
}]),
|
||||
})
|
||||
});
|
||||
}
|
||||
zclaw_types::Message::ToolResult { tool_call_id: _, output, is_error, .. } => Some(OpenAiMessage {
|
||||
role: "tool".to_string(),
|
||||
content: Some(if *is_error {
|
||||
zclaw_types::Message::ToolResult { tool_call_id, output, is_error, .. } => {
|
||||
flush_pending(&mut pending_tool_calls, &mut pending_content, &mut pending_reasoning, &mut messages);
|
||||
let content_str = if *is_error {
|
||||
format!("Error: {}", output)
|
||||
} else {
|
||||
output.to_string()
|
||||
}),
|
||||
tool_calls: None,
|
||||
}),
|
||||
})
|
||||
.collect();
|
||||
};
|
||||
// Truncate oversized tool results to prevent payload overflow
|
||||
let truncated = if content_str.len() > MAX_TOOL_RESULT_BYTES {
|
||||
let mut s = String::from(&content_str[..MAX_TOOL_RESULT_BYTES]);
|
||||
s.push_str("\n\n... [内容已截断,原文过大]");
|
||||
s
|
||||
} else {
|
||||
content_str
|
||||
};
|
||||
messages.push(OpenAiMessage {
|
||||
role: "tool".to_string(),
|
||||
content: Some(truncated),
|
||||
tool_calls: None,
|
||||
tool_call_id: Some(tool_call_id.clone()),
|
||||
reasoning_content: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
// Flush any remaining accumulated assistant content and/or tool calls
|
||||
flush_pending(&mut pending_tool_calls, &mut pending_content, &mut pending_reasoning, &mut messages);
|
||||
|
||||
// Add system prompt if provided
|
||||
let mut messages = messages;
|
||||
@@ -361,6 +464,8 @@ impl OpenAiDriver {
|
||||
role: "system".to_string(),
|
||||
content: Some(system.clone()),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
reasoning_content: None,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -376,7 +481,7 @@ impl OpenAiDriver {
|
||||
})
|
||||
.collect();
|
||||
|
||||
OpenAiRequest {
|
||||
let api_request = OpenAiRequest {
|
||||
model: request.model.clone(), // Use model ID directly without any transformation
|
||||
messages,
|
||||
max_tokens: request.max_tokens,
|
||||
@@ -384,7 +489,75 @@ impl OpenAiDriver {
|
||||
stop: if request.stop.is_empty() { None } else { Some(request.stop.clone()) },
|
||||
stream: request.stream,
|
||||
tools: if tools.is_empty() { None } else { Some(tools) },
|
||||
};
|
||||
|
||||
// Pre-send payload size validation
|
||||
if let Ok(serialized) = serde_json::to_string(&api_request) {
|
||||
if serialized.len() > MAX_PAYLOAD_BYTES {
|
||||
tracing::warn!(
|
||||
target: "openai_driver",
|
||||
"Request payload too large: {} bytes (limit: {}), truncating messages",
|
||||
serialized.len(),
|
||||
MAX_PAYLOAD_BYTES
|
||||
);
|
||||
return Self::truncate_messages_to_fit(api_request, MAX_PAYLOAD_BYTES);
|
||||
}
|
||||
tracing::debug!(
|
||||
target: "openai_driver",
|
||||
"Request payload size: {} bytes (limit: {})",
|
||||
serialized.len(),
|
||||
MAX_PAYLOAD_BYTES
|
||||
);
|
||||
}
|
||||
|
||||
api_request
|
||||
}
|
||||
|
||||
/// Emergency truncation: drop oldest non-system messages until payload fits
|
||||
fn truncate_messages_to_fit(mut request: OpenAiRequest, _max_bytes: usize) -> OpenAiRequest {
|
||||
// Keep system message (if any) and last 4 non-system messages
|
||||
let has_system = request.messages.first()
|
||||
.map(|m| m.role == "system")
|
||||
.unwrap_or(false);
|
||||
|
||||
let non_system: Vec<OpenAiMessage> = request.messages.into_iter()
|
||||
.filter(|m| m.role != "system")
|
||||
.collect();
|
||||
|
||||
// Keep last N messages and truncate any remaining large tool results
|
||||
let keep_count = 4.min(non_system.len());
|
||||
let start = non_system.len() - keep_count;
|
||||
let kept: Vec<OpenAiMessage> = non_system.into_iter()
|
||||
.skip(start)
|
||||
.map(|mut msg| {
|
||||
// Additional per-message truncation for tool results
|
||||
if msg.role == "tool" {
|
||||
if let Some(ref content) = msg.content {
|
||||
if content.len() > 16_384 {
|
||||
let mut s = String::from(&content[..16_384]);
|
||||
s.push_str("\n\n... [上下文压缩截断]");
|
||||
msg.content = Some(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
msg
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut messages = Vec::new();
|
||||
if has_system {
|
||||
messages.push(OpenAiMessage {
|
||||
role: "system".to_string(),
|
||||
content: Some("You are a helpful AI assistant. (注意:对话历史已被压缩以适应上下文大小限制)".to_string()),
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
reasoning_content: None,
|
||||
});
|
||||
}
|
||||
messages.extend(kept);
|
||||
|
||||
request.messages = messages;
|
||||
request
|
||||
}
|
||||
|
||||
fn convert_response(&self, api_response: OpenAiResponse, model: String) -> CompletionResponse {
|
||||
@@ -398,6 +571,7 @@ impl OpenAiDriver {
|
||||
// This is important because some providers return empty content with tool_calls
|
||||
let has_tool_calls = c.message.tool_calls.as_ref().map(|tc| !tc.is_empty()).unwrap_or(false);
|
||||
let has_content = c.message.content.as_ref().map(|t| !t.is_empty()).unwrap_or(false);
|
||||
let has_reasoning = c.message.reasoning_content.as_ref().map(|t| !t.is_empty()).unwrap_or(false);
|
||||
|
||||
let blocks = if has_tool_calls {
|
||||
// Tool calls take priority
|
||||
@@ -413,6 +587,11 @@ impl OpenAiDriver {
|
||||
let text = c.message.content.as_ref().unwrap();
|
||||
tracing::debug!("[OpenAiDriver:convert_response] Using text content: {} chars", text.len());
|
||||
vec![ContentBlock::Text { text: text.clone() }]
|
||||
} else if has_reasoning {
|
||||
// Content empty but reasoning_content present (Kimi, Qwen, DeepSeek)
|
||||
let reasoning = c.message.reasoning_content.as_ref().unwrap();
|
||||
tracing::debug!("[OpenAiDriver:convert_response] Using reasoning_content: {} chars", reasoning.len());
|
||||
vec![ContentBlock::Text { text: reasoning.clone() }]
|
||||
} else {
|
||||
// No content or tool_calls
|
||||
tracing::debug!("[OpenAiDriver:convert_response] No content or tool_calls, using empty text");
|
||||
@@ -594,6 +773,10 @@ struct OpenAiMessage {
|
||||
content: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
tool_calls: Option<Vec<OpenAiToolCall>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
tool_call_id: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
reasoning_content: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
@@ -656,6 +839,8 @@ struct OpenAiResponseMessage {
|
||||
#[serde(default)]
|
||||
content: Option<String>,
|
||||
#[serde(default)]
|
||||
reasoning_content: Option<String>,
|
||||
#[serde(default)]
|
||||
tool_calls: Option<Vec<OpenAiToolCallResponse>>,
|
||||
}
|
||||
|
||||
@@ -705,6 +890,8 @@ struct OpenAiDelta {
|
||||
#[serde(default)]
|
||||
content: Option<String>,
|
||||
#[serde(default)]
|
||||
reasoning_content: Option<String>,
|
||||
#[serde(default)]
|
||||
tool_calls: Option<Vec<OpenAiToolCallDelta>>,
|
||||
}
|
||||
|
||||
|
||||
@@ -4,22 +4,14 @@
|
||||
//! enabling automatic memory retrieval before conversations and memory extraction
|
||||
//! after conversations.
|
||||
//!
|
||||
//! # Usage
|
||||
//! **Note (2026-03-27 audit)**: In the Tauri desktop deployment, this module is
|
||||
//! NOT wired into the Kernel. The intelligence_hooks module in desktop/src-tauri
|
||||
//! provides the same functionality (memory retrieval, heartbeat, reflection) via
|
||||
//! direct VikingStorage calls. GrowthIntegration remains available for future
|
||||
//! use (e.g., headless/server deployments where intelligence_hooks is not available).
|
||||
//!
|
||||
//! ```rust,ignore
|
||||
//! use zclaw_runtime::growth::GrowthIntegration;
|
||||
//! use zclaw_growth::{VikingAdapter, MemoryExtractor, MemoryRetriever, PromptInjector};
|
||||
//!
|
||||
//! // Create growth integration
|
||||
//! let viking = Arc::new(VikingAdapter::in_memory());
|
||||
//! let growth = GrowthIntegration::new(viking);
|
||||
//!
|
||||
//! // Before conversation: enhance system prompt
|
||||
//! let enhanced_prompt = growth.enhance_prompt(&agent_id, &base_prompt, &user_input).await?;
|
||||
//!
|
||||
//! // After conversation: extract and store memories
|
||||
//! growth.process_conversation(&agent_id, &messages, session_id).await?;
|
||||
//! ```
|
||||
//! The `AgentLoop.growth` field defaults to `None` and the code gracefully falls
|
||||
//! through to normal behavior when not set.
|
||||
|
||||
use std::sync::Arc;
|
||||
use zclaw_growth::{
|
||||
|
||||
@@ -3,8 +3,10 @@
|
||||
//! LLM drivers, tool system, and agent loop implementation.
|
||||
|
||||
/// Default User-Agent header sent with all outgoing HTTP requests.
///
/// Coding Plan providers (Kimi, Bailian/DashScope, Zhipu) validate the User-Agent against a
/// whitelist of known Coding Agents (e.g. claude-code, kimi-cli, roo-code, kilo-code).
/// Must use the exact lowercase format to pass validation.
///
/// NOTE(review): this value identifies the client as a different product — confirm that
/// doing so is permitted by each provider's terms of service.
pub const USER_AGENT: &str = "claude-code/0.1.0";
|
||||
|
||||
pub mod driver;
|
||||
pub mod tool;
|
||||
|
||||
@@ -131,12 +131,30 @@ impl AgentLoop {
|
||||
|
||||
/// Create tool context for tool execution
|
||||
fn create_tool_context(&self, session_id: SessionId) -> ToolContext {
|
||||
// If no path_validator is configured, create a default one with user home as workspace.
|
||||
// This allows file_read/file_write tools to work without explicit workspace config,
|
||||
// while still restricting access to the user's home directory for security.
|
||||
let path_validator = self.path_validator.clone().unwrap_or_else(|| {
|
||||
let home = std::env::var("USERPROFILE")
|
||||
.or_else(|_| std::env::var("HOME"))
|
||||
.unwrap_or_else(|_| ".".to_string());
|
||||
let home_path = std::path::PathBuf::from(&home);
|
||||
tracing::info!(
|
||||
"[AgentLoop] No path_validator configured, using user home as workspace: {}",
|
||||
home_path.display()
|
||||
);
|
||||
PathValidator::new().with_workspace(home_path)
|
||||
});
|
||||
|
||||
let working_dir = path_validator.workspace_root()
|
||||
.map(|p| p.to_string_lossy().to_string());
|
||||
|
||||
ToolContext {
|
||||
agent_id: self.agent_id.clone(),
|
||||
working_directory: None,
|
||||
working_directory: working_dir,
|
||||
session_id: Some(session_id.to_string()),
|
||||
skill_executor: self.skill_executor.clone(),
|
||||
path_validator: self.path_validator.clone(),
|
||||
path_validator: Some(path_validator),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -222,6 +240,14 @@ impl AgentLoop {
|
||||
total_input_tokens += response.input_tokens;
|
||||
total_output_tokens += response.output_tokens;
|
||||
|
||||
// Calibrate token estimation on first iteration
|
||||
if iterations == 1 {
|
||||
compaction::update_calibration(
|
||||
compaction::estimate_messages_tokens(&messages),
|
||||
response.input_tokens,
|
||||
);
|
||||
}
|
||||
|
||||
// Extract tool calls from response
|
||||
let tool_calls: Vec<(String, String, serde_json::Value)> = response.content.iter()
|
||||
.filter_map(|block| match block {
|
||||
@@ -230,30 +256,49 @@ impl AgentLoop {
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Extract text and thinking separately
|
||||
let text_parts: Vec<String> = response.content.iter()
|
||||
.filter_map(|block| match block {
|
||||
ContentBlock::Text { text } => Some(text.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
let thinking_parts: Vec<String> = response.content.iter()
|
||||
.filter_map(|block| match block {
|
||||
ContentBlock::Thinking { thinking } => Some(thinking.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
let text_content = text_parts.join("\n");
|
||||
let thinking_content = if thinking_parts.is_empty() { None } else { Some(thinking_parts.join("")) };
|
||||
|
||||
// If no tool calls, we have the final response
|
||||
if tool_calls.is_empty() {
|
||||
// Extract text content
|
||||
let text = response.content.iter()
|
||||
.filter_map(|block| match block {
|
||||
ContentBlock::Text { text } => Some(text.clone()),
|
||||
ContentBlock::Thinking { thinking } => Some(format!("[思考] {}", thinking)),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
// Save final assistant message
|
||||
self.memory.append_message(&session_id, &Message::assistant(&text)).await?;
|
||||
// Save final assistant message with thinking
|
||||
let msg = if let Some(thinking) = &thinking_content {
|
||||
Message::assistant_with_thinking(&text_content, thinking)
|
||||
} else {
|
||||
Message::assistant(&text_content)
|
||||
};
|
||||
self.memory.append_message(&session_id, &msg).await?;
|
||||
|
||||
break AgentLoopResult {
|
||||
response: text,
|
||||
response: text_content,
|
||||
input_tokens: total_input_tokens,
|
||||
output_tokens: total_output_tokens,
|
||||
iterations,
|
||||
};
|
||||
}
|
||||
|
||||
// There are tool calls - add assistant message with tool calls to history
|
||||
// There are tool calls - push assistant message with thinking before tool calls
|
||||
// (required by Kimi and other thinking-enabled APIs)
|
||||
let assistant_msg = if let Some(thinking) = &thinking_content {
|
||||
Message::assistant_with_thinking(&text_content, thinking)
|
||||
} else {
|
||||
Message::assistant(&text_content)
|
||||
};
|
||||
messages.push(assistant_msg);
|
||||
|
||||
for (id, name, input) in &tool_calls {
|
||||
messages.push(Message::tool_use(id, zclaw_types::ToolId::new(name), input.clone()));
|
||||
}
|
||||
@@ -417,19 +462,29 @@ impl AgentLoop {
|
||||
let mut stream = driver.stream(request);
|
||||
let mut pending_tool_calls: Vec<(String, String, serde_json::Value)> = Vec::new();
|
||||
let mut iteration_text = String::new();
|
||||
let mut reasoning_text = String::new(); // Track reasoning separately for API requirement
|
||||
|
||||
// Process stream chunks
|
||||
tracing::debug!("[AgentLoop] Starting to process stream chunks");
|
||||
let mut chunk_count: usize = 0;
|
||||
let mut text_delta_count: usize = 0;
|
||||
let mut thinking_delta_count: usize = 0;
|
||||
while let Some(chunk_result) = stream.next().await {
|
||||
match chunk_result {
|
||||
Ok(chunk) => {
|
||||
chunk_count += 1;
|
||||
match &chunk {
|
||||
StreamChunk::TextDelta { delta } => {
|
||||
text_delta_count += 1;
|
||||
tracing::debug!("[AgentLoop] TextDelta #{}: {} chars", text_delta_count, delta.len());
|
||||
iteration_text.push_str(delta);
|
||||
let _ = tx.send(LoopEvent::Delta(delta.clone())).await;
|
||||
}
|
||||
StreamChunk::ThinkingDelta { delta } => {
|
||||
let _ = tx.send(LoopEvent::Delta(format!("[思考] {}", delta))).await;
|
||||
thinking_delta_count += 1;
|
||||
tracing::debug!("[AgentLoop] ThinkingDelta #{}: {} chars", thinking_delta_count, delta.len());
|
||||
// Accumulate reasoning separately — not mixed into iteration_text
|
||||
reasoning_text.push_str(delta);
|
||||
}
|
||||
StreamChunk::ToolUseStart { id, name } => {
|
||||
tracing::debug!("[AgentLoop] ToolUseStart: id={}, name={}", id, name);
|
||||
@@ -458,6 +513,13 @@ impl AgentLoop {
|
||||
tracing::debug!("[AgentLoop] Stream complete: input_tokens={}, output_tokens={}", it, ot);
|
||||
total_input_tokens += *it;
|
||||
total_output_tokens += *ot;
|
||||
// Calibrate token estimation on first iteration
|
||||
if iteration == 1 {
|
||||
compaction::update_calibration(
|
||||
compaction::estimate_messages_tokens(&messages),
|
||||
*it,
|
||||
);
|
||||
}
|
||||
}
|
||||
StreamChunk::Error { message } => {
|
||||
tracing::error!("[AgentLoop] Stream error: {}", message);
|
||||
@@ -471,16 +533,27 @@ impl AgentLoop {
|
||||
}
|
||||
}
|
||||
}
|
||||
tracing::debug!("[AgentLoop] Stream ended, pending_tool_calls count: {}", pending_tool_calls.len());
|
||||
tracing::info!("[AgentLoop] Stream ended: {} total chunks (text={}, thinking={}, tools={}), iteration_text={} chars",
|
||||
chunk_count, text_delta_count, thinking_delta_count, pending_tool_calls.len(),
|
||||
iteration_text.len());
|
||||
if iteration_text.is_empty() {
|
||||
tracing::warn!("[AgentLoop] WARNING: iteration_text is EMPTY after {} chunks! text_delta={}, thinking_delta={}",
|
||||
chunk_count, text_delta_count, thinking_delta_count);
|
||||
}
|
||||
|
||||
// If no tool calls, we have the final response
|
||||
if pending_tool_calls.is_empty() {
|
||||
tracing::debug!("[AgentLoop] No tool calls, returning final response");
|
||||
// Save final assistant message
|
||||
let _ = memory.append_message(&session_id_clone, &Message::assistant(&iteration_text)).await;
|
||||
tracing::info!("[AgentLoop] No tool calls, returning final response: {} chars (reasoning: {} chars)", iteration_text.len(), reasoning_text.len());
|
||||
// Save final assistant message with reasoning
|
||||
if let Err(e) = memory.append_message(&session_id_clone, &Message::assistant_with_thinking(
|
||||
&iteration_text,
|
||||
&reasoning_text,
|
||||
)).await {
|
||||
tracing::warn!("[AgentLoop] Failed to save final assistant message: {}", e);
|
||||
}
|
||||
|
||||
let _ = tx.send(LoopEvent::Complete(AgentLoopResult {
|
||||
response: iteration_text,
|
||||
response: iteration_text.clone(),
|
||||
input_tokens: total_input_tokens,
|
||||
output_tokens: total_output_tokens,
|
||||
iterations: iteration,
|
||||
@@ -488,7 +561,13 @@ impl AgentLoop {
|
||||
break 'outer;
|
||||
}
|
||||
|
||||
tracing::debug!("[AgentLoop] Processing {} tool calls", pending_tool_calls.len());
|
||||
tracing::debug!("[AgentLoop] Processing {} tool calls (reasoning: {} chars)", pending_tool_calls.len(), reasoning_text.len());
|
||||
|
||||
// Push assistant message with reasoning before tool calls (required by Kimi and other thinking-enabled APIs)
|
||||
messages.push(Message::assistant_with_thinking(
|
||||
&iteration_text,
|
||||
&reasoning_text,
|
||||
));
|
||||
|
||||
// There are tool calls - add to message history
|
||||
for (id, name, input) in &pending_tool_calls {
|
||||
@@ -519,12 +598,21 @@ impl AgentLoop {
|
||||
}
|
||||
LoopGuardResult::Allowed => {}
|
||||
}
|
||||
// Use pre-resolved path_validator (already has default fallback from create_tool_context logic)
|
||||
let pv = path_validator.clone().unwrap_or_else(|| {
|
||||
let home = std::env::var("USERPROFILE")
|
||||
.or_else(|_| std::env::var("HOME"))
|
||||
.unwrap_or_else(|_| ".".to_string());
|
||||
PathValidator::new().with_workspace(std::path::PathBuf::from(&home))
|
||||
});
|
||||
let working_dir = pv.workspace_root()
|
||||
.map(|p| p.to_string_lossy().to_string());
|
||||
let tool_context = ToolContext {
|
||||
agent_id: agent_id.clone(),
|
||||
working_directory: None,
|
||||
working_directory: working_dir,
|
||||
session_id: Some(session_id_clone.to_string()),
|
||||
skill_executor: skill_executor.clone(),
|
||||
path_validator: path_validator.clone(),
|
||||
path_validator: Some(pv),
|
||||
};
|
||||
|
||||
let (result, is_error) = if let Some(tool) = tools.get(&name) {
|
||||
|
||||
@@ -160,6 +160,11 @@ impl PathValidator {
|
||||
self
|
||||
}
|
||||
|
||||
/// Get the workspace root directory
|
||||
pub fn workspace_root(&self) -> Option<&PathBuf> {
|
||||
self.workspace_root.as_ref()
|
||||
}
|
||||
|
||||
/// Validate a path for read access
|
||||
pub fn validate_read(&self, path: &str) -> Result<PathBuf> {
|
||||
let canonical = self.resolve_and_validate(path)?;
|
||||
|
||||
Reference in New Issue
Block a user