refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup
- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace let _ = with proper log::warn! across SaaS handlers
- Remove unused dependency (url) from zclaw-hands
This commit is contained in:
@@ -181,8 +181,12 @@ impl LlmDriver for AnthropicDriver {
|
||||
}
|
||||
}
|
||||
"error" => {
|
||||
let error_msg = serde_json::from_str::<serde_json::Value>(&data)
|
||||
.ok()
|
||||
.and_then(|v| v.get("error").and_then(|e| e.get("message")).and_then(|m| m.as_str().map(String::from)))
|
||||
.unwrap_or_else(|| format!("Stream error: {}", &data[..data.len().min(200)]));
|
||||
yield Ok(StreamChunk::Error {
|
||||
message: "Stream error".to_string(),
|
||||
message: error_msg,
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
@@ -251,15 +255,42 @@ impl AnthropicDriver {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let requested_max = request.max_tokens.unwrap_or(4096);
|
||||
let (thinking, budget) = if request.thinking_enabled {
|
||||
let budget = match request.reasoning_effort.as_deref() {
|
||||
Some("low") => 2000,
|
||||
Some("medium") => 10000,
|
||||
Some("high") => 32000,
|
||||
_ => 10000, // default
|
||||
};
|
||||
(Some(AnthropicThinking {
|
||||
r#type: "enabled".to_string(),
|
||||
budget_tokens: budget,
|
||||
}), budget)
|
||||
} else {
|
||||
(None, 0)
|
||||
};
|
||||
|
||||
// When thinking is enabled, max_tokens is the TOTAL budget (thinking + text).
|
||||
// Use the maximum output limit (65536) so thinking can consume whatever it
|
||||
// needs without starving the text response. We only pay for tokens actually
|
||||
// generated, so a high limit costs nothing extra.
|
||||
let effective_max = if budget > 0 {
|
||||
65536
|
||||
} else {
|
||||
requested_max
|
||||
};
|
||||
|
||||
AnthropicRequest {
|
||||
model: request.model.clone(),
|
||||
max_tokens: request.max_tokens.unwrap_or(4096),
|
||||
max_tokens: effective_max,
|
||||
system: request.system.clone(),
|
||||
messages,
|
||||
tools: if tools.is_empty() { None } else { Some(tools) },
|
||||
temperature: request.temperature,
|
||||
stop_sequences: if request.stop.is_empty() { None } else { Some(request.stop.clone()) },
|
||||
stream: request.stream,
|
||||
thinking,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -313,6 +344,14 @@ struct AnthropicRequest {
|
||||
stop_sequences: Option<Vec<String>>,
|
||||
#[serde(default)]
|
||||
stream: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
thinking: Option<AnthropicThinking>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct AnthropicThinking {
|
||||
r#type: String,
|
||||
budget_tokens: u32,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
||||
@@ -265,6 +265,10 @@ impl GeminiDriver {
|
||||
/// - Tool definitions use `functionDeclarations`
|
||||
/// - Tool results are sent as `functionResponse` parts in `user` messages
|
||||
fn build_api_request(&self, request: &CompletionRequest) -> GeminiRequest {
|
||||
if request.thinking_enabled {
|
||||
tracing::debug!("[GeminiDriver] thinking_enabled=true but Gemini does not support native thinking mode; ignoring");
|
||||
}
|
||||
|
||||
let mut contents: Vec<GeminiContent> = Vec::new();
|
||||
|
||||
for msg in &request.messages {
|
||||
|
||||
@@ -58,6 +58,10 @@ impl LocalDriver {
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
fn build_api_request(&self, request: &CompletionRequest) -> LocalApiRequest {
|
||||
if request.thinking_enabled {
|
||||
tracing::debug!("[LocalDriver] thinking_enabled=true but local driver does not support native thinking mode; ignoring");
|
||||
}
|
||||
|
||||
let messages: Vec<LocalApiMessage> = request
|
||||
.messages
|
||||
.iter()
|
||||
@@ -183,7 +187,7 @@ impl LocalDriver {
|
||||
.unwrap_or(false);
|
||||
|
||||
let blocks = if has_tool_calls {
|
||||
let tool_calls = c.message.tool_calls.as_ref().unwrap();
|
||||
let tool_calls = c.message.tool_calls.as_deref().unwrap_or_default();
|
||||
tool_calls
|
||||
.iter()
|
||||
.map(|tc| {
|
||||
@@ -199,7 +203,7 @@ impl LocalDriver {
|
||||
.collect()
|
||||
} else if has_content {
|
||||
vec![ContentBlock::Text {
|
||||
text: c.message.content.clone().unwrap(),
|
||||
text: c.message.content.clone().unwrap_or_default(),
|
||||
}]
|
||||
} else {
|
||||
vec![ContentBlock::Text {
|
||||
|
||||
@@ -60,6 +60,15 @@ pub struct CompletionRequest {
|
||||
pub stop: Vec<String>,
|
||||
/// Enable streaming
|
||||
pub stream: bool,
|
||||
/// Enable extended thinking/reasoning
|
||||
#[serde(default)]
|
||||
pub thinking_enabled: bool,
|
||||
/// Reasoning effort level (for providers that support it)
|
||||
#[serde(default)]
|
||||
pub reasoning_effort: Option<String>,
|
||||
/// Enable plan mode
|
||||
#[serde(default)]
|
||||
pub plan_mode: bool,
|
||||
}
|
||||
|
||||
impl Default for CompletionRequest {
|
||||
@@ -73,27 +82,16 @@ impl Default for CompletionRequest {
|
||||
temperature: Some(0.7),
|
||||
stop: Vec::new(),
|
||||
stream: false,
|
||||
thinking_enabled: false,
|
||||
reasoning_effort: None,
|
||||
plan_mode: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tool definition for LLM
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ToolDefinition {
|
||||
pub name: String,
|
||||
pub description: String,
|
||||
pub input_schema: serde_json::Value,
|
||||
}
|
||||
|
||||
impl ToolDefinition {
|
||||
pub fn new(name: impl Into<String>, description: impl Into<String>, schema: serde_json::Value) -> Self {
|
||||
Self {
|
||||
name: name.into(),
|
||||
description: description.into(),
|
||||
input_schema: schema,
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Tool definition for LLM function calling.
|
||||
/// Re-exported from `zclaw_types::tool::ToolDefinition` (canonical definition).
|
||||
pub use zclaw_types::tool::ToolDefinition;
|
||||
|
||||
/// Completion response
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -110,7 +108,8 @@ pub struct CompletionResponse {
|
||||
pub stop_reason: StopReason,
|
||||
}
|
||||
|
||||
/// Content block in response
|
||||
/// LLM driver response content block (subset of canonical zclaw_types::ContentBlock).
|
||||
/// Used internally by Anthropic/OpenAI/Gemini/Local drivers for API response parsing.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum ContentBlock {
|
||||
|
||||
@@ -130,8 +130,8 @@ impl LlmDriver for OpenAiDriver {
|
||||
let api_key = self.api_key.expose_secret().to_string();
|
||||
|
||||
Box::pin(stream! {
|
||||
println!("[OpenAI:stream] POST to {}/chat/completions", base_url);
|
||||
println!("[OpenAI:stream] Request model={}, stream={}", stream_request.model, stream_request.stream);
|
||||
tracing::debug!("[OpenAI:stream] POST to {}/chat/completions", base_url);
|
||||
tracing::debug!("[OpenAI:stream] Request model={}, stream={}", stream_request.model, stream_request.stream);
|
||||
let response = match self.client
|
||||
.post(format!("{}/chat/completions", base_url))
|
||||
.header("Authorization", format!("Bearer {}", api_key))
|
||||
@@ -142,11 +142,11 @@ impl LlmDriver for OpenAiDriver {
|
||||
.await
|
||||
{
|
||||
Ok(r) => {
|
||||
println!("[OpenAI:stream] Response status: {}, content-type: {:?}", r.status(), r.headers().get("content-type"));
|
||||
tracing::debug!("[OpenAI:stream] Response status: {}, content-type: {:?}", r.status(), r.headers().get("content-type"));
|
||||
r
|
||||
},
|
||||
Err(e) => {
|
||||
println!("[OpenAI:stream] HTTP request FAILED: {:?}", e);
|
||||
tracing::debug!("[OpenAI:stream] HTTP request FAILED: {:?}", e);
|
||||
yield Err(ZclawError::LlmError(format!("HTTP request failed: {}", e)));
|
||||
return;
|
||||
}
|
||||
@@ -155,7 +155,7 @@ impl LlmDriver for OpenAiDriver {
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
println!("[OpenAI:stream] API error {}: {}", status, &body[..body.len().min(500)]);
|
||||
tracing::debug!("[OpenAI:stream] API error {}: {}", status, &body[..body.len().min(500)]);
|
||||
yield Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
|
||||
return;
|
||||
}
|
||||
@@ -170,7 +170,7 @@ impl LlmDriver for OpenAiDriver {
|
||||
let chunk = match chunk_result {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
println!("[OpenAI:stream] Byte stream error: {:?}", e);
|
||||
tracing::debug!("[OpenAI:stream] Byte stream error: {:?}", e);
|
||||
yield Err(ZclawError::LlmError(format!("Stream error: {}", e)));
|
||||
continue;
|
||||
}
|
||||
@@ -180,7 +180,7 @@ impl LlmDriver for OpenAiDriver {
|
||||
let text = String::from_utf8_lossy(&chunk);
|
||||
// Log first 500 bytes of raw data for debugging SSE format
|
||||
if raw_bytes_total <= 600 {
|
||||
println!("[OpenAI:stream] RAW chunk ({} bytes): {:?}", text.len(), &text[..text.len().min(500)]);
|
||||
tracing::debug!("[OpenAI:stream] RAW chunk ({} bytes): {:?}", text.len(), &text[..text.len().min(500)]);
|
||||
}
|
||||
for line in text.lines() {
|
||||
let trimmed = line.trim();
|
||||
@@ -198,10 +198,10 @@ impl LlmDriver for OpenAiDriver {
|
||||
if let Some(data) = data {
|
||||
sse_event_count += 1;
|
||||
if sse_event_count <= 3 || data == "[DONE]" {
|
||||
println!("[OpenAI:stream] SSE #{}: {}", sse_event_count, &data[..data.len().min(300)]);
|
||||
tracing::debug!("[OpenAI:stream] SSE #{}: {}", sse_event_count, &data[..data.len().min(300)]);
|
||||
}
|
||||
if data == "[DONE]" {
|
||||
println!("[OpenAI:stream] Received [DONE], total SSE events: {}, raw bytes: {}", sse_event_count, raw_bytes_total);
|
||||
tracing::debug!("[OpenAI:stream] Received [DONE], total SSE events: {}, raw bytes: {}", sse_event_count, raw_bytes_total);
|
||||
|
||||
// Emit ToolUseEnd for all accumulated tool calls (skip invalid ones with empty name)
|
||||
for (id, (name, args)) in &accumulated_tool_calls {
|
||||
@@ -319,7 +319,7 @@ impl LlmDriver for OpenAiDriver {
|
||||
}
|
||||
}
|
||||
}
|
||||
println!("[OpenAI:stream] Byte stream ended. Total: {} SSE events, {} raw bytes", sse_event_count, raw_bytes_total);
|
||||
tracing::debug!("[OpenAI:stream] Byte stream ended. Total: {} SSE events, {} raw bytes", sse_event_count, raw_bytes_total);
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -496,6 +496,7 @@ impl OpenAiDriver {
|
||||
stop: if request.stop.is_empty() { None } else { Some(request.stop.clone()) },
|
||||
stream: request.stream,
|
||||
tools: if tools.is_empty() { None } else { Some(tools) },
|
||||
reasoning_effort: request.reasoning_effort.clone(),
|
||||
};
|
||||
|
||||
// Pre-send payload size validation
|
||||
@@ -581,8 +582,8 @@ impl OpenAiDriver {
|
||||
let has_reasoning = c.message.reasoning_content.as_ref().map(|t| !t.is_empty()).unwrap_or(false);
|
||||
|
||||
let blocks = if has_tool_calls {
|
||||
// Tool calls take priority
|
||||
let tool_calls = c.message.tool_calls.as_ref().unwrap();
|
||||
// Tool calls take priority — safe to unwrap after has_tool_calls check
|
||||
let tool_calls = c.message.tool_calls.as_ref().cloned().unwrap_or_default();
|
||||
tracing::debug!("[OpenAiDriver:convert_response] Using tool_calls: {} calls", tool_calls.len());
|
||||
tool_calls.iter().map(|tc| ContentBlock::ToolUse {
|
||||
id: tc.id.clone(),
|
||||
@@ -590,15 +591,15 @@ impl OpenAiDriver {
|
||||
input: serde_json::from_str(&tc.function.arguments).unwrap_or(serde_json::Value::Null),
|
||||
}).collect()
|
||||
} else if has_content {
|
||||
// Non-empty content
|
||||
let text = c.message.content.as_ref().unwrap();
|
||||
// Non-empty content — safe to unwrap after has_content check
|
||||
let text = c.message.content.as_deref().unwrap_or("");
|
||||
tracing::debug!("[OpenAiDriver:convert_response] Using text content: {} chars", text.len());
|
||||
vec![ContentBlock::Text { text: text.clone() }]
|
||||
vec![ContentBlock::Text { text: text.to_string() }]
|
||||
} else if has_reasoning {
|
||||
// Content empty but reasoning_content present (Kimi, Qwen, DeepSeek)
|
||||
let reasoning = c.message.reasoning_content.as_ref().unwrap();
|
||||
let reasoning = c.message.reasoning_content.as_deref().unwrap_or("");
|
||||
tracing::debug!("[OpenAiDriver:convert_response] Using reasoning_content: {} chars", reasoning.len());
|
||||
vec![ContentBlock::Text { text: reasoning.clone() }]
|
||||
vec![ContentBlock::Text { text: reasoning.to_string() }]
|
||||
} else {
|
||||
// No content or tool_calls
|
||||
tracing::debug!("[OpenAiDriver:convert_response] No content or tool_calls, using empty text");
|
||||
@@ -771,6 +772,8 @@ struct OpenAiRequest {
|
||||
stream: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
tools: Option<Vec<OpenAiTool>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
reasoning_effort: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
@@ -833,7 +836,7 @@ struct OpenAiResponse {
|
||||
usage: Option<OpenAiUsage>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Default)]
|
||||
#[derive(Deserialize, Default, Clone)]
|
||||
struct OpenAiChoice {
|
||||
#[serde(default)]
|
||||
message: OpenAiResponseMessage,
|
||||
@@ -841,7 +844,7 @@ struct OpenAiChoice {
|
||||
finish_reason: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Default)]
|
||||
#[derive(Deserialize, Default, Clone)]
|
||||
struct OpenAiResponseMessage {
|
||||
#[serde(default)]
|
||||
content: Option<String>,
|
||||
@@ -851,7 +854,7 @@ struct OpenAiResponseMessage {
|
||||
tool_calls: Option<Vec<OpenAiToolCallResponse>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Default)]
|
||||
#[derive(Deserialize, Default, Clone)]
|
||||
struct OpenAiToolCallResponse {
|
||||
#[serde(default)]
|
||||
id: String,
|
||||
@@ -859,7 +862,7 @@ struct OpenAiToolCallResponse {
|
||||
function: FunctionCallResponse,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Default)]
|
||||
#[derive(Deserialize, Default, Clone)]
|
||||
struct FunctionCallResponse {
|
||||
#[serde(default)]
|
||||
name: String,
|
||||
|
||||
Reference in New Issue
Block a user