refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup
- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add `task` builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace `let _ =` with proper `log::warn!` across SaaS handlers
- Remove unused dependency (`url`) from zclaw-hands
This commit is contained in:
@@ -181,8 +181,12 @@ impl LlmDriver for AnthropicDriver {
|
||||
}
|
||||
}
|
||||
"error" => {
|
||||
let error_msg = serde_json::from_str::<serde_json::Value>(&data)
|
||||
.ok()
|
||||
.and_then(|v| v.get("error").and_then(|e| e.get("message")).and_then(|m| m.as_str().map(String::from)))
|
||||
.unwrap_or_else(|| format!("Stream error: {}", &data[..data.len().min(200)]));
|
||||
yield Ok(StreamChunk::Error {
|
||||
message: "Stream error".to_string(),
|
||||
message: error_msg,
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
@@ -251,15 +255,42 @@ impl AnthropicDriver {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let requested_max = request.max_tokens.unwrap_or(4096);
|
||||
let (thinking, budget) = if request.thinking_enabled {
|
||||
let budget = match request.reasoning_effort.as_deref() {
|
||||
Some("low") => 2000,
|
||||
Some("medium") => 10000,
|
||||
Some("high") => 32000,
|
||||
_ => 10000, // default
|
||||
};
|
||||
(Some(AnthropicThinking {
|
||||
r#type: "enabled".to_string(),
|
||||
budget_tokens: budget,
|
||||
}), budget)
|
||||
} else {
|
||||
(None, 0)
|
||||
};
|
||||
|
||||
// When thinking is enabled, max_tokens is the TOTAL budget (thinking + text).
|
||||
// Use the maximum output limit (65536) so thinking can consume whatever it
|
||||
// needs without starving the text response. We only pay for tokens actually
|
||||
// generated, so a high limit costs nothing extra.
|
||||
let effective_max = if budget > 0 {
|
||||
65536
|
||||
} else {
|
||||
requested_max
|
||||
};
|
||||
|
||||
AnthropicRequest {
|
||||
model: request.model.clone(),
|
||||
max_tokens: request.max_tokens.unwrap_or(4096),
|
||||
max_tokens: effective_max,
|
||||
system: request.system.clone(),
|
||||
messages,
|
||||
tools: if tools.is_empty() { None } else { Some(tools) },
|
||||
temperature: request.temperature,
|
||||
stop_sequences: if request.stop.is_empty() { None } else { Some(request.stop.clone()) },
|
||||
stream: request.stream,
|
||||
thinking,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -313,6 +344,14 @@ struct AnthropicRequest {
|
||||
stop_sequences: Option<Vec<String>>,
|
||||
#[serde(default)]
|
||||
stream: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
thinking: Option<AnthropicThinking>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct AnthropicThinking {
|
||||
r#type: String,
|
||||
budget_tokens: u32,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
||||
Reference in New Issue
Block a user