refactor(crates): kernel/generation module split + DeerFlow optimizations + middleware + dead code cleanup

- Split zclaw-kernel/kernel.rs (1486 lines) into 9 domain modules
- Split zclaw-kernel/generation.rs (1080 lines) into 3 modules
- Add DeerFlow-inspired middleware: DanglingTool, SubagentLimit, ToolError, ToolOutputGuard
- Add PromptBuilder for structured system prompt assembly
- Add FactStore (zclaw-memory) for persistent fact extraction
- Add task builtin tool for agent task management
- Driver improvements: Anthropic/OpenAI extended thinking, Gemini safety settings
- Replace `let _ = ...` error-swallowing with explicit `log::warn!` logging across SaaS handlers
- Remove the unused `url` dependency from zclaw-hands
This commit is contained in:
iven
2026-04-03 00:28:03 +08:00
parent 0a04b260a4
commit 52bdafa633
55 changed files with 4130 additions and 1959 deletions

View File

@@ -181,8 +181,12 @@ impl LlmDriver for AnthropicDriver {
}
}
"error" => {
let error_msg = serde_json::from_str::<serde_json::Value>(&data)
.ok()
.and_then(|v| v.get("error").and_then(|e| e.get("message")).and_then(|m| m.as_str().map(String::from)))
.unwrap_or_else(|| format!("Stream error: {}", &data[..data.len().min(200)]));
yield Ok(StreamChunk::Error {
message: "Stream error".to_string(),
message: error_msg,
});
}
_ => {}
@@ -251,15 +255,42 @@ impl AnthropicDriver {
})
.collect();
let requested_max = request.max_tokens.unwrap_or(4096);
let (thinking, budget) = if request.thinking_enabled {
let budget = match request.reasoning_effort.as_deref() {
Some("low") => 2000,
Some("medium") => 10000,
Some("high") => 32000,
_ => 10000, // default
};
(Some(AnthropicThinking {
r#type: "enabled".to_string(),
budget_tokens: budget,
}), budget)
} else {
(None, 0)
};
// When thinking is enabled, max_tokens is the TOTAL budget (thinking + text).
// Use the maximum output limit (65536) so thinking can consume whatever it
// needs without starving the text response. We only pay for tokens actually
// generated, so a high limit costs nothing extra.
let effective_max = if budget > 0 {
65536
} else {
requested_max
};
AnthropicRequest {
model: request.model.clone(),
max_tokens: request.max_tokens.unwrap_or(4096),
max_tokens: effective_max,
system: request.system.clone(),
messages,
tools: if tools.is_empty() { None } else { Some(tools) },
temperature: request.temperature,
stop_sequences: if request.stop.is_empty() { None } else { Some(request.stop.clone()) },
stream: request.stream,
thinking,
}
}
@@ -313,6 +344,14 @@ struct AnthropicRequest {
stop_sequences: Option<Vec<String>>,
#[serde(default)]
stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
thinking: Option<AnthropicThinking>,
}
/// Serialized payload for Anthropic's extended-thinking request option.
/// Placed in `AnthropicRequest.thinking`, which is skipped during
/// serialization when `None` (i.e. when thinking is disabled).
#[derive(Serialize)]
struct AnthropicThinking {
    /// Thinking mode; the builder above always sets this to "enabled".
    /// `r#type` is a raw identifier because `type` is a Rust keyword.
    r#type: String,
    /// Token budget reserved for thinking, chosen from `reasoning_effort`
    /// ("low" => 2000, "medium" => 10000, "high" => 32000; default 10000).
    budget_tokens: u32,
}
#[derive(Serialize)]