refactor: rename the project from OpenFang to ZCLAW

Rename the project from OpenFang to ZCLAW across all code and documentation. This covers:
- project names in configuration files
- code comments and documentation references
- environment variables and paths
- type definitions and interface names
- test cases and mock data

Also tidies up parts of the code structure, removes unused modules, and updates related dependencies.
This commit is contained in:
iven
2026-03-27 07:36:03 +08:00
parent 4b08804aa9
commit 0d4fa96b82
226 changed files with 7288 additions and 5788 deletions

View File

@@ -32,6 +32,7 @@ uuid = { workspace = true }
# Database
sqlx = { workspace = true }
libsqlite3-sys = { workspace = true }
# Internal crates
zclaw-types = { workspace = true }

View File

@@ -388,6 +388,8 @@ mod tests {
access_count: 0,
created_at: Utc::now(),
last_accessed: Utc::now(),
overview: None,
abstract_summary: None,
}
}

View File

@@ -63,6 +63,7 @@ pub mod tracker;
pub mod viking_adapter;
pub mod storage;
pub mod retrieval;
pub mod summarizer;
// Re-export main types for convenience
pub use types::{
@@ -82,7 +83,8 @@ pub use injector::{InjectionFormat, PromptInjector};
pub use tracker::{AgentMetadata, GrowthTracker, LearningEvent};
pub use viking_adapter::{FindOptions, VikingAdapter, VikingLevel, VikingStorage};
pub use storage::SqliteStorage;
pub use retrieval::{MemoryCache, QueryAnalyzer, SemanticScorer};
pub use retrieval::{EmbeddingClient, MemoryCache, QueryAnalyzer, SemanticScorer};
pub use summarizer::SummaryLlmDriver;
/// Growth system configuration
#[derive(Debug, Clone)]

View File

@@ -18,7 +18,8 @@ struct CacheEntry {
access_count: u32,
}
/// Cache key for efficient lookups
/// Cache key for efficient lookups (reserved for future cache optimization)
#[allow(dead_code)]
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
struct CacheKey {
agent_id: String,

View File

@@ -9,6 +9,6 @@ pub mod semantic;
pub mod query;
pub mod cache;
pub use semantic::SemanticScorer;
pub use semantic::{EmbeddingClient, SemanticScorer};
pub use query::QueryAnalyzer;
pub use cache::MemoryCache;

View File

@@ -253,8 +253,13 @@ impl SemanticScorer {
}
}
/// Get pre-computed embedding for an entry
pub fn get_entry_embedding(&self, uri: &str) -> Option<Vec<f32>> {
self.entry_embeddings.get(uri).cloned()
}
/// Compute cosine similarity between two embedding vectors
fn cosine_similarity_embedding(v1: &[f32], v2: &[f32]) -> f32 {
pub fn cosine_similarity_embedding(v1: &[f32], v2: &[f32]) -> f32 {
if v1.is_empty() || v2.is_empty() || v1.len() != v2.len() {
return 0.0;
}
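This hunk shows only the guard clause of the newly public helper. For orientation, a minimal sketch of the standard cosine-similarity computation such a guard protects (the actual body lies outside this hunk):

```rust
/// Sketch only: dot(v1, v2) / (|v1| * |v2|), with zero-magnitude guard.
fn cosine_similarity_sketch(v1: &[f32], v2: &[f32]) -> f32 {
    if v1.is_empty() || v2.is_empty() || v1.len() != v2.len() {
        return 0.0;
    }
    let dot: f32 = v1.iter().zip(v2).map(|(a, b)| a * b).sum();
    let n1 = v1.iter().map(|a| a * a).sum::<f32>().sqrt();
    let n2 = v2.iter().map(|b| b * b).sum::<f32>().sqrt();
    if n1 == 0.0 || n2 == 0.0 { 0.0 } else { dot / (n1 * n2) }
}
```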

View File

@@ -3,7 +3,7 @@
//! Persistent storage backend using SQLite for production use.
//! Provides efficient querying and full-text search capabilities.
use crate::retrieval::semantic::SemanticScorer;
use crate::retrieval::semantic::{EmbeddingClient, SemanticScorer};
use crate::types::MemoryEntry;
use crate::viking_adapter::{FindOptions, VikingStorage};
use async_trait::async_trait;
@@ -36,6 +36,8 @@ struct MemoryRow {
access_count: i32,
created_at: String,
last_accessed: String,
overview: Option<String>,
abstract_summary: Option<String>,
}
impl SqliteStorage {
@@ -83,6 +85,26 @@ impl SqliteStorage {
Self::new(":memory:").await.expect("Failed to create in-memory database")
}
/// Configure embedding client for semantic search
/// Replaces the current scorer with a new one that has embedding support
pub async fn configure_embedding(
&self,
client: Arc<dyn EmbeddingClient>,
) -> Result<()> {
let new_scorer = SemanticScorer::with_embedding(client);
let mut scorer = self.scorer.write().await;
*scorer = new_scorer;
tracing::info!("[SqliteStorage] Embedding client configured, re-indexing with embeddings...");
self.warmup_scorer_with_embedding().await
}
/// Check if embedding is available
pub async fn is_embedding_available(&self) -> bool {
let scorer = self.scorer.read().await;
scorer.is_embedding_available()
}
/// Initialize database schema with FTS5
async fn initialize_schema(&self) -> Result<()> {
// Create main memories table
@@ -131,6 +153,16 @@ impl SqliteStorage {
.await
.map_err(|e| ZclawError::StorageError(format!("Failed to create importance index: {}", e)))?;
// Migration: add overview column (L1 summary)
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN overview TEXT")
.execute(&self.pool)
.await;
// Migration: add abstract_summary column (L0 keywords)
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN abstract_summary TEXT")
.execute(&self.pool)
.await;
// Create metadata table
sqlx::query(
r#"
@@ -151,7 +183,7 @@ impl SqliteStorage {
/// Warmup semantic scorer with existing entries
async fn warmup_scorer(&self) -> Result<()> {
let rows = sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories"
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
)
.fetch_all(&self.pool)
.await
@@ -173,6 +205,32 @@ impl SqliteStorage {
Ok(())
}
/// Warmup semantic scorer with embedding support for existing entries
async fn warmup_scorer_with_embedding(&self) -> Result<()> {
let rows = sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
)
.fetch_all(&self.pool)
.await
.map_err(|e| ZclawError::StorageError(format!("Failed to load memories for warmup: {}", e)))?;
let mut scorer = self.scorer.write().await;
for row in rows {
let entry = self.row_to_entry(&row);
scorer.index_entry_with_embedding(&entry).await;
}
let stats = scorer.stats();
tracing::info!(
"[SqliteStorage] Warmed up scorer with {} entries ({} with embeddings), {} terms",
stats.indexed_entries,
stats.embedding_entries,
stats.unique_terms
);
Ok(())
}
/// Convert database row to MemoryEntry
fn row_to_entry(&self, row: &MemoryRow) -> MemoryEntry {
let memory_type = crate::types::MemoryType::parse(&row.memory_type);
@@ -193,6 +251,8 @@ impl SqliteStorage {
access_count: row.access_count as u32,
created_at,
last_accessed,
overview: row.overview.clone(),
abstract_summary: row.abstract_summary.clone(),
}
}
@@ -223,6 +283,8 @@ impl sqlx::FromRow<'_, SqliteRow> for MemoryRow {
access_count: row.try_get("access_count")?,
created_at: row.try_get("created_at")?,
last_accessed: row.try_get("last_accessed")?,
overview: row.try_get("overview").ok(),
abstract_summary: row.try_get("abstract_summary").ok(),
})
}
}
@@ -241,8 +303,8 @@ impl VikingStorage for SqliteStorage {
sqlx::query(
r#"
INSERT OR REPLACE INTO memories
(uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
(uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"#,
)
.bind(&entry.uri)
@@ -253,6 +315,8 @@ impl VikingStorage for SqliteStorage {
.bind(entry.access_count as i32)
.bind(&created_at)
.bind(&last_accessed)
.bind(&entry.overview)
.bind(&entry.abstract_summary)
.execute(&self.pool)
.await
.map_err(|e| ZclawError::StorageError(format!("Failed to store memory: {}", e)))?;
@@ -276,9 +340,13 @@ impl VikingStorage for SqliteStorage {
.execute(&self.pool)
.await;
// Update semantic scorer
// Update semantic scorer (use embedding when available)
let mut scorer = self.scorer.write().await;
scorer.index_entry(entry);
if scorer.is_embedding_available() {
scorer.index_entry_with_embedding(entry).await;
} else {
scorer.index_entry(entry);
}
tracing::debug!("[SqliteStorage] Stored memory: {}", entry.uri);
Ok(())
@@ -286,7 +354,7 @@ impl VikingStorage for SqliteStorage {
async fn get(&self, uri: &str) -> Result<Option<MemoryEntry>> {
let row = sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri = ?"
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri = ?"
)
.bind(uri)
.fetch_optional(&self.pool)
@@ -309,7 +377,7 @@ impl VikingStorage for SqliteStorage {
// Get all matching entries
let rows = if let Some(ref scope) = options.scope {
sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri LIKE ?"
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri LIKE ?"
)
.bind(format!("{}%", scope))
.fetch_all(&self.pool)
@@ -317,7 +385,7 @@ impl VikingStorage for SqliteStorage {
.map_err(|e| ZclawError::StorageError(format!("Failed to find memories: {}", e)))?
} else {
sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories"
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
)
.fetch_all(&self.pool)
.await
@@ -325,14 +393,49 @@ impl VikingStorage for SqliteStorage {
};
// Convert to entries and compute semantic scores
let scorer = self.scorer.read().await;
// Embed the query once up front when embedding is available, instead of
// making one embedding call per row inside the scoring loop below
let query_embedding = {
let scorer = self.scorer.read().await;
if scorer.is_embedding_available() {
let client = scorer.get_embedding_client();
drop(scorer);
match client.embed(query).await {
Ok(emb) => Some(emb),
Err(_) => {
tracing::debug!("[SqliteStorage] Query embedding failed, using TF-IDF only");
None
}
}
} else {
None
}
};
let mut scored_entries: Vec<(f32, MemoryEntry)> = Vec::new();
for row in rows {
let entry = self.row_to_entry(&row);
// Compute semantic score using TF-IDF
let semantic_score = scorer.score_similarity(query, &entry);
// Hybrid semantic score: 70% embedding cosine + 30% TF-IDF when both the
// query and the entry have embeddings, plain TF-IDF otherwise
let semantic_score = {
let scorer = self.scorer.read().await;
let tfidf_score = scorer.score_similarity(query, &entry);
match (&query_embedding, scorer.get_entry_embedding(&entry.uri)) {
(Some(query_emb), Some(entry_emb)) => {
let emb_score = SemanticScorer::cosine_similarity_embedding(query_emb, &entry_emb);
emb_score * 0.7 + tfidf_score * 0.3
}
_ => tfidf_score,
}
};
// Apply similarity threshold
if let Some(min_similarity) = options.min_similarity {
@@ -362,7 +465,7 @@ impl VikingStorage for SqliteStorage {
async fn find_by_prefix(&self, prefix: &str) -> Result<Vec<MemoryEntry>> {
let rows = sqlx::query_as::<_, MemoryRow>(
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri LIKE ?"
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri LIKE ?"
)
.bind(format!("{}%", prefix))
.fetch_all(&self.pool)
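A tiny worked example of the 70/30 hybrid weighting used in `find` above (scores illustrative):

```rust
let emb_score = 0.82_f32;   // embedding cosine similarity
let tfidf_score = 0.40_f32; // lexical TF-IDF similarity
let hybrid = emb_score * 0.7 + tfidf_score * 0.3; // = 0.694
assert!((hybrid - 0.694).abs() < 1e-5);
```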

View File

@@ -0,0 +1,192 @@
//! Memory Summarizer - L0/L1 Summary Generation
//!
//! Provides trait and functions for generating layered summaries of memory entries:
//! - L1 Overview: 1-2 sentence summary (~200 tokens)
//! - L0 Abstract: 3-5 keywords (~100 tokens)
//!
//! The trait-based design allows zclaw-growth to remain decoupled from any
//! specific LLM implementation. The Tauri layer provides a concrete implementation.
use crate::types::MemoryEntry;
/// LLM driver for summary generation.
/// Implementations call an LLM API to produce concise summaries.
#[async_trait::async_trait]
pub trait SummaryLlmDriver: Send + Sync {
/// Generate a short summary (1-2 sentences, ~200 tokens) for a memory entry.
async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String>;
/// Generate keyword extraction (3-5 keywords, ~100 tokens) for a memory entry.
async fn generate_abstract(&self, entry: &MemoryEntry) -> Result<String, String>;
}
/// Generate an L1 overview prompt for the LLM.
pub fn overview_prompt(entry: &MemoryEntry) -> String {
format!(
r#"Summarize the following memory entry in 1-2 concise sentences (in the same language as the content).
Focus on the key information. Do not add any preamble or explanation.
Memory type: {}
Category: {}
Content: {}"#,
entry.memory_type,
entry.uri.rsplit('/').next().unwrap_or("unknown"),
entry.content
)
}
/// Generate an L0 abstract prompt for the LLM.
pub fn abstract_prompt(entry: &MemoryEntry) -> String {
format!(
r#"Extract 3-5 keywords or key phrases from the following memory entry.
Output ONLY the keywords, comma-separated, in the same language as the content.
Do not add any preamble, explanation, or numbering.
Memory type: {}
Content: {}"#,
entry.memory_type, entry.content
)
}
/// Generate both L1 overview and L0 abstract for a memory entry.
/// Returns (overview, abstract_summary) tuple.
pub async fn generate_summaries(
driver: &dyn SummaryLlmDriver,
entry: &MemoryEntry,
) -> (Option<String>, Option<String>) {
// Generate L1 overview
let overview = match driver.generate_overview(entry).await {
Ok(text) => {
let cleaned = clean_summary(&text);
if !cleaned.is_empty() {
Some(cleaned)
} else {
None
}
}
Err(e) => {
tracing::debug!("[Summarizer] Failed to generate overview for {}: {}", entry.uri, e);
None
}
};
// Generate L0 abstract
let abstract_summary = match driver.generate_abstract(entry).await {
Ok(text) => {
let cleaned = clean_summary(&text);
if !cleaned.is_empty() {
Some(cleaned)
} else {
None
}
}
Err(e) => {
tracing::debug!("[Summarizer] Failed to generate abstract for {}: {}", entry.uri, e);
None
}
};
(overview, abstract_summary)
}
/// Clean LLM response: strip quotes, whitespace, prefixes
fn clean_summary(text: &str) -> String {
text.trim()
.trim_start_matches('"')
.trim_end_matches('"')
.trim_start_matches('\'')
.trim_end_matches('\'')
.trim_start_matches("摘要:")
.trim_start_matches("摘要:")
.trim_start_matches("关键词:")
.trim_start_matches("关键词:")
.trim_start_matches("Overview:")
.trim_start_matches("overview:")
.trim()
.to_string()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::MemoryType;
struct MockSummaryDriver;
#[async_trait::async_trait]
impl SummaryLlmDriver for MockSummaryDriver {
async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String> {
Ok(format!("Summary of: {}", &entry.content[..entry.content.len().min(30)]))
}
async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
Ok("keyword1, keyword2, keyword3".to_string())
}
}
fn make_entry(content: &str) -> MemoryEntry {
MemoryEntry::new("test-agent", MemoryType::Knowledge, "test", content.to_string())
}
#[tokio::test]
async fn test_generate_summaries() {
let driver = MockSummaryDriver;
let entry = make_entry("This is a test memory entry about Rust programming.");
let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;
assert!(overview.is_some());
assert!(abstract_summary.is_some());
assert!(overview.unwrap().contains("Summary of"));
assert!(abstract_summary.unwrap().contains("keyword1"));
}
#[tokio::test]
async fn test_generate_summaries_handles_error() {
struct FailingDriver;
#[async_trait::async_trait]
impl SummaryLlmDriver for FailingDriver {
async fn generate_overview(&self, _entry: &MemoryEntry) -> Result<String, String> {
Err("LLM unavailable".to_string())
}
async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
Err("LLM unavailable".to_string())
}
}
let driver = FailingDriver;
let entry = make_entry("test content");
let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;
assert!(overview.is_none());
assert!(abstract_summary.is_none());
}
#[test]
fn test_clean_summary() {
assert_eq!(clean_summary("\"hello world\""), "hello world");
assert_eq!(clean_summary("摘要:你好"), "你好");
assert_eq!(clean_summary(" keyword1, keyword2 "), "keyword1, keyword2");
assert_eq!(clean_summary("Overview: something"), "something");
}
#[test]
fn test_overview_prompt() {
let entry = make_entry("User prefers dark mode and compact UI");
let prompt = overview_prompt(&entry);
assert!(prompt.contains("1-2 concise sentences"));
assert!(prompt.contains("User prefers dark mode"));
assert!(prompt.contains("knowledge"));
}
#[test]
fn test_abstract_prompt() {
let entry = make_entry("Rust is a systems programming language");
let prompt = abstract_prompt(&entry);
assert!(prompt.contains("3-5 keywords"));
assert!(prompt.contains("Rust is a systems"));
}
}
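A minimal wiring sketch of how a caller could attach the generated summaries to an entry; the `summarize_entry` helper is illustrative and not part of this commit, and `driver` is assumed to come from the Tauri layer:

```rust
async fn summarize_entry(driver: &dyn SummaryLlmDriver, mut entry: MemoryEntry) -> MemoryEntry {
    // generate_summaries degrades gracefully: a failed LLM call yields None
    let (overview, abstract_summary) = generate_summaries(driver, &entry).await;
    entry.overview = overview;                 // L1: 1-2 sentence summary
    entry.abstract_summary = abstract_summary; // L0: 3-5 keywords
    entry
}
```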

View File

@@ -72,6 +72,10 @@ pub struct MemoryEntry {
pub created_at: DateTime<Utc>,
/// Last access timestamp
pub last_accessed: DateTime<Utc>,
/// L1 overview: 1-2 sentence summary (~200 tokens)
pub overview: Option<String>,
/// L0 abstract: 3-5 keywords (~100 tokens)
pub abstract_summary: Option<String>,
}
impl MemoryEntry {
@@ -92,6 +96,8 @@ impl MemoryEntry {
access_count: 0,
created_at: Utc::now(),
last_accessed: Utc::now(),
overview: None,
abstract_summary: None,
}
}
@@ -107,6 +113,18 @@ impl MemoryEntry {
self
}
/// Set L1 overview summary
pub fn with_overview(mut self, overview: impl Into<String>) -> Self {
self.overview = Some(overview.into());
self
}
/// Set L0 abstract summary
pub fn with_abstract_summary(mut self, abstract_summary: impl Into<String>) -> Self {
self.abstract_summary = Some(abstract_summary.into());
self
}
/// Mark as accessed
pub fn touch(&mut self) {
self.access_count += 1;
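Illustrative builder usage; the `MemoryEntry::new` argument order follows the test helper in the summarizer module above, and the values are examples:

```rust
let entry = MemoryEntry::new(
    "agent-1",
    MemoryType::Knowledge,
    "rust",
    "Ownership moves values; borrows grant temporary access.".to_string(),
)
.with_overview("Two-sentence recap of Rust ownership and borrowing.")
.with_abstract_summary("ownership, borrowing, lifetimes");
```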

View File

@@ -9,6 +9,7 @@ description = "ZCLAW Hands - autonomous capabilities"
[dependencies]
zclaw-types = { workspace = true }
zclaw-runtime = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }

View File

@@ -14,7 +14,7 @@
mod whiteboard;
mod slideshow;
mod speech;
mod quiz;
pub mod quiz;
mod browser;
mod researcher;
mod collector;

View File

@@ -14,6 +14,7 @@ use std::sync::Arc;
use tokio::sync::RwLock;
use uuid::Uuid;
use zclaw_types::Result;
use zclaw_runtime::driver::{LlmDriver, CompletionRequest};
use crate::{Hand, HandConfig, HandContext, HandResult, HandStatus};
@@ -44,29 +45,242 @@ impl QuizGenerator for DefaultQuizGenerator {
difficulty: &DifficultyLevel,
_question_types: &[QuestionType],
) -> Result<Vec<QuizQuestion>> {
// Generate placeholder questions
// Generate placeholder questions with varied correct-answer positions
let options_pool: Vec<Vec<String>> = vec![
vec!["光合作用".into(), "呼吸作用".into(), "蒸腾作用".into(), "运输作用".into()],
vec!["牛顿".into(), "爱因斯坦".into(), "伽利略".into(), "开普勒".into()],
vec!["太平洋".into(), "大西洋".into(), "印度洋".into(), "北冰洋".into()],
vec!["DNA".into(), "RNA".into(), "蛋白质".into(), "碳水化合物".into()],
vec!["引力".into(), "电磁力".into(), "强力".into(), "弱力".into()],
];
Ok((0..count)
.map(|i| QuizQuestion {
id: uuid_v4(),
question_type: QuestionType::MultipleChoice,
question: format!("Question {} about {}", i + 1, topic),
options: Some(vec![
"Option A".to_string(),
"Option B".to_string(),
"Option C".to_string(),
"Option D".to_string(),
]),
correct_answer: Answer::Single("Option A".to_string()),
explanation: Some(format!("Explanation for question {}", i + 1)),
hints: Some(vec![format!("Hint 1 for question {}", i + 1)]),
points: 10.0,
difficulty: difficulty.clone(),
tags: vec![topic.to_string()],
.map(|i| {
let pool_idx = i % options_pool.len();
let mut opts = options_pool[pool_idx].clone();
// Keep the pool's canonical answer (index 0) but move it to a varying
// position so the correct option is not always A (for 4 options the
// position cycles 1, 0, 3, 2, ...)
let correct = opts[0].clone();
let correct_idx = (i * 3 + 1) % opts.len();
opts.swap(0, correct_idx);
QuizQuestion {
id: uuid_v4(),
question_type: QuestionType::MultipleChoice,
question: format!("关于{}的第{}题({}难度)", topic, i + 1, match difficulty {
DifficultyLevel::Easy => "简单",
DifficultyLevel::Medium => "中等",
DifficultyLevel::Hard => "困难",
DifficultyLevel::Adaptive => "自适应",
}),
options: Some(opts),
correct_answer: Answer::Single(correct),
explanation: Some(format!("{}题的详细解释", i + 1)),
hints: Some(vec![format!("提示:仔细阅读关于{}的内容", topic)]),
points: 10.0,
difficulty: difficulty.clone(),
tags: vec![topic.to_string()],
}
})
.collect())
}
}
/// LLM-powered quiz generator that produces real questions via an LLM driver.
pub struct LlmQuizGenerator {
driver: Arc<dyn LlmDriver>,
model: String,
}
impl LlmQuizGenerator {
pub fn new(driver: Arc<dyn LlmDriver>, model: String) -> Self {
Self { driver, model }
}
}
#[async_trait]
impl QuizGenerator for LlmQuizGenerator {
async fn generate_questions(
&self,
topic: &str,
content: Option<&str>,
count: usize,
difficulty: &DifficultyLevel,
question_types: &[QuestionType],
) -> Result<Vec<QuizQuestion>> {
let difficulty_str = match difficulty {
DifficultyLevel::Easy => "简单",
DifficultyLevel::Medium => "中等",
DifficultyLevel::Hard => "困难",
DifficultyLevel::Adaptive => "中等",
};
let type_str = if question_types.is_empty() {
String::from("选择题(multiple_choice)")
} else {
question_types
.iter()
.map(|t| match t {
QuestionType::MultipleChoice => "选择题",
QuestionType::TrueFalse => "判断题",
QuestionType::FillBlank => "填空题",
QuestionType::ShortAnswer => "简答题",
QuestionType::Essay => "论述题",
_ => "选择题",
})
.collect::<Vec<_>>()
.join(",")
};
let content_section = match content {
Some(c) if !c.is_empty() => {
// Truncate on a char boundary; byte slicing can panic on CJK content
let excerpt: String = c.chars().take(3000).collect();
format!("\n\n参考内容:\n{}", excerpt)
}
_ => String::new(),
};
let content_note = if content.map_or(false, |c| !c.is_empty()) {
"(基于提供的参考内容出题)"
} else {
""
};
let prompt = format!(
r#"你是一个专业的出题专家。请根据以下要求生成测验题目:
主题: {}
难度: {}
题目类型: {}
数量: {}{}
{}
请严格按照以下 JSON 格式输出,不要添加任何其他文字:
```json
[
{{
"question": "题目内容",
"options": ["选项A", "选项B", "选项C", "选项D"],
"correct_answer": "正确答案与options中某项完全一致",
"explanation": "答案解释",
"hint": "提示信息"
}}
]
```
要求:
1. 题目要有实际内容,不要使用占位符
2. 正确答案必须随机分布(不要总在第一个选项)
3. 每道题的选项要有区分度,干扰项要合理
4. 解释要清晰准确
5. 直接输出 JSON,不要有 markdown 包裹"#,
topic, difficulty_str, type_str, count, content_section, content_note,
);
let request = CompletionRequest {
model: self.model.clone(),
system: Some("你是一个专业的出题专家只输出纯JSON格式。".to_string()),
messages: vec![zclaw_types::Message::user(&prompt)],
tools: Vec::new(),
max_tokens: Some(4096),
temperature: Some(0.7),
stop: Vec::new(),
stream: false,
};
let response = self.driver.complete(request).await.map_err(|e| {
zclaw_types::ZclawError::Internal(format!("LLM quiz generation failed: {}", e))
})?;
// Extract text from response
let text: String = response
.content
.iter()
.filter_map(|block| match block {
zclaw_runtime::driver::ContentBlock::Text { text } => Some(text.clone()),
_ => None,
})
.collect::<Vec<_>>()
.join("");
// Parse JSON from response (handle markdown code fences)
let json_str = extract_json(&text);
let raw_questions: Vec<serde_json::Value> =
serde_json::from_str(json_str).map_err(|e| {
// Build the error preview on a char boundary; the LLM output may be CJK
let preview: String = text.chars().take(200).collect();
zclaw_types::ZclawError::Internal(format!(
"Failed to parse quiz JSON: {}. Raw: {}",
e, preview
))
})?;
let questions: Vec<QuizQuestion> = raw_questions
.into_iter()
.take(count)
.map(|q| {
let options: Vec<String> = q["options"]
.as_array()
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
.unwrap_or_default();
let correct = q["correct_answer"]
.as_str()
.unwrap_or("")
.to_string();
QuizQuestion {
id: uuid_v4(),
question_type: QuestionType::MultipleChoice,
question: q["question"].as_str().unwrap_or("未知题目").to_string(),
options: if options.is_empty() { None } else { Some(options) },
correct_answer: Answer::Single(correct),
explanation: q["explanation"].as_str().map(String::from),
hints: q["hint"].as_str().map(|h| vec![h.to_string()]),
points: 10.0,
difficulty: difficulty.clone(),
tags: vec![topic.to_string()],
}
})
.collect();
if questions.is_empty() {
// Fallback to default if LLM returns nothing parseable
return DefaultQuizGenerator
.generate_questions(topic, content, count, difficulty, question_types)
.await;
}
Ok(questions)
}
}
/// Extract JSON from a string that may be wrapped in markdown code fences.
fn extract_json(text: &str) -> &str {
let trimmed = text.trim();
// Try to find ```json ... ``` block
if let Some(start) = trimmed.find("```json") {
let after_start = &trimmed[start + 7..];
if let Some(end) = after_start.find("```") {
return after_start[..end].trim();
}
}
// Try to find ``` ... ``` block
if let Some(start) = trimmed.find("```") {
let after_start = &trimmed[start + 3..];
if let Some(end) = after_start.find("```") {
return after_start[..end].trim();
}
}
// Try to find raw JSON array
if let Some(start) = trimmed.find('[') {
if let Some(end) = trimmed.rfind(']') {
return &trimmed[start..=end];
}
}
trimmed
}
/// Quiz action types
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "action", rename_all = "snake_case")]

View File

@@ -20,6 +20,7 @@ tokio-stream = { workspace = true }
futures = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
toml = { workspace = true }
thiserror = { workspace = true }
uuid = { workspace = true }
chrono = { workspace = true }

View File

@@ -252,10 +252,78 @@ fn default_skills_dir() -> Option<std::path::PathBuf> {
}
impl KernelConfig {
/// Load configuration from file
/// Load configuration from file.
///
/// Search order:
/// 1. Path from `ZCLAW_CONFIG` environment variable
/// 2. `~/.zclaw/config.toml`
/// 3. Fallback to `Self::default()`
///
/// Supports `${VAR_NAME}` environment variable interpolation in string values.
pub async fn load() -> Result<Self> {
// TODO: Load from ~/.zclaw/config.toml
Ok(Self::default())
let config_path = Self::find_config_path();
match config_path {
Some(path) => {
if !path.exists() {
tracing::debug!(target: "kernel_config", "Config file not found: {:?}, using defaults", path);
return Ok(Self::default());
}
tracing::info!(target: "kernel_config", "Loading config from: {:?}", path);
let content = std::fs::read_to_string(&path).map_err(|e| {
zclaw_types::ZclawError::Internal(format!("Failed to read config {}: {}", path.display(), e))
})?;
let interpolated = interpolate_env_vars(&content);
let mut config: KernelConfig = toml::from_str(&interpolated).map_err(|e| {
zclaw_types::ZclawError::Internal(format!("Failed to parse config {}: {}", path.display(), e))
})?;
// Resolve skills_dir if not explicitly set
if config.skills_dir.is_none() {
config.skills_dir = default_skills_dir();
}
tracing::info!(
target: "kernel_config",
model = %config.llm.model,
base_url = %config.llm.base_url,
has_api_key = !config.llm.api_key.is_empty(),
"Config loaded successfully"
);
Ok(config)
}
None => Ok(Self::default()),
}
}
/// Find the config file path.
fn find_config_path() -> Option<PathBuf> {
// 1. Environment variable override
if let Ok(path) = std::env::var("ZCLAW_CONFIG") {
return Some(PathBuf::from(path));
}
// 2. ~/.zclaw/config.toml
if let Some(home) = dirs::home_dir() {
let path = home.join(".zclaw").join("config.toml");
if path.exists() {
return Some(path);
}
}
// 3. Project root config/config.toml (for development)
let project_config = std::env::current_dir()
.ok()
.map(|cwd| cwd.join("config").join("config.toml"))?;
if project_config.exists() {
return Some(project_config);
}
None
}
/// Create the LLM driver
@@ -439,3 +507,81 @@ impl LlmConfig {
self
}
}
// === Environment variable interpolation ===
/// Replace `${VAR_NAME}` patterns in a string with environment variable values.
/// If the variable is not set, the pattern is left as-is.
fn interpolate_env_vars(content: &str) -> String {
let mut result = String::with_capacity(content.len());
let mut chars = content.char_indices().peekable();
while let Some((_, ch)) = chars.next() {
if ch == '$' && chars.peek().map(|(_, c)| *c == '{').unwrap_or(false) {
chars.next(); // consume '{'
let mut var_name = String::new();
let mut closed = false;
while let Some((_, c)) = chars.peek() {
match c {
'}' => {
chars.next(); // consume '}'
closed = true;
if let Ok(value) = std::env::var(&var_name) {
result.push_str(&value);
} else {
result.push_str("${");
result.push_str(&var_name);
result.push('}');
}
break;
}
_ => {
var_name.push(*c);
chars.next();
}
}
}
// Unclosed `${...` at end of input: emit the consumed characters verbatim
if !closed {
result.push_str("${");
result.push_str(&var_name);
}
} else {
result.push(ch);
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_interpolate_env_vars_basic() {
std::env::set_var("ZCLAW_TEST_VAR", "hello");
let result = interpolate_env_vars("prefix ${ZCLAW_TEST_VAR} suffix");
assert_eq!(result, "prefix hello suffix");
}
#[test]
fn test_interpolate_env_vars_missing() {
let result = interpolate_env_vars("${ZCLAW_NONEXISTENT_VAR_12345}");
assert_eq!(result, "${ZCLAW_NONEXISTENT_VAR_12345}");
}
#[test]
fn test_interpolate_env_vars_no_vars() {
let result = interpolate_env_vars("no variables here");
assert_eq!(result, "no variables here");
}
#[test]
fn test_interpolate_env_vars_multiple() {
std::env::set_var("ZCLAW_TEST_A", "alpha");
std::env::set_var("ZCLAW_TEST_B", "beta");
let result = interpolate_env_vars("${ZCLAW_TEST_A}-${ZCLAW_TEST_B}");
assert_eq!(result, "alpha-beta");
}
}
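A hypothetical config snippet run through the interpolator; the `[llm]` keys mirror the fields logged by `load()`, while the model name and URL are placeholders:

```rust
std::env::set_var("ANTHROPIC_API_KEY", "sk-test");
let raw = r#"
[llm]
model = "claude-sonnet-4"
base_url = "https://api.anthropic.com"
api_key = "${ANTHROPIC_API_KEY}"
"#;
let resolved = interpolate_env_vars(raw);
// The ${...} placeholder is substituted from the environment
assert!(resolved.contains(r#"api_key = "sk-test""#));
```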

View File

@@ -1,7 +1,7 @@
//! Kernel - central coordinator
use std::sync::Arc;
use tokio::sync::{broadcast, mpsc};
use tokio::sync::{broadcast, mpsc, Mutex};
use zclaw_types::{AgentConfig, AgentId, AgentInfo, Event, Result};
use async_trait::async_trait;
use serde_json::Value;
@@ -13,7 +13,7 @@ use crate::config::KernelConfig;
use zclaw_memory::MemoryStore;
use zclaw_runtime::{AgentLoop, LlmDriver, ToolRegistry, tool::SkillExecutor};
use zclaw_skills::SkillRegistry;
use zclaw_hands::{HandRegistry, HandContext, HandResult, hands::{BrowserHand, SlideshowHand, SpeechHand, QuizHand, WhiteboardHand, ResearcherHand, CollectorHand, ClipHand, TwitterHand}};
use zclaw_hands::{HandRegistry, HandContext, HandResult, hands::{BrowserHand, SlideshowHand, SpeechHand, QuizHand, WhiteboardHand, ResearcherHand, CollectorHand, ClipHand, TwitterHand, quiz::LlmQuizGenerator}};
/// Skill executor implementation for Kernel
pub struct KernelSkillExecutor {
@@ -57,6 +57,7 @@ pub struct Kernel {
skill_executor: Arc<KernelSkillExecutor>,
hands: Arc<HandRegistry>,
trigger_manager: crate::trigger_manager::TriggerManager,
pending_approvals: Arc<Mutex<Vec<ApprovalEntry>>>,
}
impl Kernel {
@@ -85,10 +86,12 @@ impl Kernel {
// Initialize hand registry with built-in hands
let hands = Arc::new(HandRegistry::new());
let quiz_model = config.model().to_string();
let quiz_generator = Arc::new(LlmQuizGenerator::new(driver.clone(), quiz_model));
hands.register(Arc::new(BrowserHand::new())).await;
hands.register(Arc::new(SlideshowHand::new())).await;
hands.register(Arc::new(SpeechHand::new())).await;
hands.register(Arc::new(QuizHand::new())).await;
hands.register(Arc::new(QuizHand::with_generator(quiz_generator))).await;
hands.register(Arc::new(WhiteboardHand::new())).await;
hands.register(Arc::new(ResearcherHand::new())).await;
hands.register(Arc::new(CollectorHand::new())).await;
@@ -118,6 +121,7 @@ impl Kernel {
skill_executor,
hands,
trigger_manager,
pending_approvals: Arc::new(Mutex::new(Vec::new())),
})
}
@@ -306,7 +310,8 @@ impl Kernel {
.with_model(&model)
.with_skill_executor(self.skill_executor.clone())
.with_max_tokens(agent_config.max_tokens.unwrap_or_else(|| self.config.max_tokens()))
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()));
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()))
.with_compaction_threshold(15_000); // Compact when context exceeds ~15k tokens
// Build system prompt with skill information injected
let system_prompt = self.build_system_prompt_with_skills(agent_config.system_prompt.as_ref()).await;
@@ -327,6 +332,16 @@ impl Kernel {
&self,
agent_id: &AgentId,
message: String,
) -> Result<mpsc::Receiver<zclaw_runtime::LoopEvent>> {
self.send_message_stream_with_prompt(agent_id, message, None).await
}
/// Send a message with streaming and optional external system prompt
pub async fn send_message_stream_with_prompt(
&self,
agent_id: &AgentId,
message: String,
system_prompt_override: Option<String>,
) -> Result<mpsc::Receiver<zclaw_runtime::LoopEvent>> {
let agent_config = self.registry.get(agent_id)
.ok_or_else(|| zclaw_types::ZclawError::NotFound(format!("Agent not found: {}", agent_id)))?;
@@ -349,10 +364,14 @@ impl Kernel {
.with_model(&model)
.with_skill_executor(self.skill_executor.clone())
.with_max_tokens(agent_config.max_tokens.unwrap_or_else(|| self.config.max_tokens()))
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()));
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()))
.with_compaction_threshold(15_000); // Compact when context exceeds ~15k tokens
// Build system prompt with skill information injected
let system_prompt = self.build_system_prompt_with_skills(agent_config.system_prompt.as_ref()).await;
// Use external prompt if provided, otherwise build default
let system_prompt = match system_prompt_override {
Some(prompt) => prompt,
None => self.build_system_prompt_with_skills(agent_config.system_prompt.as_ref()).await,
};
let loop_runner = loop_runner.with_system_prompt(&system_prompt);
// Run with streaming
@@ -477,24 +496,82 @@ impl Kernel {
}
// ============================================================
// Approval Management (Stub Implementation)
// Approval Management
// ============================================================
/// List pending approvals
pub async fn list_approvals(&self) -> Vec<ApprovalEntry> {
// Stub: Return empty list
Vec::new()
let approvals = self.pending_approvals.lock().await;
approvals.iter().filter(|a| a.status == "pending").cloned().collect()
}
/// Create a pending approval (called when a needs_approval hand is triggered)
pub async fn create_approval(&self, hand_id: String, input: serde_json::Value) -> ApprovalEntry {
let entry = ApprovalEntry {
id: uuid::Uuid::new_v4().to_string(),
hand_id,
status: "pending".to_string(),
created_at: chrono::Utc::now(),
input,
};
let mut approvals = self.pending_approvals.lock().await;
approvals.push(entry.clone());
entry
}
/// Respond to an approval
pub async fn respond_to_approval(
&self,
_id: &str,
_approved: bool,
id: &str,
approved: bool,
_reason: Option<String>,
) -> Result<()> {
// Stub: Return error
Err(zclaw_types::ZclawError::NotFound(format!("Approval not found")))
let mut approvals = self.pending_approvals.lock().await;
let entry = approvals.iter_mut().find(|a| a.id == id && a.status == "pending")
.ok_or_else(|| zclaw_types::ZclawError::NotFound(format!("Approval not found: {}", id)))?;
entry.status = if approved { "approved".to_string() } else { "rejected".to_string() };
if approved {
let hand_id = entry.hand_id.clone();
let input = entry.input.clone();
drop(approvals); // Release lock before async hand execution
// Execute the hand in background
let hands = self.hands.clone();
let approvals = self.pending_approvals.clone();
let id_owned = id.to_string();
tokio::spawn(async move {
let context = HandContext::default();
let result = hands.execute(&hand_id, &context, input).await;
// Update approval status based on execution result
let mut approvals = approvals.lock().await;
if let Some(entry) = approvals.iter_mut().find(|a| a.id == id_owned) {
match result {
Ok(_) => entry.status = "completed".to_string(),
Err(e) => {
entry.status = "failed".to_string();
// Store error in input metadata
if let Some(obj) = entry.input.as_object_mut() {
obj.insert("error".to_string(), Value::String(format!("{}", e)));
}
}
}
}
});
}
Ok(())
}
/// Cancel a pending approval
pub async fn cancel_approval(&self, id: &str) -> Result<()> {
let mut approvals = self.pending_approvals.lock().await;
let entry = approvals.iter_mut().find(|a| a.id == id && a.status == "pending")
.ok_or_else(|| zclaw_types::ZclawError::NotFound(format!("Approval not found: {}", id)))?;
entry.status = "cancelled".to_string();
Ok(())
}
}
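An illustrative approval round-trip against the new in-memory store; the "twitter" hand id and input payload are examples:

```rust
let entry = kernel
    .create_approval("twitter".to_string(), serde_json::json!({ "post": "hello" }))
    .await;
assert_eq!(kernel.list_approvals().await.len(), 1);

// Approving spawns the hand in the background; the entry later moves to
// "completed" or "failed". Rejecting or cancelling only flips the status.
kernel.respond_to_approval(&entry.id, true, None).await?;
```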

View File

@@ -20,6 +20,7 @@ tracing = { workspace = true }
# SQLite
sqlx = { workspace = true }
libsqlite3-sys = { workspace = true }
# Async utilities
futures = { workspace = true }

View File

@@ -46,11 +46,14 @@ pub async fn export_files(
.map_err(|e| ActionError::Export(format!("Write error: {}", e)))?;
}
ExportFormat::Pptx => {
// Will integrate with zclaw-kernel export
return Err(ActionError::Export("PPTX export requires kernel integration".to_string()));
return Err(ActionError::Export(
"PPTX 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 JSON 格式导出后转换。".to_string(),
));
}
ExportFormat::Pdf => {
return Err(ActionError::Export("PDF export not yet implemented".to_string()));
return Err(ActionError::Export(
"PDF 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 HTML 格式导出后通过浏览器打印为 PDF。".to_string(),
));
}
}

View File

@@ -1,21 +0,0 @@
//! Hand execution action
use std::collections::HashMap;
use serde_json::Value;
use super::ActionError;
/// Execute a hand action
pub async fn execute_hand(
hand_id: &str,
action: &str,
_params: HashMap<String, Value>,
) -> Result<Value, ActionError> {
// This will be implemented by injecting the hand registry
// For now, return an error indicating it needs configuration
Err(ActionError::Hand(format!(
"Hand '{}' action '{}' requires hand registry configuration",
hand_id, action
)))
}

View File

@@ -7,8 +7,6 @@ mod parallel;
mod render;
mod export;
mod http;
mod skill;
mod hand;
mod orchestration;
pub use llm::*;
@@ -16,8 +14,6 @@ pub use parallel::*;
pub use render::*;
pub use export::*;
pub use http::*;
pub use skill::*;
pub use hand::*;
pub use orchestration::*;
use std::collections::HashMap;
@@ -256,11 +252,14 @@ impl ActionRegistry {
tokio::fs::write(&path, content).await?;
}
ExportFormat::Pptx => {
// Will integrate with pptx exporter
return Err(ActionError::Export("PPTX export not yet implemented".to_string()));
return Err(ActionError::Export(
"PPTX 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 JSON 格式导出后转换。".to_string(),
));
}
ExportFormat::Pdf => {
return Err(ActionError::Export("PDF export not yet implemented".to_string()));
return Err(ActionError::Export(
"PDF 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 HTML 格式导出后通过浏览器打印为 PDF。".to_string(),
));
}
}

View File

@@ -1,20 +0,0 @@
//! Skill execution action
use std::collections::HashMap;
use serde_json::Value;
use super::ActionError;
/// Execute a skill by ID
pub async fn execute_skill(
skill_id: &str,
_input: HashMap<String, Value>,
) -> Result<Value, ActionError> {
// This will be implemented by injecting the skill registry
// For now, return an error indicating it needs configuration
Err(ActionError::Skill(format!(
"Skill '{}' execution requires skill registry configuration",
skill_id
)))
}

View File

@@ -10,11 +10,9 @@
use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use futures::future::join_all;
use serde_json::{Value, json};
use tokio::sync::RwLock;
use crate::types_v2::{Stage, ConditionalBranch, PresentationType};
use crate::types_v2::{Stage, ConditionalBranch};
use crate::engine::context::{ExecutionContextV2, ContextError};
/// Stage execution result
@@ -242,14 +240,6 @@ impl StageEngine {
Ok(result)
}
Err(e) => {
let result = StageResult {
stage_id: stage_id.clone(),
output: Value::Null,
status: StageStatus::Failed,
error: Some(e.to_string()),
duration_ms,
};
self.emit_event(StageEvent::Error {
stage_id,
error: e.to_string(),
@@ -312,7 +302,7 @@ impl StageEngine {
stage_id: &str,
each: &str,
stage_template: &Stage,
max_workers: usize,
_max_workers: usize,
context: &mut ExecutionContextV2,
) -> Result<Value, StageError> {
// Resolve the array to iterate over
@@ -419,7 +409,7 @@ impl StageEngine {
/// Execute compose stage
async fn execute_compose(
&self,
stage_id: &str,
_stage_id: &str,
template: &str,
context: &ExecutionContextV2,
) -> Result<Value, StageError> {
@@ -568,7 +558,8 @@ impl StageEngine {
Ok(resolved_value)
}
/// Clone with drivers
/// Clone with drivers (reserved for future use)
#[allow(dead_code)]
fn clone_with_drivers(&self) -> Self {
Self {
llm_driver: self.llm_driver.clone(),

View File

@@ -396,6 +396,7 @@ pub trait LlmIntentDriver: Send + Sync {
}
/// Default LLM driver implementation using prompt-based matching
#[allow(dead_code)]
pub struct DefaultLlmIntentDriver {
/// Model ID to use
model_id: String,

View File

@@ -57,6 +57,7 @@ pub mod intent;
pub mod engine;
pub mod presentation;
// Glob re-exports with explicit disambiguation for conflicting names
pub use types::*;
pub use types_v2::*;
pub use parser::*;
@@ -67,6 +68,14 @@ pub use trigger::*;
pub use intent::*;
pub use engine::*;
pub use presentation::*;
// Explicit re-exports: presentation::* wins for PresentationType/ExportFormat
// types_v2::* wins for InputMode, engine::* wins for LoopContext
pub use presentation::PresentationType;
pub use presentation::ExportFormat;
pub use types_v2::InputMode;
pub use engine::context::LoopContext;
pub use actions::ActionRegistry;
pub use actions::{LlmActionDriver, SkillActionDriver, HandActionDriver, OrchestrationActionDriver};

View File

@@ -13,7 +13,6 @@
//! - Better recommendations for ambiguous cases
use serde_json::Value;
use std::collections::HashMap;
use super::types::*;

View File

@@ -254,13 +254,13 @@ pub fn compile_pattern(pattern: &str) -> Result<CompiledPattern, PatternError> {
'{' => {
// Named capture group
let mut name = String::new();
let mut has_type = false;
let mut _has_type = false;
while let Some(c) = chars.next() {
match c {
'}' => break,
':' => {
has_type = true;
_has_type = true;
// Skip type part
while let Some(nc) = chars.peek() {
if *nc == '}' {

View File

@@ -0,0 +1,365 @@
//! Context compaction for the agent loop.
//!
//! Provides rule-based token estimation and message compaction to prevent
//! conversations from exceeding LLM context windows. When the estimated
//! token count exceeds the configured threshold, older messages are
//! summarized into a single system message and only recent messages are
//! retained.
use zclaw_types::Message;
/// Number of recent messages to preserve after compaction.
const DEFAULT_KEEP_RECENT: usize = 6;
/// Heuristic token count estimation.
///
/// CJK ideographs count ≈ 1.5 tokens each; ASCII/Latin characters count
/// ≈ 0.3 tokens each (roughly 3-4 characters per token).
/// Intentionally conservative (overestimates) to avoid hitting real limits.
pub fn estimate_tokens(text: &str) -> usize {
if text.is_empty() {
return 0;
}
let mut tokens: f64 = 0.0;
for char in text.chars() {
let code = char as u32;
if (0x4E00..=0x9FFF).contains(&code)
|| (0x3400..=0x4DBF).contains(&code)
|| (0x20000..=0x2A6DF).contains(&code)
|| (0xF900..=0xFAFF).contains(&code)
{
// CJK ideographs — ~1.5 tokens
tokens += 1.5;
} else if (0x3000..=0x303F).contains(&code) || (0xFF00..=0xFFEF).contains(&code) {
// CJK / fullwidth punctuation — ~1.0 token
tokens += 1.0;
} else if char == ' ' || char == '\n' || char == '\t' {
// whitespace
tokens += 0.25;
} else {
// ASCII / Latin characters — ~0.3 tokens each (roughly 3-4 chars per token)
tokens += 0.3;
}
}
tokens.ceil() as usize
}
/// Estimate total tokens for a list of messages (including framing overhead).
pub fn estimate_messages_tokens(messages: &[Message]) -> usize {
let mut total = 0;
for msg in messages {
match msg {
Message::User { content } => {
total += estimate_tokens(content);
total += 4;
}
Message::Assistant { content, thinking } => {
total += estimate_tokens(content);
if let Some(th) = thinking {
total += estimate_tokens(th);
}
total += 4;
}
Message::System { content } => {
total += estimate_tokens(content);
total += 4;
}
Message::ToolUse { input, .. } => {
total += estimate_tokens(&input.to_string());
total += 4;
}
Message::ToolResult { output, .. } => {
total += estimate_tokens(&output.to_string());
total += 4;
}
}
}
total
}
/// Compact a message list by summarizing old messages and keeping recent ones.
///
/// When `messages.len() > keep_recent`, the oldest messages are summarized
/// into a single system message. System messages at the beginning of the
/// conversation are always preserved.
///
/// Returns the compacted message list and the number of original messages removed.
pub fn compact_messages(messages: Vec<Message>, keep_recent: usize) -> (Vec<Message>, usize) {
if messages.len() <= keep_recent {
return (messages, 0);
}
// Preserve leading system messages (they contain compaction summaries from prior runs)
let leading_system_count = messages
.iter()
.take_while(|m| matches!(m, Message::System { .. }))
.count();
// Calculate split point: keep leading system + recent messages
let keep_from_end = keep_recent.min(messages.len().saturating_sub(leading_system_count));
let split_index = messages.len().saturating_sub(keep_from_end);
// Ensure we keep at least the leading system messages
let split_index = split_index.max(leading_system_count);
if split_index == 0 {
return (messages, 0);
}
let old_messages = &messages[..split_index];
let recent_messages = &messages[split_index..];
let summary = generate_summary(old_messages);
let removed_count = old_messages.len();
let mut compacted = Vec::with_capacity(1 + recent_messages.len());
compacted.push(Message::system(summary));
compacted.extend(recent_messages.iter().cloned());
(compacted, removed_count)
}
/// Check if compaction should be triggered and perform it if needed.
///
/// Returns the (possibly compacted) message list.
pub fn maybe_compact(messages: Vec<Message>, threshold: usize) -> Vec<Message> {
let tokens = estimate_messages_tokens(&messages);
if tokens < threshold {
return messages;
}
tracing::info!(
"[Compaction] Triggered: {} tokens > {} threshold, {} messages",
tokens,
threshold,
messages.len(),
);
let (compacted, removed) = compact_messages(messages, DEFAULT_KEEP_RECENT);
tracing::info!(
"[Compaction] Removed {} messages, {} remain",
removed,
compacted.len(),
);
compacted
}
/// Generate a rule-based summary of old messages.
fn generate_summary(messages: &[Message]) -> String {
if messages.is_empty() {
return "[对话开始]".to_string();
}
let mut sections: Vec<String> = vec!["[以下是之前对话的摘要]".to_string()];
let mut user_count = 0;
let mut assistant_count = 0;
let mut topics: Vec<String> = Vec::new();
for msg in messages {
match msg {
Message::User { content } => {
user_count += 1;
let topic = extract_topic(content);
if let Some(t) = topic {
topics.push(t);
}
}
Message::Assistant { .. } => {
assistant_count += 1;
}
Message::System { content } => {
// Skip system messages that are previous compaction summaries
if !content.starts_with("[以下是之前对话的摘要]") {
sections.push(format!("系统提示: {}", truncate(content, 60)));
}
}
Message::ToolUse { tool, .. } => {
sections.push(format!("工具调用: {}", tool.as_str()));
}
Message::ToolResult { .. } => {
// Skip tool results in summary
}
}
}
if !topics.is_empty() {
let topic_list: Vec<String> = topics.iter().take(8).cloned().collect();
sections.push(format!("讨论主题: {}", topic_list.join("; ")));
}
sections.push(format!(
"(已压缩 {} 条消息,其中用户 {} 条,助手 {} 条)",
messages.len(),
user_count,
assistant_count,
));
let summary = sections.join("\n");
// Enforce max length on a char basis (byte slicing could panic on CJK text);
// reuse the char-safe truncate helper below
let max_chars = 800;
if summary.chars().count() > max_chars {
format!("{}\n(摘要已截断)", truncate(&summary, max_chars))
} else {
summary
}
}
/// Extract the main topic from a user message (first sentence or first 50 chars).
fn extract_topic(content: &str) -> Option<String> {
let trimmed = content.trim();
if trimmed.is_empty() {
return None;
}
// Find sentence end markers
for (i, char) in trimmed.char_indices() {
if char == '。' || char == '!' || char == '?' || char == '\n' {
let end = i + char.len_utf8();
if end <= 80 {
return Some(trimmed[..end].trim().to_string());
}
break;
}
}
if trimmed.chars().count() <= 50 {
return Some(trimmed.to_string());
}
Some(format!("{}...", trimmed.chars().take(50).collect::<String>()))
}
/// Truncate text to max_chars at char boundary.
fn truncate(text: &str, max_chars: usize) -> String {
if text.chars().count() <= max_chars {
return text.to_string();
}
let truncated: String = text.chars().take(max_chars).collect();
format!("{}...", truncated)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_estimate_tokens_empty() {
assert_eq!(estimate_tokens(""), 0);
}
#[test]
fn test_estimate_tokens_english() {
let tokens = estimate_tokens("Hello world");
assert!(tokens > 0);
}
#[test]
fn test_estimate_tokens_cjk() {
let tokens = estimate_tokens("你好世界");
assert!(tokens > 3); // CJK chars are ~1.5 tokens each
}
#[test]
fn test_estimate_messages_tokens() {
let messages = vec![
Message::user("Hello"),
Message::assistant("Hi there"),
];
let tokens = estimate_messages_tokens(&messages);
assert!(tokens > 0);
}
#[test]
fn test_compact_messages_under_threshold() {
let messages = vec![
Message::user("Hello"),
Message::assistant("Hi"),
];
let (result, removed) = compact_messages(messages, 6);
assert_eq!(removed, 0);
assert_eq!(result.len(), 2);
}
#[test]
fn test_compact_messages_over_threshold() {
let messages: Vec<Message> = (0..10)
.flat_map(|i| {
vec![
Message::user(format!("Question {}", i)),
Message::assistant(format!("Answer {}", i)),
]
})
.collect();
let (result, removed) = compact_messages(messages, 4);
assert!(removed > 0);
// Should have: 1 summary + 4 recent messages
assert_eq!(result.len(), 5);
// First message should be a system summary
assert!(matches!(&result[0], Message::System { .. }));
}
#[test]
fn test_compact_preserves_leading_system() {
let messages = vec![
Message::system("You are helpful"),
Message::user("Q1"),
Message::assistant("A1"),
Message::user("Q2"),
Message::assistant("A2"),
Message::user("Q3"),
Message::assistant("A3"),
];
let (result, removed) = compact_messages(messages, 4);
assert!(removed > 0);
// Should start with compaction summary, then recent messages
assert!(matches!(&result[0], Message::System { .. }));
}
#[test]
fn test_maybe_compact_under_threshold() {
let messages = vec![
Message::user("Short message"),
Message::assistant("Short reply"),
];
let result = maybe_compact(messages, 100_000);
assert_eq!(result.len(), 2);
}
#[test]
fn test_extract_topic_sentence() {
let topic = extract_topic("什么是Rust的所有权系统").unwrap();
assert!(topic.contains("所有权"));
}
#[test]
fn test_extract_topic_short() {
let topic = extract_topic("Hello").unwrap();
assert_eq!(topic, "Hello");
}
#[test]
fn test_extract_topic_long() {
let long = "This is a very long message that exceeds fifty characters in total length";
let topic = extract_topic(long).unwrap();
assert!(topic.ends_with("..."));
}
#[test]
fn test_generate_summary() {
let messages = vec![
Message::user("What is Rust?"),
Message::assistant("Rust is a systems programming language"),
Message::user("How does ownership work?"),
Message::assistant("Ownership is Rust's memory management system"),
];
let summary = generate_summary(&messages);
assert!(summary.contains("摘要"));
assert!(summary.contains("2"));
}
}
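Two worked examples of the heuristics above, mirroring the unit tests:

```rust
// Token estimate for "你好 world":
//   2 CJK × 1.5  +  1 space × 0.25  +  5 ASCII × 0.3  =  4.75  → ceil = 5
assert_eq!(estimate_tokens("你好 world"), 5);

// Compaction: 20 messages with keep_recent = 6 →
// 14 old messages collapse into 1 summary message, 6 recent survive (7 total).
let history: Vec<Message> = (0..10)
    .flat_map(|i| vec![
        Message::user(format!("Q{}", i)),
        Message::assistant(format!("A{}", i)),
    ])
    .collect();
let (compacted, removed) = compact_messages(history, 6);
assert_eq!((removed, compacted.len()), (14, 7));
```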

View File

@@ -1,9 +1,17 @@
//! Google Gemini driver implementation
//!
//! Implements the Gemini REST API v1beta with full support for:
//! - Text generation (complete and streaming)
//! - Tool / function calling
//! - System instructions
//! - Token usage reporting
use async_trait::async_trait;
use futures::Stream;
use async_stream::stream;
use futures::{Stream, StreamExt};
use secrecy::{ExposeSecret, SecretString};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::pin::Pin;
use zclaw_types::{Result, ZclawError};
@@ -11,7 +19,6 @@ use super::{CompletionRequest, CompletionResponse, ContentBlock, LlmDriver, Stop
use crate::stream::StreamChunk;
/// Google Gemini driver
#[allow(dead_code)] // TODO: Implement full Gemini API support
pub struct GeminiDriver {
client: Client,
api_key: SecretString,
@@ -21,11 +28,31 @@ pub struct GeminiDriver {
impl GeminiDriver {
pub fn new(api_key: SecretString) -> Self {
Self {
client: Client::new(),
client: Client::builder()
.user_agent(crate::USER_AGENT)
.http1_only()
.timeout(std::time::Duration::from_secs(120))
.connect_timeout(std::time::Duration::from_secs(30))
.build()
.unwrap_or_else(|_| Client::new()),
api_key,
base_url: "https://generativelanguage.googleapis.com/v1beta".to_string(),
}
}
pub fn with_base_url(api_key: SecretString, base_url: String) -> Self {
Self {
client: Client::builder()
.user_agent(crate::USER_AGENT)
.http1_only()
.timeout(std::time::Duration::from_secs(120))
.connect_timeout(std::time::Duration::from_secs(30))
.build()
.unwrap_or_else(|_| Client::new()),
api_key,
base_url,
}
}
}
#[async_trait]
@@ -39,25 +66,594 @@ impl LlmDriver for GeminiDriver {
}
async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
// TODO: Implement actual API call
Ok(CompletionResponse {
content: vec![ContentBlock::Text {
text: "Gemini driver not yet implemented".to_string(),
}],
model: request.model,
input_tokens: 0,
output_tokens: 0,
stop_reason: StopReason::EndTurn,
})
let api_request = self.build_api_request(&request);
let url = format!(
"{}/models/{}:generateContent?key={}",
self.base_url,
request.model,
self.api_key.expose_secret()
);
tracing::debug!(target: "gemini_driver", "Sending request to: {}", url);
let response = self.client
.post(&url)
.header("content-type", "application/json")
.json(&api_request)
.send()
.await
.map_err(|e| ZclawError::LlmError(format!("HTTP request failed: {}", e)))?;
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
tracing::warn!(target: "gemini_driver", "API error {}: {}", status, body);
return Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
}
let api_response: GeminiResponse = response
.json()
.await
.map_err(|e| ZclawError::LlmError(format!("Failed to parse response: {}", e)))?;
Ok(self.convert_response(api_response, request.model))
}
fn stream(
&self,
_request: CompletionRequest,
request: CompletionRequest,
) -> Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send + '_>> {
// Placeholder - return error stream
Box::pin(futures::stream::once(async {
Err(ZclawError::LlmError("Gemini streaming not yet implemented".to_string()))
}))
let api_request = self.build_api_request(&request);
let url = format!(
"{}/models/{}:streamGenerateContent?alt=sse&key={}",
self.base_url,
request.model,
self.api_key.expose_secret()
);
tracing::debug!(target: "gemini_driver", "Starting stream request to: {}", url);
Box::pin(stream! {
let response = match self.client
.post(&url)
.header("content-type", "application/json")
.timeout(std::time::Duration::from_secs(120))
.json(&api_request)
.send()
.await
{
Ok(r) => {
tracing::debug!(target: "gemini_driver", "Stream response status: {}", r.status());
r
},
Err(e) => {
tracing::error!(target: "gemini_driver", "HTTP request failed: {:?}", e);
yield Err(ZclawError::LlmError(format!("HTTP request failed: {}", e)));
return;
}
};
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
yield Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
return;
}
let mut byte_stream = response.bytes_stream();
let mut accumulated_tool_calls: std::collections::HashMap<usize, (String, String)> = std::collections::HashMap::new();
while let Some(chunk_result) = byte_stream.next().await {
let chunk = match chunk_result {
Ok(c) => c,
Err(e) => {
yield Err(ZclawError::LlmError(format!("Stream error: {}", e)));
continue;
}
};
let text = String::from_utf8_lossy(&chunk);
for line in text.lines() {
if let Some(data) = line.strip_prefix("data: ") {
match serde_json::from_str::<GeminiStreamResponse>(data) {
Ok(resp) => {
if let Some(candidate) = resp.candidates.first() {
let content = match &candidate.content {
Some(c) => c,
None => continue,
};
let parts = &content.parts;
for (idx, part) in parts.iter().enumerate() {
// Handle text content
if let Some(text) = &part.text {
if !text.is_empty() {
yield Ok(StreamChunk::TextDelta { delta: text.clone() });
}
}
// Handle function call (tool use)
if let Some(fc) = &part.function_call {
let name = fc.name.clone().unwrap_or_default();
let args = fc.args.clone().unwrap_or(serde_json::Value::Object(Default::default()));
// Emit ToolUseStart if this is a new tool call
if !accumulated_tool_calls.contains_key(&idx) {
accumulated_tool_calls.insert(idx, (name.clone(), String::new()));
yield Ok(StreamChunk::ToolUseStart {
id: format!("gemini_call_{}", idx),
name,
});
}
// Emit the function arguments as delta
let args_str = serde_json::to_string(&args).unwrap_or_default();
let call_id = format!("gemini_call_{}", idx);
yield Ok(StreamChunk::ToolUseDelta {
id: call_id.clone(),
delta: args_str.clone(),
});
// Accumulate
if let Some(entry) = accumulated_tool_calls.get_mut(&idx) {
entry.1 = args_str;
}
}
}
// When the candidate is finished, emit ToolUseEnd for all pending
if let Some(ref finish_reason) = candidate.finish_reason {
let is_final = finish_reason == "STOP" || finish_reason == "MAX_TOKENS";
if is_final {
// Emit ToolUseEnd for all accumulated tool calls
for (idx, (_name, args_str)) in &accumulated_tool_calls {
let input: serde_json::Value = if args_str.is_empty() {
serde_json::json!({})
} else {
serde_json::from_str(args_str).unwrap_or_else(|e| {
tracing::warn!(target: "gemini_driver", "Failed to parse tool args '{}': {}", args_str, e);
serde_json::json!({})
})
};
yield Ok(StreamChunk::ToolUseEnd {
id: format!("gemini_call_{}", idx),
input,
});
}
// Extract usage metadata from the response
let usage = resp.usage_metadata.as_ref();
let input_tokens = usage.map(|u| u.prompt_token_count.unwrap_or(0)).unwrap_or(0);
let output_tokens = usage.map(|u| u.candidates_token_count.unwrap_or(0)).unwrap_or(0);
let stop_reason = match finish_reason.as_str() {
"STOP" => "end_turn",
"MAX_TOKENS" => "max_tokens",
"SAFETY" => "error",
"RECITATION" => "error",
_ => "end_turn",
};
yield Ok(StreamChunk::Complete {
input_tokens,
output_tokens,
stop_reason: stop_reason.to_string(),
});
}
}
}
}
Err(e) => {
tracing::warn!(target: "gemini_driver", "Failed to parse SSE event: {} - {}", e, data);
}
}
}
}
}
})
}
}
impl GeminiDriver {
/// Convert a CompletionRequest into the Gemini API request format.
///
/// Key mapping decisions:
/// - `system` prompt maps to `systemInstruction`
/// - Messages use Gemini's `contents` array with `role`/`parts`
/// - Tool definitions use `functionDeclarations`
/// - Tool results are sent as `functionResponse` parts in `user` messages
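    ///
    /// Illustrative request body for a single user turn with one declared tool
    /// (hand-written example; field names follow the `Serialize` derives below):
    ///
    /// ```json
    /// {
    ///   "contents": [{ "role": "user", "parts": [{ "text": "hi" }] }],
    ///   "systemInstruction": { "parts": [{ "text": "You are concise." }] },
    ///   "generationConfig": { "temperature": 0.7 },
    ///   "tools": [{ "functionDeclarations": [
    ///     { "name": "read_file", "description": "...", "parameters": { "type": "object" } }
    ///   ] }]
    /// }
    /// ```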
fn build_api_request(&self, request: &CompletionRequest) -> GeminiRequest {
let mut contents: Vec<GeminiContent> = Vec::new();
for msg in &request.messages {
match msg {
zclaw_types::Message::User { content } => {
contents.push(GeminiContent {
role: "user".to_string(),
parts: vec![GeminiPart {
text: Some(content.clone()),
inline_data: None,
function_call: None,
function_response: None,
}],
});
}
zclaw_types::Message::Assistant { content, thinking } => {
let mut parts = Vec::new();
// Gemini does not have a native "thinking" field, so we prepend
// any thinking content as a text part with a marker.
if let Some(think) = thinking {
if !think.is_empty() {
parts.push(GeminiPart {
text: Some(format!("[thinking]\n{}\n[/thinking]", think)),
inline_data: None,
function_call: None,
function_response: None,
});
}
}
parts.push(GeminiPart {
text: Some(content.clone()),
inline_data: None,
function_call: None,
function_response: None,
});
contents.push(GeminiContent {
role: "model".to_string(),
parts,
});
}
zclaw_types::Message::ToolUse { id: _, tool, input } => {
// Tool use from the assistant is represented as a functionCall part
let args = if input.is_null() {
serde_json::json!({})
} else {
input.clone()
};
contents.push(GeminiContent {
role: "model".to_string(),
parts: vec![GeminiPart {
text: None,
inline_data: None,
function_call: Some(GeminiFunctionCall {
name: Some(tool.to_string()),
args: Some(args),
}),
function_response: None,
}],
});
}
zclaw_types::Message::ToolResult { tool_call_id, tool, output, is_error } => {
// Tool results are sent as functionResponse parts in a "user" role message.
// Gemini requires that function responses reference the function name
// and include the response wrapped in a "result" or "error" key.
let response_content = if *is_error {
serde_json::json!({ "error": output.to_string() })
} else {
serde_json::json!({ "result": output.clone() })
};
contents.push(GeminiContent {
role: "user".to_string(),
parts: vec![GeminiPart {
text: None,
inline_data: None,
function_call: None,
function_response: Some(GeminiFunctionResponse {
name: tool.to_string(),
response: response_content,
}),
}],
});
                    // Gemini has no equivalent of tool_call_id; discard it explicitly.
                    let _ = tool_call_id;
}
zclaw_types::Message::System { content } => {
// System messages are converted to user messages with system context.
// Note: the primary system prompt is handled via systemInstruction.
// Inline system messages in conversation history become user messages.
contents.push(GeminiContent {
role: "user".to_string(),
parts: vec![GeminiPart {
text: Some(content.clone()),
inline_data: None,
function_call: None,
function_response: None,
}],
});
}
}
}
// Build tool declarations
let function_declarations: Vec<GeminiFunctionDeclaration> = request.tools
.iter()
.map(|t| GeminiFunctionDeclaration {
name: t.name.clone(),
description: t.description.clone(),
parameters: t.input_schema.clone(),
})
.collect();
// Build generation config
let mut generation_config = GeminiGenerationConfig::default();
if let Some(temp) = request.temperature {
generation_config.temperature = Some(temp);
}
if let Some(max) = request.max_tokens {
generation_config.max_output_tokens = Some(max);
}
if !request.stop.is_empty() {
generation_config.stop_sequences = Some(request.stop.clone());
}
// Build system instruction
let system_instruction = request.system.as_ref().map(|s| GeminiSystemInstruction {
parts: vec![GeminiPart {
text: Some(s.clone()),
inline_data: None,
function_call: None,
function_response: None,
}],
});
GeminiRequest {
contents,
system_instruction,
generation_config: Some(generation_config),
tools: if function_declarations.is_empty() {
None
} else {
Some(vec![GeminiTool {
function_declarations,
}])
},
}
}
/// Convert a Gemini API response into a CompletionResponse.
fn convert_response(&self, api_response: GeminiResponse, model: String) -> CompletionResponse {
let candidate = api_response.candidates.first();
let (content, stop_reason) = match candidate {
Some(c) => {
let parts = c.content.as_ref()
.map(|content| content.parts.as_slice())
.unwrap_or(&[]);
let mut blocks: Vec<ContentBlock> = Vec::new();
let mut has_tool_use = false;
for part in parts {
// Handle text content
if let Some(text) = &part.text {
                        // Recover thinking content from the markers injected in
                        // build_api_request and surface it as a Thinking block.
if text.starts_with("[thinking]\n") && text.contains("[/thinking]") {
let thinking_content = text
.strip_prefix("[thinking]\n")
.and_then(|s| s.strip_suffix("\n[/thinking]"))
.unwrap_or("");
if !thinking_content.is_empty() {
blocks.push(ContentBlock::Thinking {
thinking: thinking_content.to_string(),
});
}
} else if !text.is_empty() {
blocks.push(ContentBlock::Text { text: text.clone() });
}
}
// Handle function call (tool use)
if let Some(fc) = &part.function_call {
has_tool_use = true;
blocks.push(ContentBlock::ToolUse {
id: format!("gemini_call_{}", blocks.len()),
name: fc.name.clone().unwrap_or_default(),
input: fc.args.clone().unwrap_or(serde_json::Value::Object(Default::default())),
});
}
}
// If there are no content blocks, add an empty text block
if blocks.is_empty() {
blocks.push(ContentBlock::Text { text: String::new() });
}
let stop = match c.finish_reason.as_deref() {
Some("STOP") => StopReason::EndTurn,
Some("MAX_TOKENS") => StopReason::MaxTokens,
Some("SAFETY") => StopReason::Error,
Some("RECITATION") => StopReason::Error,
Some("TOOL_USE") => StopReason::ToolUse,
_ => {
if has_tool_use {
StopReason::ToolUse
} else {
StopReason::EndTurn
}
}
};
(blocks, stop)
}
None => {
tracing::warn!(target: "gemini_driver", "No candidates in response");
(
vec![ContentBlock::Text { text: String::new() }],
StopReason::EndTurn,
)
}
};
let usage = api_response.usage_metadata.as_ref();
let input_tokens = usage.map(|u| u.prompt_token_count.unwrap_or(0)).unwrap_or(0);
let output_tokens = usage.map(|u| u.candidates_token_count.unwrap_or(0)).unwrap_or(0);
CompletionResponse {
content,
model,
input_tokens,
output_tokens,
stop_reason,
}
}
}
// ---------------------------------------------------------------------------
// Gemini API request types
// ---------------------------------------------------------------------------
#[derive(Serialize)]
#[serde(rename_all = "camelCase")] // Gemini's REST API documents camelCase field names
struct GeminiRequest {
    contents: Vec<GeminiContent>,
    #[serde(skip_serializing_if = "Option::is_none")]
    system_instruction: Option<GeminiSystemInstruction>,
    #[serde(skip_serializing_if = "Option::is_none")]
    generation_config: Option<GeminiGenerationConfig>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<GeminiTool>>,
}
#[derive(Serialize)]
struct GeminiContent {
role: String,
parts: Vec<GeminiPart>,
}
#[derive(Serialize, Clone)]
struct GeminiPart {
#[serde(skip_serializing_if = "Option::is_none")]
text: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
inline_data: Option<serde_json::Value>,
#[serde(rename = "functionCall", skip_serializing_if = "Option::is_none")]
function_call: Option<GeminiFunctionCall>,
#[serde(rename = "functionResponse", skip_serializing_if = "Option::is_none")]
function_response: Option<GeminiFunctionResponse>,
}
#[derive(Serialize)]
struct GeminiSystemInstruction {
parts: Vec<GeminiPart>,
}
#[derive(Serialize, Default)]
#[serde(rename_all = "camelCase")]
struct GeminiGenerationConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_output_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stop_sequences: Option<Vec<String>>,
}
#[derive(Serialize)]
struct GeminiTool {
#[serde(rename = "functionDeclarations")]
function_declarations: Vec<GeminiFunctionDeclaration>,
}
#[derive(Serialize)]
struct GeminiFunctionDeclaration {
name: String,
description: String,
parameters: serde_json::Value,
}
#[derive(Serialize, Clone)]
struct GeminiFunctionCall {
#[serde(skip_serializing_if = "Option::is_none")]
name: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
args: Option<serde_json::Value>,
}
#[derive(Serialize, Clone)]
struct GeminiFunctionResponse {
name: String,
response: serde_json::Value,
}
// ---------------------------------------------------------------------------
// Gemini API response types
// ---------------------------------------------------------------------------
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")] // Gemini responses use camelCase (e.g. "usageMetadata")
struct GeminiResponse {
    #[serde(default)]
    candidates: Vec<GeminiCandidate>,
    #[serde(default)]
    usage_metadata: Option<GeminiUsageMetadata>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")] // matches the wire field "finishReason"
struct GeminiCandidate {
    #[serde(default)]
    content: Option<GeminiResponseContent>,
    #[serde(default)]
    finish_reason: Option<String>,
}
#[derive(Debug, Deserialize)]
struct GeminiResponseContent {
#[serde(default)]
parts: Vec<GeminiResponsePart>,
#[serde(default)]
#[allow(dead_code)]
role: Option<String>,
}
#[derive(Debug, Deserialize)]
struct GeminiResponsePart {
#[serde(default)]
text: Option<String>,
#[serde(rename = "functionCall", default)]
function_call: Option<GeminiResponseFunctionCall>,
}
#[derive(Debug, Deserialize)]
struct GeminiResponseFunctionCall {
#[serde(default)]
name: Option<String>,
#[serde(default)]
args: Option<serde_json::Value>,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")] // matches "promptTokenCount" etc. on the wire
struct GeminiUsageMetadata {
    #[serde(default)]
    prompt_token_count: Option<u32>,
    #[serde(default)]
    candidates_token_count: Option<u32>,
    #[serde(default)]
    #[allow(dead_code)]
    total_token_count: Option<u32>,
}
// ---------------------------------------------------------------------------
// Gemini streaming types
// ---------------------------------------------------------------------------
/// Streaming response from the Gemini SSE endpoint.
/// Each SSE event contains the same structure as the non-streaming response,
/// but with incremental content.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct GeminiStreamResponse {
    #[serde(default)]
    candidates: Vec<GeminiCandidate>,
    #[serde(default)]
    usage_metadata: Option<GeminiUsageMetadata>,
}
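
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal deserialization sketch. The payload is hand-written from the
    // documented Gemini SSE shape (camelCase field names); it is not a
    // captured API response.
    #[test]
    fn parses_stream_event_with_finish_reason_and_usage() {
        let data = r#"{
            "candidates": [{
                "content": { "parts": [{ "text": "Hello" }], "role": "model" },
                "finishReason": "STOP"
            }],
            "usageMetadata": { "promptTokenCount": 3, "candidatesTokenCount": 1 }
        }"#;
        let resp: GeminiStreamResponse = serde_json::from_str(data).unwrap();
        let candidate = resp.candidates.first().unwrap();
        assert_eq!(candidate.finish_reason.as_deref(), Some("STOP"));
        let usage = resp.usage_metadata.expect("usage metadata should parse");
        assert_eq!(usage.prompt_token_count, Some(3));
        assert_eq!(usage.candidates_token_count, Some(1));
    }
}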

View File

@@ -1,40 +1,250 @@
//! Local LLM driver (Ollama, LM Studio, vLLM, etc.)
//!
//! Uses the OpenAI-compatible API format. The only differences from the
//! OpenAI driver are: no API key is required, and base_url points to a
//! local server.
use async_trait::async_trait;
use futures::Stream;
use async_stream::stream;
use futures::{Stream, StreamExt};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::pin::Pin;
use zclaw_types::{Result, ZclawError};
use super::{CompletionRequest, CompletionResponse, ContentBlock, LlmDriver, StopReason};
use crate::stream::StreamChunk;
/// Local LLM driver for Ollama, LM Studio, vLLM, etc.
#[allow(dead_code)] // TODO: Implement full Local driver support
/// Local LLM driver for Ollama, LM Studio, vLLM, and other OpenAI-compatible servers.
pub struct LocalDriver {
client: Client,
base_url: String,
}
impl LocalDriver {
/// Create a driver pointing at a custom OpenAI-compatible endpoint.
///
/// The `base_url` should end with `/v1` (e.g. `http://localhost:8080/v1`).
pub fn new(base_url: impl Into<String>) -> Self {
Self {
client: Client::new(),
client: Client::builder()
.user_agent(crate::USER_AGENT)
.http1_only()
.timeout(std::time::Duration::from_secs(300)) // 5 min -- local inference can be slow
.connect_timeout(std::time::Duration::from_secs(10)) // short connect timeout
.build()
.unwrap_or_else(|_| Client::new()),
base_url: base_url.into(),
}
}
/// Ollama default endpoint (`http://localhost:11434/v1`).
pub fn ollama() -> Self {
Self::new("http://localhost:11434/v1")
}
/// LM Studio default endpoint (`http://localhost:1234/v1`).
pub fn lm_studio() -> Self {
Self::new("http://localhost:1234/v1")
}
/// vLLM default endpoint (`http://localhost:8000/v1`).
pub fn vllm() -> Self {
Self::new("http://localhost:8000/v1")
}
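    // Usage sketch: any OpenAI-compatible endpoint works, e.g.
    //
    //     let driver = LocalDriver::ollama();
    //     let driver = LocalDriver::new("http://my-box:8080/v1"); // hypothetical host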
// ----------------------------------------------------------------
// Request / response conversion (OpenAI-compatible format)
// ----------------------------------------------------------------
fn build_api_request(&self, request: &CompletionRequest) -> LocalApiRequest {
let messages: Vec<LocalApiMessage> = request
.messages
.iter()
.filter_map(|msg| match msg {
zclaw_types::Message::User { content } => Some(LocalApiMessage {
role: "user".to_string(),
content: Some(content.clone()),
tool_calls: None,
}),
zclaw_types::Message::Assistant {
content,
thinking: _,
} => Some(LocalApiMessage {
role: "assistant".to_string(),
content: Some(content.clone()),
tool_calls: None,
}),
zclaw_types::Message::System { content } => Some(LocalApiMessage {
role: "system".to_string(),
content: Some(content.clone()),
tool_calls: None,
}),
zclaw_types::Message::ToolUse {
id, tool, input, ..
} => {
let args = if input.is_null() {
"{}".to_string()
} else {
serde_json::to_string(input).unwrap_or_else(|_| "{}".to_string())
};
Some(LocalApiMessage {
role: "assistant".to_string(),
content: None,
tool_calls: Some(vec![LocalApiToolCall {
id: id.clone(),
r#type: "function".to_string(),
function: LocalFunctionCall {
name: tool.to_string(),
arguments: args,
},
}]),
})
}
zclaw_types::Message::ToolResult {
output, is_error, ..
} => Some(LocalApiMessage {
role: "tool".to_string(),
content: Some(if *is_error {
format!("Error: {}", output)
} else {
output.to_string()
}),
tool_calls: None,
}),
})
.collect();
// Prepend system prompt when provided.
let mut messages = messages;
if let Some(system) = &request.system {
messages.insert(
0,
LocalApiMessage {
role: "system".to_string(),
content: Some(system.clone()),
tool_calls: None,
},
);
}
let tools: Vec<LocalApiTool> = request
.tools
.iter()
.map(|t| LocalApiTool {
r#type: "function".to_string(),
function: LocalFunctionDef {
name: t.name.clone(),
description: t.description.clone(),
parameters: t.input_schema.clone(),
},
})
.collect();
LocalApiRequest {
model: request.model.clone(),
messages,
max_tokens: request.max_tokens,
temperature: request.temperature,
stop: if request.stop.is_empty() {
None
} else {
Some(request.stop.clone())
},
stream: request.stream,
tools: if tools.is_empty() {
None
} else {
Some(tools)
},
}
}
fn convert_response(
&self,
api_response: LocalApiResponse,
model: String,
) -> CompletionResponse {
let choice = api_response.choices.first();
let (content, stop_reason) = match choice {
Some(c) => {
let has_tool_calls = c
.message
.tool_calls
.as_ref()
.map(|tc| !tc.is_empty())
.unwrap_or(false);
let has_content = c
.message
.content
.as_ref()
.map(|t| !t.is_empty())
.unwrap_or(false);
let blocks = if has_tool_calls {
let tool_calls = c.message.tool_calls.as_ref().unwrap();
tool_calls
.iter()
.map(|tc| {
let input: serde_json::Value =
serde_json::from_str(&tc.function.arguments)
.unwrap_or(serde_json::Value::Null);
ContentBlock::ToolUse {
id: tc.id.clone(),
name: tc.function.name.clone(),
input,
}
})
.collect()
} else if has_content {
vec![ContentBlock::Text {
text: c.message.content.clone().unwrap(),
}]
} else {
vec![ContentBlock::Text {
text: String::new(),
}]
};
let stop = match c.finish_reason.as_deref() {
Some("stop") => StopReason::EndTurn,
Some("length") => StopReason::MaxTokens,
Some("tool_calls") => StopReason::ToolUse,
_ => StopReason::EndTurn,
};
(blocks, stop)
}
None => (
vec![ContentBlock::Text {
text: String::new(),
}],
StopReason::EndTurn,
),
};
let (input_tokens, output_tokens) = api_response
.usage
.map(|u| (u.prompt_tokens, u.completion_tokens))
.unwrap_or((0, 0));
CompletionResponse {
content,
model,
input_tokens,
output_tokens,
stop_reason,
}
}
    /// Build the `reqwest::RequestBuilder` for a POST to the local server.
    ///
    /// Local servers (Ollama, LM Studio, vLLM in its default configuration) do
    /// not require an API key, so no Authorization header is sent. Kept as a
    /// single seam so optional auth can be added later without touching call sites.
fn authenticated_post(&self, url: &str) -> reqwest::RequestBuilder {
self.client.post(url).header("Accept", "*/*")
}
}
#[async_trait]
@@ -44,30 +254,394 @@ impl LlmDriver for LocalDriver {
}
fn is_configured(&self) -> bool {
// Local drivers don't require API keys
// Local drivers never require an API key.
true
}
async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
// TODO: Implement actual API call (OpenAI-compatible)
Ok(CompletionResponse {
content: vec![ContentBlock::Text {
text: "Local driver not yet implemented".to_string(),
}],
model: request.model,
input_tokens: 0,
output_tokens: 0,
stop_reason: StopReason::EndTurn,
})
let api_request = self.build_api_request(&request);
let url = format!("{}/chat/completions", self.base_url);
tracing::debug!(target: "local_driver", "Sending request to {}", url);
tracing::trace!(
target: "local_driver",
"Request body: {}",
serde_json::to_string(&api_request).unwrap_or_default()
);
let response = self
.authenticated_post(&url)
.json(&api_request)
.send()
.await
.map_err(|e| {
let hint = connection_error_hint(&e);
ZclawError::LlmError(format!("Failed to connect to local LLM server at {}: {}{}", self.base_url, e, hint))
})?;
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
tracing::warn!(target: "local_driver", "API error {}: {}", status, body);
return Err(ZclawError::LlmError(format!(
"Local LLM API error {}: {}",
status, body
)));
}
let api_response: LocalApiResponse = response
.json()
.await
.map_err(|e| ZclawError::LlmError(format!("Failed to parse response: {}", e)))?;
Ok(self.convert_response(api_response, request.model))
}
fn stream(
&self,
_request: CompletionRequest,
request: CompletionRequest,
) -> Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send + '_>> {
// Placeholder - return error stream
Box::pin(futures::stream::once(async {
Err(ZclawError::LlmError("Local driver streaming not yet implemented".to_string()))
}))
let mut stream_request = self.build_api_request(&request);
stream_request.stream = true;
let url = format!("{}/chat/completions", self.base_url);
tracing::debug!(target: "local_driver", "Starting stream to {}", url);
Box::pin(stream! {
let response = match self
.authenticated_post(&url)
.header("Content-Type", "application/json")
.timeout(std::time::Duration::from_secs(300))
.json(&stream_request)
.send()
.await
{
Ok(r) => {
tracing::debug!(target: "local_driver", "Stream response status: {}", r.status());
r
}
Err(e) => {
let hint = connection_error_hint(&e);
tracing::error!(target: "local_driver", "Stream connection failed: {}{}", e, hint);
yield Err(ZclawError::LlmError(format!(
"Failed to connect to local LLM server at {}: {}{}",
self.base_url, e, hint
)));
return;
}
};
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
yield Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
return;
}
let mut byte_stream = response.bytes_stream();
let mut accumulated_tool_calls: std::collections::HashMap<String, (String, String)> =
std::collections::HashMap::new();
let mut current_tool_id: Option<String> = None;
while let Some(chunk_result) = byte_stream.next().await {
let chunk = match chunk_result {
Ok(c) => c,
Err(e) => {
yield Err(ZclawError::LlmError(format!("Stream error: {}", e)));
continue;
}
};
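                // NOTE: assumes each SSE event fits within one chunk; events
                // split across chunk boundaries fail to parse and are skipped.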
let text = String::from_utf8_lossy(&chunk);
for line in text.lines() {
if let Some(data) = line.strip_prefix("data: ") {
if data == "[DONE]" {
tracing::debug!(
target: "local_driver",
"Stream done, tool_calls accumulated: {}",
accumulated_tool_calls.len()
);
for (id, (name, args)) in &accumulated_tool_calls {
if name.is_empty() {
tracing::warn!(
target: "local_driver",
"Skipping tool call with empty name: id={}",
id
);
continue;
}
let parsed_args: serde_json::Value = if args.is_empty() {
serde_json::json!({})
} else {
serde_json::from_str(args).unwrap_or_else(|e| {
tracing::warn!(
target: "local_driver",
"Failed to parse tool args '{}': {}",
args, e
);
serde_json::json!({})
})
};
yield Ok(StreamChunk::ToolUseEnd {
id: id.clone(),
input: parsed_args,
});
}
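                            // Most local servers do not report token usage over
                            // SSE, so usage counters stay at zero here.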
yield Ok(StreamChunk::Complete {
input_tokens: 0,
output_tokens: 0,
stop_reason: "end_turn".to_string(),
});
continue;
}
match serde_json::from_str::<LocalStreamResponse>(data) {
Ok(resp) => {
if let Some(choice) = resp.choices.first() {
let delta = &choice.delta;
// Text content
if let Some(content) = &delta.content {
if !content.is_empty() {
yield Ok(StreamChunk::TextDelta {
delta: content.clone(),
});
}
}
// Tool calls
if let Some(tool_calls) = &delta.tool_calls {
for tc in tool_calls {
// Tool call start
if let Some(id) = &tc.id {
let name = tc
.function
.as_ref()
.and_then(|f| f.name.clone())
.unwrap_or_default();
if !name.is_empty() {
current_tool_id = Some(id.clone());
accumulated_tool_calls
.insert(id.clone(), (name.clone(), String::new()));
yield Ok(StreamChunk::ToolUseStart {
id: id.clone(),
name,
});
} else {
current_tool_id = Some(id.clone());
accumulated_tool_calls
.insert(id.clone(), (String::new(), String::new()));
}
}
// Tool call delta
if let Some(function) = &tc.function {
if let Some(args) = &function.arguments {
let tool_id = tc
.id
.as_ref()
.or(current_tool_id.as_ref())
.cloned()
.unwrap_or_default();
yield Ok(StreamChunk::ToolUseDelta {
id: tool_id.clone(),
delta: args.clone(),
});
if let Some(entry) =
accumulated_tool_calls.get_mut(&tool_id)
{
entry.1.push_str(args);
}
}
}
}
}
}
}
Err(e) => {
tracing::warn!(
target: "local_driver",
"Failed to parse SSE: {}, data: {}",
e, data
);
}
}
}
}
}
})
}
}
// ---------------------------------------------------------------------------
// Connection-error diagnostics
// ---------------------------------------------------------------------------
/// Return a human-readable hint when the local server appears to be unreachable.
fn connection_error_hint(error: &reqwest::Error) -> String {
    if error.is_connect() {
        "\n\nHint: Is the local LLM server running at the configured base URL?\n\
         Make sure the server is started before using this driver."
            .to_string()
} else if error.is_timeout() {
"\n\nHint: The request timed out. Local inference can be slow -- \
try a smaller model or increase the timeout."
.to_string()
} else {
String::new()
}
}
// ---------------------------------------------------------------------------
// OpenAI-compatible API types (private to this module)
// ---------------------------------------------------------------------------
#[derive(Serialize)]
struct LocalApiRequest {
model: String,
messages: Vec<LocalApiMessage>,
#[serde(skip_serializing_if = "Option::is_none")]
max_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
stop: Option<Vec<String>>,
    stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
tools: Option<Vec<LocalApiTool>>,
}
#[derive(Serialize)]
struct LocalApiMessage {
role: String,
#[serde(skip_serializing_if = "Option::is_none")]
content: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
tool_calls: Option<Vec<LocalApiToolCall>>,
}
#[derive(Serialize)]
struct LocalApiToolCall {
id: String,
r#type: String,
function: LocalFunctionCall,
}
#[derive(Serialize)]
struct LocalFunctionCall {
name: String,
arguments: String,
}
#[derive(Serialize)]
struct LocalApiTool {
r#type: String,
function: LocalFunctionDef,
}
#[derive(Serialize)]
struct LocalFunctionDef {
name: String,
description: String,
parameters: serde_json::Value,
}
// --- Response types ---
#[derive(Deserialize, Default)]
struct LocalApiResponse {
#[serde(default)]
choices: Vec<LocalApiChoice>,
#[serde(default)]
usage: Option<LocalApiUsage>,
}
#[derive(Deserialize, Default)]
struct LocalApiChoice {
#[serde(default)]
message: LocalApiResponseMessage,
#[serde(default)]
finish_reason: Option<String>,
}
#[derive(Deserialize, Default)]
struct LocalApiResponseMessage {
#[serde(default)]
content: Option<String>,
#[serde(default)]
tool_calls: Option<Vec<LocalApiToolCallResponse>>,
}
#[derive(Deserialize, Default)]
struct LocalApiToolCallResponse {
#[serde(default)]
id: String,
#[serde(default)]
function: LocalFunctionCallResponse,
}
#[derive(Deserialize, Default)]
struct LocalFunctionCallResponse {
#[serde(default)]
name: String,
#[serde(default)]
arguments: String,
}
#[derive(Deserialize, Default)]
struct LocalApiUsage {
#[serde(default)]
prompt_tokens: u32,
#[serde(default)]
completion_tokens: u32,
}
// --- Streaming types ---
#[derive(Debug, Deserialize)]
struct LocalStreamResponse {
#[serde(default)]
choices: Vec<LocalStreamChoice>,
}
#[derive(Debug, Deserialize)]
struct LocalStreamChoice {
#[serde(default)]
delta: LocalDelta,
#[serde(default)]
#[allow(dead_code)] // Deserialized from SSE, not accessed in code
finish_reason: Option<String>,
}
#[derive(Debug, Deserialize, Default)]
struct LocalDelta {
#[serde(default)]
content: Option<String>,
#[serde(default)]
tool_calls: Option<Vec<LocalToolCallDelta>>,
}
#[derive(Debug, Deserialize)]
struct LocalToolCallDelta {
#[serde(default)]
id: Option<String>,
#[serde(default)]
function: Option<LocalFunctionDelta>,
}
#[derive(Debug, Deserialize)]
struct LocalFunctionDelta {
#[serde(default)]
name: Option<String>,
#[serde(default)]
arguments: Option<String>,
}
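
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal sketch of an OpenAI-compatible streaming delta that opens a
    // tool call. Hand-written payload; field names follow the structs above.
    #[test]
    fn parses_tool_call_delta() {
        let data = r#"{
            "choices": [{
                "delta": {
                    "tool_calls": [{
                        "id": "call_1",
                        "function": { "name": "read_file", "arguments": "" }
                    }]
                },
                "finish_reason": null
            }]
        }"#;
        let resp: LocalStreamResponse = serde_json::from_str(data).unwrap();
        let tc = &resp.choices[0].delta.tool_calls.as_ref().unwrap()[0];
        assert_eq!(tc.id.as_deref(), Some("call_1"));
        assert_eq!(
            tc.function.as_ref().and_then(|f| f.name.as_deref()),
            Some("read_file")
        );
    }
}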

View File

@@ -12,6 +12,7 @@ pub mod loop_runner;
pub mod loop_guard;
pub mod stream;
pub mod growth;
pub mod compaction;
// Re-export main types
pub use driver::{

View File

@@ -11,6 +11,7 @@ use crate::tool::{ToolRegistry, ToolContext, SkillExecutor};
use crate::tool::builtin::PathValidator;
use crate::loop_guard::LoopGuard;
use crate::growth::GrowthIntegration;
use crate::compaction;
use zclaw_memory::MemoryStore;
/// Agent loop runner
@@ -29,6 +30,8 @@ pub struct AgentLoop {
path_validator: Option<PathValidator>,
/// Growth system integration (optional)
growth: Option<GrowthIntegration>,
/// Compaction threshold in tokens (0 = disabled)
compaction_threshold: usize,
}
impl AgentLoop {
@@ -51,6 +54,7 @@ impl AgentLoop {
skill_executor: None,
path_validator: None,
growth: None,
compaction_threshold: 0,
}
}
@@ -101,6 +105,16 @@ impl AgentLoop {
self.growth = Some(growth);
}
/// Set compaction threshold in tokens (0 = disabled)
///
/// When the estimated token count of conversation history exceeds this
/// threshold, older messages are summarized into a single system message
/// and only recent messages are sent to the LLM.
pub fn with_compaction_threshold(mut self, threshold: usize) -> Self {
self.compaction_threshold = threshold;
self
}
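    // Usage sketch: chain onto the builder, e.g.
    //     let agent = agent.with_compaction_threshold(8_000);
    // The default of 0 keeps compaction disabled.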
/// Get growth integration reference
pub fn growth(&self) -> Option<&GrowthIntegration> {
self.growth.as_ref()
@@ -134,6 +148,11 @@ impl AgentLoop {
// Get all messages for context
let mut messages = self.memory.get_messages(&session_id).await?;
// Apply compaction if threshold is configured
if self.compaction_threshold > 0 {
messages = compaction::maybe_compact(messages, self.compaction_threshold);
}
// Enhance system prompt with growth memories
let enhanced_prompt = if let Some(ref growth) = self.growth {
let base = self.system_prompt.as_deref().unwrap_or("");
@@ -260,7 +279,12 @@ impl AgentLoop {
self.memory.append_message(&session_id, &user_message).await?;
// Get all messages for context
let messages = self.memory.get_messages(&session_id).await?;
let mut messages = self.memory.get_messages(&session_id).await?;
// Apply compaction if threshold is configured
if self.compaction_threshold > 0 {
messages = compaction::maybe_compact(messages, self.compaction_threshold);
}
// Enhance system prompt with growth memories
let enhanced_prompt = if let Some(ref growth) = self.growth {