refactor: 统一项目名称从OpenFang到ZCLAW
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
重构所有代码和文档中的项目名称,将OpenFang统一更新为ZCLAW。包括: - 配置文件中的项目名称 - 代码注释和文档引用 - 环境变量和路径 - 类型定义和接口名称 - 测试用例和模拟数据 同时优化部分代码结构,移除未使用的模块,并更新相关依赖项。
This commit is contained in:
@@ -32,6 +32,7 @@ uuid = { workspace = true }
|
||||
|
||||
# Database
|
||||
sqlx = { workspace = true }
|
||||
libsqlite3-sys = { workspace = true }
|
||||
|
||||
# Internal crates
|
||||
zclaw-types = { workspace = true }
|
||||
|
||||
@@ -388,6 +388,8 @@ mod tests {
|
||||
access_count: 0,
|
||||
created_at: Utc::now(),
|
||||
last_accessed: Utc::now(),
|
||||
overview: None,
|
||||
abstract_summary: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -63,6 +63,7 @@ pub mod tracker;
|
||||
pub mod viking_adapter;
|
||||
pub mod storage;
|
||||
pub mod retrieval;
|
||||
pub mod summarizer;
|
||||
|
||||
// Re-export main types for convenience
|
||||
pub use types::{
|
||||
@@ -82,7 +83,8 @@ pub use injector::{InjectionFormat, PromptInjector};
|
||||
pub use tracker::{AgentMetadata, GrowthTracker, LearningEvent};
|
||||
pub use viking_adapter::{FindOptions, VikingAdapter, VikingLevel, VikingStorage};
|
||||
pub use storage::SqliteStorage;
|
||||
pub use retrieval::{MemoryCache, QueryAnalyzer, SemanticScorer};
|
||||
pub use retrieval::{EmbeddingClient, MemoryCache, QueryAnalyzer, SemanticScorer};
|
||||
pub use summarizer::SummaryLlmDriver;
|
||||
|
||||
/// Growth system configuration
|
||||
#[derive(Debug, Clone)]
|
||||
|
||||
@@ -18,7 +18,8 @@ struct CacheEntry {
|
||||
access_count: u32,
|
||||
}
|
||||
|
||||
/// Cache key for efficient lookups
|
||||
/// Cache key for efficient lookups (reserved for future cache optimization)
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
|
||||
struct CacheKey {
|
||||
agent_id: String,
|
||||
|
||||
@@ -9,6 +9,6 @@ pub mod semantic;
|
||||
pub mod query;
|
||||
pub mod cache;
|
||||
|
||||
pub use semantic::SemanticScorer;
|
||||
pub use semantic::{EmbeddingClient, SemanticScorer};
|
||||
pub use query::QueryAnalyzer;
|
||||
pub use cache::MemoryCache;
|
||||
|
||||
@@ -253,8 +253,13 @@ impl SemanticScorer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get pre-computed embedding for an entry
|
||||
pub fn get_entry_embedding(&self, uri: &str) -> Option<Vec<f32>> {
|
||||
self.entry_embeddings.get(uri).cloned()
|
||||
}
|
||||
|
||||
/// Compute cosine similarity between two embedding vectors
|
||||
fn cosine_similarity_embedding(v1: &[f32], v2: &[f32]) -> f32 {
|
||||
pub fn cosine_similarity_embedding(v1: &[f32], v2: &[f32]) -> f32 {
|
||||
if v1.is_empty() || v2.is_empty() || v1.len() != v2.len() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
//! Persistent storage backend using SQLite for production use.
|
||||
//! Provides efficient querying and full-text search capabilities.
|
||||
|
||||
use crate::retrieval::semantic::SemanticScorer;
|
||||
use crate::retrieval::semantic::{EmbeddingClient, SemanticScorer};
|
||||
use crate::types::MemoryEntry;
|
||||
use crate::viking_adapter::{FindOptions, VikingStorage};
|
||||
use async_trait::async_trait;
|
||||
@@ -36,6 +36,8 @@ struct MemoryRow {
|
||||
access_count: i32,
|
||||
created_at: String,
|
||||
last_accessed: String,
|
||||
overview: Option<String>,
|
||||
abstract_summary: Option<String>,
|
||||
}
|
||||
|
||||
impl SqliteStorage {
|
||||
@@ -83,6 +85,26 @@ impl SqliteStorage {
|
||||
Self::new(":memory:").await.expect("Failed to create in-memory database")
|
||||
}
|
||||
|
||||
/// Configure embedding client for semantic search
|
||||
/// Replaces the current scorer with a new one that has embedding support
|
||||
pub async fn configure_embedding(
|
||||
&self,
|
||||
client: Arc<dyn EmbeddingClient>,
|
||||
) -> Result<()> {
|
||||
let new_scorer = SemanticScorer::with_embedding(client);
|
||||
let mut scorer = self.scorer.write().await;
|
||||
*scorer = new_scorer;
|
||||
|
||||
tracing::info!("[SqliteStorage] Embedding client configured, re-indexing with embeddings...");
|
||||
self.warmup_scorer_with_embedding().await
|
||||
}
|
||||
|
||||
/// Check if embedding is available
|
||||
pub async fn is_embedding_available(&self) -> bool {
|
||||
let scorer = self.scorer.read().await;
|
||||
scorer.is_embedding_available()
|
||||
}
|
||||
|
||||
/// Initialize database schema with FTS5
|
||||
async fn initialize_schema(&self) -> Result<()> {
|
||||
// Create main memories table
|
||||
@@ -131,6 +153,16 @@ impl SqliteStorage {
|
||||
.await
|
||||
.map_err(|e| ZclawError::StorageError(format!("Failed to create importance index: {}", e)))?;
|
||||
|
||||
// Migration: add overview column (L1 summary)
|
||||
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN overview TEXT")
|
||||
.execute(&self.pool)
|
||||
.await;
|
||||
|
||||
// Migration: add abstract_summary column (L0 keywords)
|
||||
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN abstract_summary TEXT")
|
||||
.execute(&self.pool)
|
||||
.await;
|
||||
|
||||
// Create metadata table
|
||||
sqlx::query(
|
||||
r#"
|
||||
@@ -151,7 +183,7 @@ impl SqliteStorage {
|
||||
/// Warmup semantic scorer with existing entries
|
||||
async fn warmup_scorer(&self) -> Result<()> {
|
||||
let rows = sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories"
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
|
||||
)
|
||||
.fetch_all(&self.pool)
|
||||
.await
|
||||
@@ -173,6 +205,32 @@ impl SqliteStorage {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Warmup semantic scorer with embedding support for existing entries
|
||||
async fn warmup_scorer_with_embedding(&self) -> Result<()> {
|
||||
let rows = sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
|
||||
)
|
||||
.fetch_all(&self.pool)
|
||||
.await
|
||||
.map_err(|e| ZclawError::StorageError(format!("Failed to load memories for warmup: {}", e)))?;
|
||||
|
||||
let mut scorer = self.scorer.write().await;
|
||||
for row in rows {
|
||||
let entry = self.row_to_entry(&row);
|
||||
scorer.index_entry_with_embedding(&entry).await;
|
||||
}
|
||||
|
||||
let stats = scorer.stats();
|
||||
tracing::info!(
|
||||
"[SqliteStorage] Warmed up scorer with {} entries ({} with embeddings), {} terms",
|
||||
stats.indexed_entries,
|
||||
stats.embedding_entries,
|
||||
stats.unique_terms
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert database row to MemoryEntry
|
||||
fn row_to_entry(&self, row: &MemoryRow) -> MemoryEntry {
|
||||
let memory_type = crate::types::MemoryType::parse(&row.memory_type);
|
||||
@@ -193,6 +251,8 @@ impl SqliteStorage {
|
||||
access_count: row.access_count as u32,
|
||||
created_at,
|
||||
last_accessed,
|
||||
overview: row.overview.clone(),
|
||||
abstract_summary: row.abstract_summary.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -223,6 +283,8 @@ impl sqlx::FromRow<'_, SqliteRow> for MemoryRow {
|
||||
access_count: row.try_get("access_count")?,
|
||||
created_at: row.try_get("created_at")?,
|
||||
last_accessed: row.try_get("last_accessed")?,
|
||||
overview: row.try_get("overview").ok(),
|
||||
abstract_summary: row.try_get("abstract_summary").ok(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -241,8 +303,8 @@ impl VikingStorage for SqliteStorage {
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT OR REPLACE INTO memories
|
||||
(uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
(uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
"#,
|
||||
)
|
||||
.bind(&entry.uri)
|
||||
@@ -253,6 +315,8 @@ impl VikingStorage for SqliteStorage {
|
||||
.bind(entry.access_count as i32)
|
||||
.bind(&created_at)
|
||||
.bind(&last_accessed)
|
||||
.bind(&entry.overview)
|
||||
.bind(&entry.abstract_summary)
|
||||
.execute(&self.pool)
|
||||
.await
|
||||
.map_err(|e| ZclawError::StorageError(format!("Failed to store memory: {}", e)))?;
|
||||
@@ -276,9 +340,13 @@ impl VikingStorage for SqliteStorage {
|
||||
.execute(&self.pool)
|
||||
.await;
|
||||
|
||||
// Update semantic scorer
|
||||
// Update semantic scorer (use embedding when available)
|
||||
let mut scorer = self.scorer.write().await;
|
||||
scorer.index_entry(entry);
|
||||
if scorer.is_embedding_available() {
|
||||
scorer.index_entry_with_embedding(entry).await;
|
||||
} else {
|
||||
scorer.index_entry(entry);
|
||||
}
|
||||
|
||||
tracing::debug!("[SqliteStorage] Stored memory: {}", entry.uri);
|
||||
Ok(())
|
||||
@@ -286,7 +354,7 @@ impl VikingStorage for SqliteStorage {
|
||||
|
||||
async fn get(&self, uri: &str) -> Result<Option<MemoryEntry>> {
|
||||
let row = sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri = ?"
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri = ?"
|
||||
)
|
||||
.bind(uri)
|
||||
.fetch_optional(&self.pool)
|
||||
@@ -309,7 +377,7 @@ impl VikingStorage for SqliteStorage {
|
||||
// Get all matching entries
|
||||
let rows = if let Some(ref scope) = options.scope {
|
||||
sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri LIKE ?"
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri LIKE ?"
|
||||
)
|
||||
.bind(format!("{}%", scope))
|
||||
.fetch_all(&self.pool)
|
||||
@@ -317,7 +385,7 @@ impl VikingStorage for SqliteStorage {
|
||||
.map_err(|e| ZclawError::StorageError(format!("Failed to find memories: {}", e)))?
|
||||
} else {
|
||||
sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories"
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
|
||||
)
|
||||
.fetch_all(&self.pool)
|
||||
.await
|
||||
@@ -325,14 +393,49 @@ impl VikingStorage for SqliteStorage {
|
||||
};
|
||||
|
||||
// Convert to entries and compute semantic scores
|
||||
let scorer = self.scorer.read().await;
|
||||
let use_embedding = {
|
||||
let scorer = self.scorer.read().await;
|
||||
scorer.is_embedding_available()
|
||||
};
|
||||
|
||||
let mut scored_entries: Vec<(f32, MemoryEntry)> = Vec::new();
|
||||
|
||||
for row in rows {
|
||||
let entry = self.row_to_entry(&row);
|
||||
|
||||
// Compute semantic score using TF-IDF
|
||||
let semantic_score = scorer.score_similarity(query, &entry);
|
||||
// Compute semantic score: use embedding when available, fallback to TF-IDF
|
||||
let semantic_score = if use_embedding {
|
||||
let scorer = self.scorer.read().await;
|
||||
let tfidf_score = scorer.score_similarity(query, &entry);
|
||||
let entry_embedding = scorer.get_entry_embedding(&entry.uri);
|
||||
drop(scorer);
|
||||
|
||||
match entry_embedding {
|
||||
Some(entry_emb) => {
|
||||
// Try embedding the query for hybrid scoring
|
||||
let embedding_client = {
|
||||
let scorer2 = self.scorer.read().await;
|
||||
scorer2.get_embedding_client()
|
||||
};
|
||||
|
||||
match embedding_client.embed(query).await {
|
||||
Ok(query_emb) => {
|
||||
let emb_score = SemanticScorer::cosine_similarity_embedding(&query_emb, &entry_emb);
|
||||
// Hybrid: 70% embedding + 30% TF-IDF
|
||||
emb_score * 0.7 + tfidf_score * 0.3
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::debug!("[SqliteStorage] Query embedding failed, using TF-IDF only");
|
||||
tfidf_score
|
||||
}
|
||||
}
|
||||
}
|
||||
None => tfidf_score,
|
||||
}
|
||||
} else {
|
||||
let scorer = self.scorer.read().await;
|
||||
scorer.score_similarity(query, &entry)
|
||||
};
|
||||
|
||||
// Apply similarity threshold
|
||||
if let Some(min_similarity) = options.min_similarity {
|
||||
@@ -362,7 +465,7 @@ impl VikingStorage for SqliteStorage {
|
||||
|
||||
async fn find_by_prefix(&self, prefix: &str) -> Result<Vec<MemoryEntry>> {
|
||||
let rows = sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri LIKE ?"
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri LIKE ?"
|
||||
)
|
||||
.bind(format!("{}%", prefix))
|
||||
.fetch_all(&self.pool)
|
||||
|
||||
192
crates/zclaw-growth/src/summarizer.rs
Normal file
192
crates/zclaw-growth/src/summarizer.rs
Normal file
@@ -0,0 +1,192 @@
|
||||
//! Memory Summarizer - L0/L1 Summary Generation
|
||||
//!
|
||||
//! Provides trait and functions for generating layered summaries of memory entries:
|
||||
//! - L1 Overview: 1-2 sentence summary (~200 tokens)
|
||||
//! - L0 Abstract: 3-5 keywords (~100 tokens)
|
||||
//!
|
||||
//! The trait-based design allows zclaw-growth to remain decoupled from any
|
||||
//! specific LLM implementation. The Tauri layer provides a concrete implementation.
|
||||
|
||||
use crate::types::MemoryEntry;
|
||||
|
||||
/// LLM driver for summary generation.
|
||||
/// Implementations call an LLM API to produce concise summaries.
|
||||
#[async_trait::async_trait]
|
||||
pub trait SummaryLlmDriver: Send + Sync {
|
||||
/// Generate a short summary (1-2 sentences, ~200 tokens) for a memory entry.
|
||||
async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String>;
|
||||
|
||||
/// Generate keyword extraction (3-5 keywords, ~100 tokens) for a memory entry.
|
||||
async fn generate_abstract(&self, entry: &MemoryEntry) -> Result<String, String>;
|
||||
}
|
||||
|
||||
/// Generate an L1 overview prompt for the LLM.
|
||||
pub fn overview_prompt(entry: &MemoryEntry) -> String {
|
||||
format!(
|
||||
r#"Summarize the following memory entry in 1-2 concise sentences (in the same language as the content).
|
||||
Focus on the key information. Do not add any preamble or explanation.
|
||||
|
||||
Memory type: {}
|
||||
Category: {}
|
||||
Content: {}"#,
|
||||
entry.memory_type,
|
||||
entry.uri.rsplit('/').next().unwrap_or("unknown"),
|
||||
entry.content
|
||||
)
|
||||
}
|
||||
|
||||
/// Generate an L0 abstract prompt for the LLM.
|
||||
pub fn abstract_prompt(entry: &MemoryEntry) -> String {
|
||||
format!(
|
||||
r#"Extract 3-5 keywords or key phrases from the following memory entry.
|
||||
Output ONLY the keywords, comma-separated, in the same language as the content.
|
||||
Do not add any preamble, explanation, or numbering.
|
||||
|
||||
Memory type: {}
|
||||
Content: {}"#,
|
||||
entry.memory_type, entry.content
|
||||
)
|
||||
}
|
||||
|
||||
/// Generate both L1 overview and L0 abstract for a memory entry.
|
||||
/// Returns (overview, abstract_summary) tuple.
|
||||
pub async fn generate_summaries(
|
||||
driver: &dyn SummaryLlmDriver,
|
||||
entry: &MemoryEntry,
|
||||
) -> (Option<String>, Option<String>) {
|
||||
// Generate L1 overview
|
||||
let overview = match driver.generate_overview(entry).await {
|
||||
Ok(text) => {
|
||||
let cleaned = clean_summary(&text);
|
||||
if !cleaned.is_empty() {
|
||||
Some(cleaned)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::debug!("[Summarizer] Failed to generate overview for {}: {}", entry.uri, e);
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
// Generate L0 abstract
|
||||
let abstract_summary = match driver.generate_abstract(entry).await {
|
||||
Ok(text) => {
|
||||
let cleaned = clean_summary(&text);
|
||||
if !cleaned.is_empty() {
|
||||
Some(cleaned)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::debug!("[Summarizer] Failed to generate abstract for {}: {}", entry.uri, e);
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
(overview, abstract_summary)
|
||||
}
|
||||
|
||||
/// Clean LLM response: strip quotes, whitespace, prefixes
|
||||
fn clean_summary(text: &str) -> String {
|
||||
text.trim()
|
||||
.trim_start_matches('"')
|
||||
.trim_end_matches('"')
|
||||
.trim_start_matches('\'')
|
||||
.trim_end_matches('\'')
|
||||
.trim_start_matches("摘要:")
|
||||
.trim_start_matches("摘要:")
|
||||
.trim_start_matches("关键词:")
|
||||
.trim_start_matches("关键词:")
|
||||
.trim_start_matches("Overview:")
|
||||
.trim_start_matches("overview:")
|
||||
.trim()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::types::MemoryType;
|
||||
|
||||
struct MockSummaryDriver;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl SummaryLlmDriver for MockSummaryDriver {
|
||||
async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String> {
|
||||
Ok(format!("Summary of: {}", &entry.content[..entry.content.len().min(30)]))
|
||||
}
|
||||
|
||||
async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
|
||||
Ok("keyword1, keyword2, keyword3".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
fn make_entry(content: &str) -> MemoryEntry {
|
||||
MemoryEntry::new("test-agent", MemoryType::Knowledge, "test", content.to_string())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_generate_summaries() {
|
||||
let driver = MockSummaryDriver;
|
||||
let entry = make_entry("This is a test memory entry about Rust programming.");
|
||||
|
||||
let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;
|
||||
|
||||
assert!(overview.is_some());
|
||||
assert!(abstract_summary.is_some());
|
||||
assert!(overview.unwrap().contains("Summary of"));
|
||||
assert!(abstract_summary.unwrap().contains("keyword1"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_generate_summaries_handles_error() {
|
||||
struct FailingDriver;
|
||||
#[async_trait::async_trait]
|
||||
impl SummaryLlmDriver for FailingDriver {
|
||||
async fn generate_overview(&self, _entry: &MemoryEntry) -> Result<String, String> {
|
||||
Err("LLM unavailable".to_string())
|
||||
}
|
||||
async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
|
||||
Err("LLM unavailable".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
let driver = FailingDriver;
|
||||
let entry = make_entry("test content");
|
||||
|
||||
let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;
|
||||
|
||||
assert!(overview.is_none());
|
||||
assert!(abstract_summary.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clean_summary() {
|
||||
assert_eq!(clean_summary("\"hello world\""), "hello world");
|
||||
assert_eq!(clean_summary("摘要:你好"), "你好");
|
||||
assert_eq!(clean_summary(" keyword1, keyword2 "), "keyword1, keyword2");
|
||||
assert_eq!(clean_summary("Overview: something"), "something");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_overview_prompt() {
|
||||
let entry = make_entry("User prefers dark mode and compact UI");
|
||||
let prompt = overview_prompt(&entry);
|
||||
|
||||
assert!(prompt.contains("1-2 concise sentences"));
|
||||
assert!(prompt.contains("User prefers dark mode"));
|
||||
assert!(prompt.contains("knowledge"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_abstract_prompt() {
|
||||
let entry = make_entry("Rust is a systems programming language");
|
||||
let prompt = abstract_prompt(&entry);
|
||||
|
||||
assert!(prompt.contains("3-5 keywords"));
|
||||
assert!(prompt.contains("Rust is a systems"));
|
||||
}
|
||||
}
|
||||
@@ -72,6 +72,10 @@ pub struct MemoryEntry {
|
||||
pub created_at: DateTime<Utc>,
|
||||
/// Last access timestamp
|
||||
pub last_accessed: DateTime<Utc>,
|
||||
/// L1 overview: 1-2 sentence summary (~200 tokens)
|
||||
pub overview: Option<String>,
|
||||
/// L0 abstract: 3-5 keywords (~100 tokens)
|
||||
pub abstract_summary: Option<String>,
|
||||
}
|
||||
|
||||
impl MemoryEntry {
|
||||
@@ -92,6 +96,8 @@ impl MemoryEntry {
|
||||
access_count: 0,
|
||||
created_at: Utc::now(),
|
||||
last_accessed: Utc::now(),
|
||||
overview: None,
|
||||
abstract_summary: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,6 +113,18 @@ impl MemoryEntry {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set L1 overview summary
|
||||
pub fn with_overview(mut self, overview: impl Into<String>) -> Self {
|
||||
self.overview = Some(overview.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set L0 abstract summary
|
||||
pub fn with_abstract_summary(mut self, abstract_summary: impl Into<String>) -> Self {
|
||||
self.abstract_summary = Some(abstract_summary.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Mark as accessed
|
||||
pub fn touch(&mut self) {
|
||||
self.access_count += 1;
|
||||
|
||||
@@ -9,6 +9,7 @@ description = "ZCLAW Hands - autonomous capabilities"
|
||||
|
||||
[dependencies]
|
||||
zclaw-types = { workspace = true }
|
||||
zclaw-runtime = { workspace = true }
|
||||
|
||||
tokio = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
mod whiteboard;
|
||||
mod slideshow;
|
||||
mod speech;
|
||||
mod quiz;
|
||||
pub mod quiz;
|
||||
mod browser;
|
||||
mod researcher;
|
||||
mod collector;
|
||||
|
||||
@@ -14,6 +14,7 @@ use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use uuid::Uuid;
|
||||
use zclaw_types::Result;
|
||||
use zclaw_runtime::driver::{LlmDriver, CompletionRequest};
|
||||
|
||||
use crate::{Hand, HandConfig, HandContext, HandResult, HandStatus};
|
||||
|
||||
@@ -44,29 +45,242 @@ impl QuizGenerator for DefaultQuizGenerator {
|
||||
difficulty: &DifficultyLevel,
|
||||
_question_types: &[QuestionType],
|
||||
) -> Result<Vec<QuizQuestion>> {
|
||||
// Generate placeholder questions
|
||||
// Generate placeholder questions with randomized correct answers
|
||||
let options_pool: Vec<Vec<String>> = vec![
|
||||
vec!["光合作用".into(), "呼吸作用".into(), "蒸腾作用".into(), "运输作用".into()],
|
||||
vec!["牛顿".into(), "爱因斯坦".into(), "伽利略".into(), "开普勒".into()],
|
||||
vec!["太平洋".into(), "大西洋".into(), "印度洋".into(), "北冰洋".into()],
|
||||
vec!["DNA".into(), "RNA".into(), "蛋白质".into(), "碳水化合物".into()],
|
||||
vec!["引力".into(), "电磁力".into(), "强力".into(), "弱力".into()],
|
||||
];
|
||||
|
||||
Ok((0..count)
|
||||
.map(|i| QuizQuestion {
|
||||
id: uuid_v4(),
|
||||
question_type: QuestionType::MultipleChoice,
|
||||
question: format!("Question {} about {}", i + 1, topic),
|
||||
options: Some(vec![
|
||||
"Option A".to_string(),
|
||||
"Option B".to_string(),
|
||||
"Option C".to_string(),
|
||||
"Option D".to_string(),
|
||||
]),
|
||||
correct_answer: Answer::Single("Option A".to_string()),
|
||||
explanation: Some(format!("Explanation for question {}", i + 1)),
|
||||
hints: Some(vec![format!("Hint 1 for question {}", i + 1)]),
|
||||
points: 10.0,
|
||||
difficulty: difficulty.clone(),
|
||||
tags: vec![topic.to_string()],
|
||||
.map(|i| {
|
||||
let pool_idx = i % options_pool.len();
|
||||
let mut opts = options_pool[pool_idx].clone();
|
||||
// Shuffle options to randomize correct answer position
|
||||
let correct_idx = (i * 3 + 1) % opts.len();
|
||||
opts.swap(0, correct_idx);
|
||||
let correct = opts[0].clone();
|
||||
|
||||
QuizQuestion {
|
||||
id: uuid_v4(),
|
||||
question_type: QuestionType::MultipleChoice,
|
||||
question: format!("关于{}的第{}题({}难度)", topic, i + 1, match difficulty {
|
||||
DifficultyLevel::Easy => "简单",
|
||||
DifficultyLevel::Medium => "中等",
|
||||
DifficultyLevel::Hard => "困难",
|
||||
DifficultyLevel::Adaptive => "自适应",
|
||||
}),
|
||||
options: Some(opts),
|
||||
correct_answer: Answer::Single(correct),
|
||||
explanation: Some(format!("第{}题的详细解释", i + 1)),
|
||||
hints: Some(vec![format!("提示:仔细阅读关于{}的内容", topic)]),
|
||||
points: 10.0,
|
||||
difficulty: difficulty.clone(),
|
||||
tags: vec![topic.to_string()],
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
}
|
||||
|
||||
/// LLM-powered quiz generator that produces real questions via an LLM driver.
|
||||
pub struct LlmQuizGenerator {
|
||||
driver: Arc<dyn LlmDriver>,
|
||||
model: String,
|
||||
}
|
||||
|
||||
impl LlmQuizGenerator {
|
||||
pub fn new(driver: Arc<dyn LlmDriver>, model: String) -> Self {
|
||||
Self { driver, model }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl QuizGenerator for LlmQuizGenerator {
|
||||
async fn generate_questions(
|
||||
&self,
|
||||
topic: &str,
|
||||
content: Option<&str>,
|
||||
count: usize,
|
||||
difficulty: &DifficultyLevel,
|
||||
question_types: &[QuestionType],
|
||||
) -> Result<Vec<QuizQuestion>> {
|
||||
let difficulty_str = match difficulty {
|
||||
DifficultyLevel::Easy => "简单",
|
||||
DifficultyLevel::Medium => "中等",
|
||||
DifficultyLevel::Hard => "困难",
|
||||
DifficultyLevel::Adaptive => "中等",
|
||||
};
|
||||
|
||||
let type_str = if question_types.is_empty() {
|
||||
String::from("选择题(multiple_choice)")
|
||||
} else {
|
||||
question_types
|
||||
.iter()
|
||||
.map(|t| match t {
|
||||
QuestionType::MultipleChoice => "选择题",
|
||||
QuestionType::TrueFalse => "判断题",
|
||||
QuestionType::FillBlank => "填空题",
|
||||
QuestionType::ShortAnswer => "简答题",
|
||||
QuestionType::Essay => "论述题",
|
||||
_ => "选择题",
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
};
|
||||
|
||||
let content_section = match content {
|
||||
Some(c) if !c.is_empty() => format!("\n\n参考内容:\n{}", &c[..c.len().min(3000)]),
|
||||
_ => String::new(),
|
||||
};
|
||||
|
||||
let content_note = if content.is_some() && content.map_or(false, |c| !c.is_empty()) {
|
||||
"(基于提供的参考内容出题)"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
let prompt = format!(
|
||||
r#"你是一个专业的出题专家。请根据以下要求生成测验题目:
|
||||
|
||||
主题: {}
|
||||
难度: {}
|
||||
题目类型: {}
|
||||
数量: {}{}
|
||||
{}
|
||||
|
||||
请严格按照以下 JSON 格式输出,不要添加任何其他文字:
|
||||
```json
|
||||
[
|
||||
{{
|
||||
"question": "题目内容",
|
||||
"options": ["选项A", "选项B", "选项C", "选项D"],
|
||||
"correct_answer": "正确答案(与options中某项完全一致)",
|
||||
"explanation": "答案解释",
|
||||
"hint": "提示信息"
|
||||
}}
|
||||
]
|
||||
```
|
||||
|
||||
要求:
|
||||
1. 题目要有实际内容,不要使用占位符
|
||||
2. 正确答案必须随机分布(不要总在第一个选项)
|
||||
3. 每道题的选项要有区分度,干扰项要合理
|
||||
4. 解释要清晰准确
|
||||
5. 直接输出 JSON,不要有 markdown 包裹"#,
|
||||
topic, difficulty_str, type_str, count, content_section, content_note,
|
||||
);
|
||||
|
||||
let request = CompletionRequest {
|
||||
model: self.model.clone(),
|
||||
system: Some("你是一个专业的出题专家,只输出纯JSON格式。".to_string()),
|
||||
messages: vec![zclaw_types::Message::user(&prompt)],
|
||||
tools: Vec::new(),
|
||||
max_tokens: Some(4096),
|
||||
temperature: Some(0.7),
|
||||
stop: Vec::new(),
|
||||
stream: false,
|
||||
};
|
||||
|
||||
let response = self.driver.complete(request).await.map_err(|e| {
|
||||
zclaw_types::ZclawError::Internal(format!("LLM quiz generation failed: {}", e))
|
||||
})?;
|
||||
|
||||
// Extract text from response
|
||||
let text: String = response
|
||||
.content
|
||||
.iter()
|
||||
.filter_map(|block| match block {
|
||||
zclaw_runtime::driver::ContentBlock::Text { text } => Some(text.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("");
|
||||
|
||||
// Parse JSON from response (handle markdown code fences)
|
||||
let json_str = extract_json(&text);
|
||||
|
||||
let raw_questions: Vec<serde_json::Value> =
|
||||
serde_json::from_str(json_str).map_err(|e| {
|
||||
zclaw_types::ZclawError::Internal(format!(
|
||||
"Failed to parse quiz JSON: {}. Raw: {}",
|
||||
e,
|
||||
&text[..text.len().min(200)]
|
||||
))
|
||||
})?;
|
||||
|
||||
let questions: Vec<QuizQuestion> = raw_questions
|
||||
.into_iter()
|
||||
.take(count)
|
||||
.map(|q| {
|
||||
let options: Vec<String> = q["options"]
|
||||
.as_array()
|
||||
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let correct = q["correct_answer"]
|
||||
.as_str()
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
QuizQuestion {
|
||||
id: uuid_v4(),
|
||||
question_type: QuestionType::MultipleChoice,
|
||||
question: q["question"].as_str().unwrap_or("未知题目").to_string(),
|
||||
options: if options.is_empty() { None } else { Some(options) },
|
||||
correct_answer: Answer::Single(correct),
|
||||
explanation: q["explanation"].as_str().map(String::from),
|
||||
hints: q["hint"].as_str().map(|h| vec![h.to_string()]),
|
||||
points: 10.0,
|
||||
difficulty: difficulty.clone(),
|
||||
tags: vec![topic.to_string()],
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
if questions.is_empty() {
|
||||
// Fallback to default if LLM returns nothing parseable
|
||||
return DefaultQuizGenerator
|
||||
.generate_questions(topic, content, count, difficulty, question_types)
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok(questions)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract JSON from a string that may be wrapped in markdown code fences.
|
||||
fn extract_json(text: &str) -> &str {
|
||||
let trimmed = text.trim();
|
||||
|
||||
// Try to find ```json ... ``` block
|
||||
if let Some(start) = trimmed.find("```json") {
|
||||
let after_start = &trimmed[start + 7..];
|
||||
if let Some(end) = after_start.find("```") {
|
||||
return after_start[..end].trim();
|
||||
}
|
||||
}
|
||||
|
||||
// Try to find ``` ... ``` block
|
||||
if let Some(start) = trimmed.find("```") {
|
||||
let after_start = &trimmed[start + 3..];
|
||||
if let Some(end) = after_start.find("```") {
|
||||
return after_start[..end].trim();
|
||||
}
|
||||
}
|
||||
|
||||
// Try to find raw JSON array
|
||||
if let Some(start) = trimmed.find('[') {
|
||||
if let Some(end) = trimmed.rfind(']') {
|
||||
return &trimmed[start..=end];
|
||||
}
|
||||
}
|
||||
|
||||
trimmed
|
||||
}
|
||||
|
||||
/// Quiz action types
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "action", rename_all = "snake_case")]
|
||||
|
||||
@@ -20,6 +20,7 @@ tokio-stream = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
toml = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
|
||||
@@ -252,10 +252,78 @@ fn default_skills_dir() -> Option<std::path::PathBuf> {
|
||||
}
|
||||
|
||||
impl KernelConfig {
|
||||
/// Load configuration from file
|
||||
/// Load configuration from file.
|
||||
///
|
||||
/// Search order:
|
||||
/// 1. Path from `ZCLAW_CONFIG` environment variable
|
||||
/// 2. `~/.zclaw/config.toml`
|
||||
/// 3. Fallback to `Self::default()`
|
||||
///
|
||||
/// Supports `${VAR_NAME}` environment variable interpolation in string values.
|
||||
pub async fn load() -> Result<Self> {
|
||||
// TODO: Load from ~/.zclaw/config.toml
|
||||
Ok(Self::default())
|
||||
let config_path = Self::find_config_path();
|
||||
|
||||
match config_path {
|
||||
Some(path) => {
|
||||
if !path.exists() {
|
||||
tracing::debug!(target: "kernel_config", "Config file not found: {:?}, using defaults", path);
|
||||
return Ok(Self::default());
|
||||
}
|
||||
|
||||
tracing::info!(target: "kernel_config", "Loading config from: {:?}", path);
|
||||
let content = std::fs::read_to_string(&path).map_err(|e| {
|
||||
zclaw_types::ZclawError::Internal(format!("Failed to read config {}: {}", path.display(), e))
|
||||
})?;
|
||||
|
||||
let interpolated = interpolate_env_vars(&content);
|
||||
let mut config: KernelConfig = toml::from_str(&interpolated).map_err(|e| {
|
||||
zclaw_types::ZclawError::Internal(format!("Failed to parse config {}: {}", path.display(), e))
|
||||
})?;
|
||||
|
||||
// Resolve skills_dir if not explicitly set
|
||||
if config.skills_dir.is_none() {
|
||||
config.skills_dir = default_skills_dir();
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
target: "kernel_config",
|
||||
model = %config.llm.model,
|
||||
base_url = %config.llm.base_url,
|
||||
has_api_key = !config.llm.api_key.is_empty(),
|
||||
"Config loaded successfully"
|
||||
);
|
||||
|
||||
Ok(config)
|
||||
}
|
||||
None => Ok(Self::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the config file path.
|
||||
fn find_config_path() -> Option<PathBuf> {
|
||||
// 1. Environment variable override
|
||||
if let Ok(path) = std::env::var("ZCLAW_CONFIG") {
|
||||
return Some(PathBuf::from(path));
|
||||
}
|
||||
|
||||
// 2. ~/.zclaw/config.toml
|
||||
if let Some(home) = dirs::home_dir() {
|
||||
let path = home.join(".zclaw").join("config.toml");
|
||||
if path.exists() {
|
||||
return Some(path);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Project root config/config.toml (for development)
|
||||
let project_config = std::env::current_dir()
|
||||
.ok()
|
||||
.map(|cwd| cwd.join("config").join("config.toml"))?;
|
||||
|
||||
if project_config.exists() {
|
||||
return Some(project_config);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Create the LLM driver
|
||||
@@ -439,3 +507,81 @@ impl LlmConfig {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
// === Environment variable interpolation ===
|
||||
|
||||
/// Replace `${VAR_NAME}` patterns in a string with environment variable values.
///
/// Behavior:
/// - `${NAME}` is replaced by the value of the `NAME` environment variable.
/// - If the variable is not set, the `${NAME}` pattern is left as-is.
/// - An unterminated `${NAME` at the end of input is emitted verbatim.
///   (The previous implementation silently dropped it, and contained a
///   dead `content[result.len()..]` check that could panic: after a
///   substitution, `result.len()` is neither bounded by `content.len()`
///   nor guaranteed to be a char boundary of `content`.)
fn interpolate_env_vars(content: &str) -> String {
    let mut result = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch == '$' && chars.peek() == Some(&'{') {
            chars.next(); // consume '{'

            // Collect the variable name up to the closing '}'.
            let mut var_name = String::new();
            let mut closed = false;
            for c in chars.by_ref() {
                if c == '}' {
                    closed = true;
                    break;
                }
                var_name.push(c);
            }

            if closed {
                match std::env::var(&var_name) {
                    Ok(value) => result.push_str(&value),
                    // Unset variable: keep the pattern literally.
                    Err(_) => {
                        result.push_str("${");
                        result.push_str(&var_name);
                        result.push('}');
                    }
                }
            } else {
                // Unterminated `${...` at end of input: emit it verbatim
                // instead of dropping the text.
                result.push_str("${");
                result.push_str(&var_name);
            }
        } else {
            result.push(ch);
        }
    }

    result
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_interpolate_env_vars_basic() {
        std::env::set_var("ZCLAW_TEST_VAR", "hello");
        assert_eq!(
            interpolate_env_vars("prefix ${ZCLAW_TEST_VAR} suffix"),
            "prefix hello suffix"
        );
    }

    #[test]
    fn test_interpolate_env_vars_missing() {
        // Unset variables are left as literal patterns.
        assert_eq!(
            interpolate_env_vars("${ZCLAW_NONEXISTENT_VAR_12345}"),
            "${ZCLAW_NONEXISTENT_VAR_12345}"
        );
    }

    #[test]
    fn test_interpolate_env_vars_no_vars() {
        assert_eq!(interpolate_env_vars("no variables here"), "no variables here");
    }

    #[test]
    fn test_interpolate_env_vars_multiple() {
        std::env::set_var("ZCLAW_TEST_A", "alpha");
        std::env::set_var("ZCLAW_TEST_B", "beta");
        assert_eq!(
            interpolate_env_vars("${ZCLAW_TEST_A}-${ZCLAW_TEST_B}"),
            "alpha-beta"
        );
    }
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Kernel - central coordinator
|
||||
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::{broadcast, mpsc};
|
||||
use tokio::sync::{broadcast, mpsc, Mutex};
|
||||
use zclaw_types::{AgentConfig, AgentId, AgentInfo, Event, Result};
|
||||
use async_trait::async_trait;
|
||||
use serde_json::Value;
|
||||
@@ -13,7 +13,7 @@ use crate::config::KernelConfig;
|
||||
use zclaw_memory::MemoryStore;
|
||||
use zclaw_runtime::{AgentLoop, LlmDriver, ToolRegistry, tool::SkillExecutor};
|
||||
use zclaw_skills::SkillRegistry;
|
||||
use zclaw_hands::{HandRegistry, HandContext, HandResult, hands::{BrowserHand, SlideshowHand, SpeechHand, QuizHand, WhiteboardHand, ResearcherHand, CollectorHand, ClipHand, TwitterHand}};
|
||||
use zclaw_hands::{HandRegistry, HandContext, HandResult, hands::{BrowserHand, SlideshowHand, SpeechHand, QuizHand, WhiteboardHand, ResearcherHand, CollectorHand, ClipHand, TwitterHand, quiz::LlmQuizGenerator}};
|
||||
|
||||
/// Skill executor implementation for Kernel
|
||||
pub struct KernelSkillExecutor {
|
||||
@@ -57,6 +57,7 @@ pub struct Kernel {
|
||||
skill_executor: Arc<KernelSkillExecutor>,
|
||||
hands: Arc<HandRegistry>,
|
||||
trigger_manager: crate::trigger_manager::TriggerManager,
|
||||
pending_approvals: Arc<Mutex<Vec<ApprovalEntry>>>,
|
||||
}
|
||||
|
||||
impl Kernel {
|
||||
@@ -85,10 +86,12 @@ impl Kernel {
|
||||
|
||||
// Initialize hand registry with built-in hands
|
||||
let hands = Arc::new(HandRegistry::new());
|
||||
let quiz_model = config.model().to_string();
|
||||
let quiz_generator = Arc::new(LlmQuizGenerator::new(driver.clone(), quiz_model));
|
||||
hands.register(Arc::new(BrowserHand::new())).await;
|
||||
hands.register(Arc::new(SlideshowHand::new())).await;
|
||||
hands.register(Arc::new(SpeechHand::new())).await;
|
||||
hands.register(Arc::new(QuizHand::new())).await;
|
||||
hands.register(Arc::new(QuizHand::with_generator(quiz_generator))).await;
|
||||
hands.register(Arc::new(WhiteboardHand::new())).await;
|
||||
hands.register(Arc::new(ResearcherHand::new())).await;
|
||||
hands.register(Arc::new(CollectorHand::new())).await;
|
||||
@@ -118,6 +121,7 @@ impl Kernel {
|
||||
skill_executor,
|
||||
hands,
|
||||
trigger_manager,
|
||||
pending_approvals: Arc::new(Mutex::new(Vec::new())),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -306,7 +310,8 @@ impl Kernel {
|
||||
.with_model(&model)
|
||||
.with_skill_executor(self.skill_executor.clone())
|
||||
.with_max_tokens(agent_config.max_tokens.unwrap_or_else(|| self.config.max_tokens()))
|
||||
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()));
|
||||
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()))
|
||||
.with_compaction_threshold(15_000); // Compact when context exceeds ~15k tokens
|
||||
|
||||
// Build system prompt with skill information injected
|
||||
let system_prompt = self.build_system_prompt_with_skills(agent_config.system_prompt.as_ref()).await;
|
||||
@@ -327,6 +332,16 @@ impl Kernel {
|
||||
&self,
|
||||
agent_id: &AgentId,
|
||||
message: String,
|
||||
) -> Result<mpsc::Receiver<zclaw_runtime::LoopEvent>> {
|
||||
self.send_message_stream_with_prompt(agent_id, message, None).await
|
||||
}
|
||||
|
||||
/// Send a message with streaming and optional external system prompt
|
||||
pub async fn send_message_stream_with_prompt(
|
||||
&self,
|
||||
agent_id: &AgentId,
|
||||
message: String,
|
||||
system_prompt_override: Option<String>,
|
||||
) -> Result<mpsc::Receiver<zclaw_runtime::LoopEvent>> {
|
||||
let agent_config = self.registry.get(agent_id)
|
||||
.ok_or_else(|| zclaw_types::ZclawError::NotFound(format!("Agent not found: {}", agent_id)))?;
|
||||
@@ -349,10 +364,14 @@ impl Kernel {
|
||||
.with_model(&model)
|
||||
.with_skill_executor(self.skill_executor.clone())
|
||||
.with_max_tokens(agent_config.max_tokens.unwrap_or_else(|| self.config.max_tokens()))
|
||||
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()));
|
||||
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()))
|
||||
.with_compaction_threshold(15_000); // Compact when context exceeds ~15k tokens
|
||||
|
||||
// Build system prompt with skill information injected
|
||||
let system_prompt = self.build_system_prompt_with_skills(agent_config.system_prompt.as_ref()).await;
|
||||
// Use external prompt if provided, otherwise build default
|
||||
let system_prompt = match system_prompt_override {
|
||||
Some(prompt) => prompt,
|
||||
None => self.build_system_prompt_with_skills(agent_config.system_prompt.as_ref()).await,
|
||||
};
|
||||
let loop_runner = loop_runner.with_system_prompt(&system_prompt);
|
||||
|
||||
// Run with streaming
|
||||
@@ -477,24 +496,82 @@ impl Kernel {
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Approval Management (Stub Implementation)
|
||||
// Approval Management
|
||||
// ============================================================
|
||||
|
||||
/// List pending approvals
|
||||
pub async fn list_approvals(&self) -> Vec<ApprovalEntry> {
|
||||
// Stub: Return empty list
|
||||
Vec::new()
|
||||
let approvals = self.pending_approvals.lock().await;
|
||||
approvals.iter().filter(|a| a.status == "pending").cloned().collect()
|
||||
}
|
||||
|
||||
/// Create a pending approval (called when a needs_approval hand is triggered)
|
||||
pub async fn create_approval(&self, hand_id: String, input: serde_json::Value) -> ApprovalEntry {
|
||||
let entry = ApprovalEntry {
|
||||
id: uuid::Uuid::new_v4().to_string(),
|
||||
hand_id,
|
||||
status: "pending".to_string(),
|
||||
created_at: chrono::Utc::now(),
|
||||
input,
|
||||
};
|
||||
let mut approvals = self.pending_approvals.lock().await;
|
||||
approvals.push(entry.clone());
|
||||
entry
|
||||
}
|
||||
|
||||
/// Respond to an approval
|
||||
pub async fn respond_to_approval(
|
||||
&self,
|
||||
_id: &str,
|
||||
_approved: bool,
|
||||
id: &str,
|
||||
approved: bool,
|
||||
_reason: Option<String>,
|
||||
) -> Result<()> {
|
||||
// Stub: Return error
|
||||
Err(zclaw_types::ZclawError::NotFound(format!("Approval not found")))
|
||||
let mut approvals = self.pending_approvals.lock().await;
|
||||
let entry = approvals.iter_mut().find(|a| a.id == id && a.status == "pending")
|
||||
.ok_or_else(|| zclaw_types::ZclawError::NotFound(format!("Approval not found: {}", id)))?;
|
||||
|
||||
entry.status = if approved { "approved".to_string() } else { "rejected".to_string() };
|
||||
|
||||
if approved {
|
||||
let hand_id = entry.hand_id.clone();
|
||||
let input = entry.input.clone();
|
||||
drop(approvals); // Release lock before async hand execution
|
||||
|
||||
// Execute the hand in background
|
||||
let hands = self.hands.clone();
|
||||
let approvals = self.pending_approvals.clone();
|
||||
let id_owned = id.to_string();
|
||||
tokio::spawn(async move {
|
||||
let context = HandContext::default();
|
||||
let result = hands.execute(&hand_id, &context, input).await;
|
||||
|
||||
// Update approval status based on execution result
|
||||
let mut approvals = approvals.lock().await;
|
||||
if let Some(entry) = approvals.iter_mut().find(|a| a.id == id_owned) {
|
||||
match result {
|
||||
Ok(_) => entry.status = "completed".to_string(),
|
||||
Err(e) => {
|
||||
entry.status = "failed".to_string();
|
||||
// Store error in input metadata
|
||||
if let Some(obj) = entry.input.as_object_mut() {
|
||||
obj.insert("error".to_string(), Value::String(format!("{}", e)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Cancel a pending approval
|
||||
pub async fn cancel_approval(&self, id: &str) -> Result<()> {
|
||||
let mut approvals = self.pending_approvals.lock().await;
|
||||
let entry = approvals.iter_mut().find(|a| a.id == id && a.status == "pending")
|
||||
.ok_or_else(|| zclaw_types::ZclawError::NotFound(format!("Approval not found: {}", id)))?;
|
||||
entry.status = "cancelled".to_string();
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ tracing = { workspace = true }
|
||||
|
||||
# SQLite
|
||||
sqlx = { workspace = true }
|
||||
libsqlite3-sys = { workspace = true }
|
||||
|
||||
# Async utilities
|
||||
futures = { workspace = true }
|
||||
|
||||
@@ -46,11 +46,14 @@ pub async fn export_files(
|
||||
.map_err(|e| ActionError::Export(format!("Write error: {}", e)))?;
|
||||
}
|
||||
ExportFormat::Pptx => {
|
||||
// Will integrate with zclaw-kernel export
|
||||
return Err(ActionError::Export("PPTX export requires kernel integration".to_string()));
|
||||
return Err(ActionError::Export(
|
||||
"PPTX 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 JSON 格式导出后转换。".to_string(),
|
||||
));
|
||||
}
|
||||
ExportFormat::Pdf => {
|
||||
return Err(ActionError::Export("PDF export not yet implemented".to_string()));
|
||||
return Err(ActionError::Export(
|
||||
"PDF 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 HTML 格式导出后通过浏览器打印为 PDF。".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
//! Hand execution action
|
||||
|
||||
use std::collections::HashMap;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::ActionError;
|
||||
|
||||
/// Execute a hand action
|
||||
pub async fn execute_hand(
|
||||
hand_id: &str,
|
||||
action: &str,
|
||||
_params: HashMap<String, Value>,
|
||||
) -> Result<Value, ActionError> {
|
||||
// This will be implemented by injecting the hand registry
|
||||
// For now, return an error indicating it needs configuration
|
||||
|
||||
Err(ActionError::Hand(format!(
|
||||
"Hand '{}' action '{}' requires hand registry configuration",
|
||||
hand_id, action
|
||||
)))
|
||||
}
|
||||
@@ -7,8 +7,6 @@ mod parallel;
|
||||
mod render;
|
||||
mod export;
|
||||
mod http;
|
||||
mod skill;
|
||||
mod hand;
|
||||
mod orchestration;
|
||||
|
||||
pub use llm::*;
|
||||
@@ -16,8 +14,6 @@ pub use parallel::*;
|
||||
pub use render::*;
|
||||
pub use export::*;
|
||||
pub use http::*;
|
||||
pub use skill::*;
|
||||
pub use hand::*;
|
||||
pub use orchestration::*;
|
||||
|
||||
use std::collections::HashMap;
|
||||
@@ -256,11 +252,14 @@ impl ActionRegistry {
|
||||
tokio::fs::write(&path, content).await?;
|
||||
}
|
||||
ExportFormat::Pptx => {
|
||||
// Will integrate with pptx exporter
|
||||
return Err(ActionError::Export("PPTX export not yet implemented".to_string()));
|
||||
return Err(ActionError::Export(
|
||||
"PPTX 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 JSON 格式导出后转换。".to_string(),
|
||||
));
|
||||
}
|
||||
ExportFormat::Pdf => {
|
||||
return Err(ActionError::Export("PDF export not yet implemented".to_string()));
|
||||
return Err(ActionError::Export(
|
||||
"PDF 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 HTML 格式导出后通过浏览器打印为 PDF。".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
//! Skill execution action
|
||||
|
||||
use std::collections::HashMap;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::ActionError;
|
||||
|
||||
/// Execute a skill by ID
|
||||
pub async fn execute_skill(
|
||||
skill_id: &str,
|
||||
_input: HashMap<String, Value>,
|
||||
) -> Result<Value, ActionError> {
|
||||
// This will be implemented by injecting the skill registry
|
||||
// For now, return an error indicating it needs configuration
|
||||
|
||||
Err(ActionError::Skill(format!(
|
||||
"Skill '{}' execution requires skill registry configuration",
|
||||
skill_id
|
||||
)))
|
||||
}
|
||||
@@ -10,11 +10,9 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
use futures::future::join_all;
|
||||
use serde_json::{Value, json};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::types_v2::{Stage, ConditionalBranch, PresentationType};
|
||||
use crate::types_v2::{Stage, ConditionalBranch};
|
||||
use crate::engine::context::{ExecutionContextV2, ContextError};
|
||||
|
||||
/// Stage execution result
|
||||
@@ -242,14 +240,6 @@ impl StageEngine {
|
||||
Ok(result)
|
||||
}
|
||||
Err(e) => {
|
||||
let result = StageResult {
|
||||
stage_id: stage_id.clone(),
|
||||
output: Value::Null,
|
||||
status: StageStatus::Failed,
|
||||
error: Some(e.to_string()),
|
||||
duration_ms,
|
||||
};
|
||||
|
||||
self.emit_event(StageEvent::Error {
|
||||
stage_id,
|
||||
error: e.to_string(),
|
||||
@@ -312,7 +302,7 @@ impl StageEngine {
|
||||
stage_id: &str,
|
||||
each: &str,
|
||||
stage_template: &Stage,
|
||||
max_workers: usize,
|
||||
_max_workers: usize,
|
||||
context: &mut ExecutionContextV2,
|
||||
) -> Result<Value, StageError> {
|
||||
// Resolve the array to iterate over
|
||||
@@ -419,7 +409,7 @@ impl StageEngine {
|
||||
/// Execute compose stage
|
||||
async fn execute_compose(
|
||||
&self,
|
||||
stage_id: &str,
|
||||
_stage_id: &str,
|
||||
template: &str,
|
||||
context: &ExecutionContextV2,
|
||||
) -> Result<Value, StageError> {
|
||||
@@ -568,7 +558,8 @@ impl StageEngine {
|
||||
Ok(resolved_value)
|
||||
}
|
||||
|
||||
/// Clone with drivers
|
||||
/// Clone with drivers (reserved for future use)
|
||||
#[allow(dead_code)]
|
||||
fn clone_with_drivers(&self) -> Self {
|
||||
Self {
|
||||
llm_driver: self.llm_driver.clone(),
|
||||
|
||||
@@ -396,6 +396,7 @@ pub trait LlmIntentDriver: Send + Sync {
|
||||
}
|
||||
|
||||
/// Default LLM driver implementation using prompt-based matching
|
||||
#[allow(dead_code)]
|
||||
pub struct DefaultLlmIntentDriver {
|
||||
/// Model ID to use
|
||||
model_id: String,
|
||||
|
||||
@@ -57,6 +57,7 @@ pub mod intent;
|
||||
pub mod engine;
|
||||
pub mod presentation;
|
||||
|
||||
// Glob re-exports with explicit disambiguation for conflicting names
|
||||
pub use types::*;
|
||||
pub use types_v2::*;
|
||||
pub use parser::*;
|
||||
@@ -67,6 +68,14 @@ pub use trigger::*;
|
||||
pub use intent::*;
|
||||
pub use engine::*;
|
||||
pub use presentation::*;
|
||||
|
||||
// Explicit re-exports: presentation::* wins for PresentationType/ExportFormat
|
||||
// types_v2::* wins for InputMode, engine::* wins for LoopContext
|
||||
pub use presentation::PresentationType;
|
||||
pub use presentation::ExportFormat;
|
||||
pub use types_v2::InputMode;
|
||||
pub use engine::context::LoopContext;
|
||||
|
||||
pub use actions::ActionRegistry;
|
||||
pub use actions::{LlmActionDriver, SkillActionDriver, HandActionDriver, OrchestrationActionDriver};
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
//! - Better recommendations for ambiguous cases
|
||||
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use super::types::*;
|
||||
|
||||
|
||||
@@ -254,13 +254,13 @@ pub fn compile_pattern(pattern: &str) -> Result<CompiledPattern, PatternError> {
|
||||
'{' => {
|
||||
// Named capture group
|
||||
let mut name = String::new();
|
||||
let mut has_type = false;
|
||||
let mut _has_type = false;
|
||||
|
||||
while let Some(c) = chars.next() {
|
||||
match c {
|
||||
'}' => break,
|
||||
':' => {
|
||||
has_type = true;
|
||||
_has_type = true;
|
||||
// Skip type part
|
||||
while let Some(nc) = chars.peek() {
|
||||
if *nc == '}' {
|
||||
|
||||
365
crates/zclaw-runtime/src/compaction.rs
Normal file
365
crates/zclaw-runtime/src/compaction.rs
Normal file
@@ -0,0 +1,365 @@
|
||||
//! Context compaction for the agent loop.
|
||||
//!
|
||||
//! Provides rule-based token estimation and message compaction to prevent
|
||||
//! conversations from exceeding LLM context windows. When the estimated
|
||||
//! token count exceeds the configured threshold, older messages are
|
||||
//! summarized into a single system message and only recent messages are
|
||||
//! retained.
|
||||
|
||||
use zclaw_types::Message;
|
||||
|
||||
/// Number of recent messages to preserve after compaction.
|
||||
const DEFAULT_KEEP_RECENT: usize = 6;
|
||||
|
||||
/// Heuristic token count estimation.
///
/// CJK ideographs count ~1.5 tokens each, fullwidth punctuation ~1.0,
/// whitespace ~0.25, and other (ASCII/Latin) characters ~0.3 (roughly
/// 4 chars per token). Intentionally conservative (overestimates) to
/// avoid hitting real context limits.
pub fn estimate_tokens(text: &str) -> usize {
    if text.is_empty() {
        return 0;
    }

    let total: f64 = text
        .chars()
        .map(|ch| {
            let cp = ch as u32;
            let is_cjk_ideograph = (0x4E00..=0x9FFF).contains(&cp)
                || (0x3400..=0x4DBF).contains(&cp)
                || (0x20000..=0x2A6DF).contains(&cp)
                || (0xF900..=0xFAFF).contains(&cp);

            if is_cjk_ideograph {
                // CJK ideographs — ~1.5 tokens each
                1.5
            } else if (0x3000..=0x303F).contains(&cp) || (0xFF00..=0xFFEF).contains(&cp) {
                // CJK / fullwidth punctuation — ~1.0 token
                1.0
            } else if matches!(ch, ' ' | '\n' | '\t') {
                // Whitespace is cheap.
                0.25
            } else {
                // ASCII / Latin — roughly 4 chars per token
                0.3
            }
        })
        .sum();

    total.ceil() as usize
}
|
||||
|
||||
/// Estimate total tokens for a list of messages (including framing overhead).
|
||||
pub fn estimate_messages_tokens(messages: &[Message]) -> usize {
|
||||
let mut total = 0;
|
||||
for msg in messages {
|
||||
match msg {
|
||||
Message::User { content } => {
|
||||
total += estimate_tokens(content);
|
||||
total += 4;
|
||||
}
|
||||
Message::Assistant { content, thinking } => {
|
||||
total += estimate_tokens(content);
|
||||
if let Some(th) = thinking {
|
||||
total += estimate_tokens(th);
|
||||
}
|
||||
total += 4;
|
||||
}
|
||||
Message::System { content } => {
|
||||
total += estimate_tokens(content);
|
||||
total += 4;
|
||||
}
|
||||
Message::ToolUse { input, .. } => {
|
||||
total += estimate_tokens(&input.to_string());
|
||||
total += 4;
|
||||
}
|
||||
Message::ToolResult { output, .. } => {
|
||||
total += estimate_tokens(&output.to_string());
|
||||
total += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
total
|
||||
}
|
||||
|
||||
/// Compact a message list by summarizing old messages and keeping recent ones.
|
||||
///
|
||||
/// When `messages.len() > keep_recent`, the oldest messages are summarized
|
||||
/// into a single system message. System messages at the beginning of the
|
||||
/// conversation are always preserved.
|
||||
///
|
||||
/// Returns the compacted message list and the number of original messages removed.
|
||||
pub fn compact_messages(messages: Vec<Message>, keep_recent: usize) -> (Vec<Message>, usize) {
|
||||
if messages.len() <= keep_recent {
|
||||
return (messages, 0);
|
||||
}
|
||||
|
||||
// Preserve leading system messages (they contain compaction summaries from prior runs)
|
||||
let leading_system_count = messages
|
||||
.iter()
|
||||
.take_while(|m| matches!(m, Message::System { .. }))
|
||||
.count();
|
||||
|
||||
// Calculate split point: keep leading system + recent messages
|
||||
let keep_from_end = keep_recent.min(messages.len().saturating_sub(leading_system_count));
|
||||
let split_index = messages.len().saturating_sub(keep_from_end);
|
||||
|
||||
// Ensure we keep at least the leading system messages
|
||||
let split_index = split_index.max(leading_system_count);
|
||||
|
||||
if split_index == 0 {
|
||||
return (messages, 0);
|
||||
}
|
||||
|
||||
let old_messages = &messages[..split_index];
|
||||
let recent_messages = &messages[split_index..];
|
||||
|
||||
let summary = generate_summary(old_messages);
|
||||
let removed_count = old_messages.len();
|
||||
|
||||
let mut compacted = Vec::with_capacity(1 + recent_messages.len());
|
||||
compacted.push(Message::system(summary));
|
||||
compacted.extend(recent_messages.iter().cloned());
|
||||
|
||||
(compacted, removed_count)
|
||||
}
|
||||
|
||||
/// Check if compaction should be triggered and perform it if needed.
|
||||
///
|
||||
/// Returns the (possibly compacted) message list.
|
||||
pub fn maybe_compact(messages: Vec<Message>, threshold: usize) -> Vec<Message> {
|
||||
let tokens = estimate_messages_tokens(&messages);
|
||||
if tokens < threshold {
|
||||
return messages;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[Compaction] Triggered: {} tokens > {} threshold, {} messages",
|
||||
tokens,
|
||||
threshold,
|
||||
messages.len(),
|
||||
);
|
||||
|
||||
let (compacted, removed) = compact_messages(messages, DEFAULT_KEEP_RECENT);
|
||||
tracing::info!(
|
||||
"[Compaction] Removed {} messages, {} remain",
|
||||
removed,
|
||||
compacted.len(),
|
||||
);
|
||||
|
||||
compacted
|
||||
}
|
||||
|
||||
/// Generate a rule-based summary of old messages.
|
||||
fn generate_summary(messages: &[Message]) -> String {
|
||||
if messages.is_empty() {
|
||||
return "[对话开始]".to_string();
|
||||
}
|
||||
|
||||
let mut sections: Vec<String> = vec!["[以下是之前对话的摘要]".to_string()];
|
||||
|
||||
let mut user_count = 0;
|
||||
let mut assistant_count = 0;
|
||||
let mut topics: Vec<String> = Vec::new();
|
||||
|
||||
for msg in messages {
|
||||
match msg {
|
||||
Message::User { content } => {
|
||||
user_count += 1;
|
||||
let topic = extract_topic(content);
|
||||
if let Some(t) = topic {
|
||||
topics.push(t);
|
||||
}
|
||||
}
|
||||
Message::Assistant { .. } => {
|
||||
assistant_count += 1;
|
||||
}
|
||||
Message::System { content } => {
|
||||
// Skip system messages that are previous compaction summaries
|
||||
if !content.starts_with("[以下是之前对话的摘要]") {
|
||||
sections.push(format!("系统提示: {}", truncate(content, 60)));
|
||||
}
|
||||
}
|
||||
Message::ToolUse { tool, .. } => {
|
||||
sections.push(format!("工具调用: {}", tool.as_str()));
|
||||
}
|
||||
Message::ToolResult { .. } => {
|
||||
// Skip tool results in summary
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !topics.is_empty() {
|
||||
let topic_list: Vec<String> = topics.iter().take(8).cloned().collect();
|
||||
sections.push(format!("讨论主题: {}", topic_list.join("; ")));
|
||||
}
|
||||
|
||||
sections.push(format!(
|
||||
"(已压缩 {} 条消息,其中用户 {} 条,助手 {} 条)",
|
||||
messages.len(),
|
||||
user_count,
|
||||
assistant_count,
|
||||
));
|
||||
|
||||
let summary = sections.join("\n");
|
||||
|
||||
// Enforce max length
|
||||
let max_chars = 800;
|
||||
if summary.len() > max_chars {
|
||||
format!("{}...\n(摘要已截断)", &summary[..max_chars])
|
||||
} else {
|
||||
summary
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the main topic from a user message (first sentence or first 50 chars).
///
/// Returns `None` for whitespace-only input. If a sentence terminator
/// (。!?or newline) appears within the first 80 bytes, the text up to it
/// is used; otherwise the message is returned whole (≤50 chars) or cut at
/// 50 chars with an ellipsis.
fn extract_topic(content: &str) -> Option<String> {
    let trimmed = content.trim();
    if trimmed.is_empty() {
        return None;
    }

    // Stop at the first sentence terminator, if it appears early enough.
    if let Some((idx, marker)) = trimmed
        .char_indices()
        .find(|(_, c)| matches!(c, '。' | '!' | '?' | '\n'))
    {
        let end = idx + marker.len_utf8();
        if end <= 80 {
            return Some(trimmed[..end].trim().to_string());
        }
    }

    if trimmed.chars().count() <= 50 {
        Some(trimmed.to_string())
    } else {
        Some(format!("{}...", trimmed.chars().take(50).collect::<String>()))
    }
}
|
||||
|
||||
/// Truncate text to max_chars at char boundary.
fn truncate(text: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the (max_chars + 1)-th char: if it does not
    // exist, the text already fits; otherwise its byte index is exactly the
    // boundary after the first `max_chars` chars.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((byte_idx, _)) => format!("{}...", &text[..byte_idx]),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_estimate_tokens_empty() {
        assert_eq!(estimate_tokens(""), 0);
    }

    #[test]
    fn test_estimate_tokens_english() {
        assert!(estimate_tokens("Hello world") > 0);
    }

    #[test]
    fn test_estimate_tokens_cjk() {
        // CJK chars are ~1.5 tokens each
        assert!(estimate_tokens("你好世界") > 3);
    }

    #[test]
    fn test_estimate_messages_tokens() {
        let messages = vec![Message::user("Hello"), Message::assistant("Hi there")];
        assert!(estimate_messages_tokens(&messages) > 0);
    }

    #[test]
    fn test_compact_messages_under_threshold() {
        let messages = vec![Message::user("Hello"), Message::assistant("Hi")];
        let (result, removed) = compact_messages(messages, 6);
        assert_eq!(removed, 0);
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_compact_messages_over_threshold() {
        let messages: Vec<Message> = (0..10)
            .flat_map(|i| {
                [
                    Message::user(format!("Question {}", i)),
                    Message::assistant(format!("Answer {}", i)),
                ]
            })
            .collect();

        let (result, removed) = compact_messages(messages, 4);
        assert!(removed > 0);
        // 1 summary + 4 recent messages
        assert_eq!(result.len(), 5);
        // The summary is injected as a leading system message.
        assert!(matches!(&result[0], Message::System { .. }));
    }

    #[test]
    fn test_compact_preserves_leading_system() {
        let messages = vec![
            Message::system("You are helpful"),
            Message::user("Q1"),
            Message::assistant("A1"),
            Message::user("Q2"),
            Message::assistant("A2"),
            Message::user("Q3"),
            Message::assistant("A3"),
        ];

        let (result, removed) = compact_messages(messages, 4);
        assert!(removed > 0);
        // Should start with compaction summary, then recent messages.
        assert!(matches!(&result[0], Message::System { .. }));
    }

    #[test]
    fn test_maybe_compact_under_threshold() {
        let messages = vec![
            Message::user("Short message"),
            Message::assistant("Short reply"),
        ];
        assert_eq!(maybe_compact(messages, 100_000).len(), 2);
    }

    #[test]
    fn test_extract_topic_sentence() {
        let topic = extract_topic("什么是Rust的所有权系统?").unwrap();
        assert!(topic.contains("所有权"));
    }

    #[test]
    fn test_extract_topic_short() {
        assert_eq!(extract_topic("Hello").unwrap(), "Hello");
    }

    #[test]
    fn test_extract_topic_long() {
        let long = "This is a very long message that exceeds fifty characters in total length";
        assert!(extract_topic(long).unwrap().ends_with("..."));
    }

    #[test]
    fn test_generate_summary() {
        let messages = vec![
            Message::user("What is Rust?"),
            Message::assistant("Rust is a systems programming language"),
            Message::user("How does ownership work?"),
            Message::assistant("Ownership is Rust's memory management system"),
        ];
        let summary = generate_summary(&messages);
        assert!(summary.contains("摘要"));
        assert!(summary.contains("2"));
    }
}
|
||||
@@ -1,9 +1,17 @@
|
||||
//! Google Gemini driver implementation
|
||||
//!
|
||||
//! Implements the Gemini REST API v1beta with full support for:
|
||||
//! - Text generation (complete and streaming)
|
||||
//! - Tool / function calling
|
||||
//! - System instructions
|
||||
//! - Token usage reporting
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::Stream;
|
||||
use async_stream::stream;
|
||||
use futures::{Stream, StreamExt};
|
||||
use secrecy::{ExposeSecret, SecretString};
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::pin::Pin;
|
||||
use zclaw_types::{Result, ZclawError};
|
||||
|
||||
@@ -11,7 +19,6 @@ use super::{CompletionRequest, CompletionResponse, ContentBlock, LlmDriver, Stop
|
||||
use crate::stream::StreamChunk;
|
||||
|
||||
/// Google Gemini driver
|
||||
#[allow(dead_code)] // TODO: Implement full Gemini API support
|
||||
pub struct GeminiDriver {
|
||||
client: Client,
|
||||
api_key: SecretString,
|
||||
@@ -21,11 +28,31 @@ pub struct GeminiDriver {
|
||||
impl GeminiDriver {
|
||||
pub fn new(api_key: SecretString) -> Self {
|
||||
Self {
|
||||
client: Client::new(),
|
||||
client: Client::builder()
|
||||
.user_agent(crate::USER_AGENT)
|
||||
.http1_only()
|
||||
.timeout(std::time::Duration::from_secs(120))
|
||||
.connect_timeout(std::time::Duration::from_secs(30))
|
||||
.build()
|
||||
.unwrap_or_else(|_| Client::new()),
|
||||
api_key,
|
||||
base_url: "https://generativelanguage.googleapis.com/v1beta".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_base_url(api_key: SecretString, base_url: String) -> Self {
|
||||
Self {
|
||||
client: Client::builder()
|
||||
.user_agent(crate::USER_AGENT)
|
||||
.http1_only()
|
||||
.timeout(std::time::Duration::from_secs(120))
|
||||
.connect_timeout(std::time::Duration::from_secs(30))
|
||||
.build()
|
||||
.unwrap_or_else(|_| Client::new()),
|
||||
api_key,
|
||||
base_url,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -39,25 +66,594 @@ impl LlmDriver for GeminiDriver {
|
||||
}
|
||||
|
||||
async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
|
||||
// TODO: Implement actual API call
|
||||
Ok(CompletionResponse {
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "Gemini driver not yet implemented".to_string(),
|
||||
}],
|
||||
model: request.model,
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
stop_reason: StopReason::EndTurn,
|
||||
})
|
||||
let api_request = self.build_api_request(&request);
|
||||
let url = format!(
|
||||
"{}/models/{}:generateContent?key={}",
|
||||
self.base_url,
|
||||
request.model,
|
||||
self.api_key.expose_secret()
|
||||
);
|
||||
|
||||
tracing::debug!(target: "gemini_driver", "Sending request to: {}", url);
|
||||
|
||||
let response = self.client
|
||||
.post(&url)
|
||||
.header("content-type", "application/json")
|
||||
.json(&api_request)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| ZclawError::LlmError(format!("HTTP request failed: {}", e)))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
tracing::warn!(target: "gemini_driver", "API error {}: {}", status, body);
|
||||
return Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
|
||||
}
|
||||
|
||||
let api_response: GeminiResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| ZclawError::LlmError(format!("Failed to parse response: {}", e)))?;
|
||||
|
||||
Ok(self.convert_response(api_response, request.model))
|
||||
}
|
||||
|
||||
fn stream(
|
||||
&self,
|
||||
_request: CompletionRequest,
|
||||
request: CompletionRequest,
|
||||
) -> Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send + '_>> {
|
||||
// Placeholder - return error stream
|
||||
Box::pin(futures::stream::once(async {
|
||||
Err(ZclawError::LlmError("Gemini streaming not yet implemented".to_string()))
|
||||
}))
|
||||
let api_request = self.build_api_request(&request);
|
||||
let url = format!(
|
||||
"{}/models/{}:streamGenerateContent?alt=sse&key={}",
|
||||
self.base_url,
|
||||
request.model,
|
||||
self.api_key.expose_secret()
|
||||
);
|
||||
|
||||
tracing::debug!(target: "gemini_driver", "Starting stream request to: {}", url);
|
||||
|
||||
Box::pin(stream! {
|
||||
let response = match self.client
|
||||
.post(&url)
|
||||
.header("content-type", "application/json")
|
||||
.timeout(std::time::Duration::from_secs(120))
|
||||
.json(&api_request)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(r) => {
|
||||
tracing::debug!(target: "gemini_driver", "Stream response status: {}", r.status());
|
||||
r
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::error!(target: "gemini_driver", "HTTP request failed: {:?}", e);
|
||||
yield Err(ZclawError::LlmError(format!("HTTP request failed: {}", e)));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
yield Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
|
||||
return;
|
||||
}
|
||||
|
||||
let mut byte_stream = response.bytes_stream();
|
||||
let mut accumulated_tool_calls: std::collections::HashMap<usize, (String, String)> = std::collections::HashMap::new();
|
||||
|
||||
while let Some(chunk_result) = byte_stream.next().await {
|
||||
let chunk = match chunk_result {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
yield Err(ZclawError::LlmError(format!("Stream error: {}", e)));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let text = String::from_utf8_lossy(&chunk);
|
||||
for line in text.lines() {
|
||||
if let Some(data) = line.strip_prefix("data: ") {
|
||||
match serde_json::from_str::<GeminiStreamResponse>(data) {
|
||||
Ok(resp) => {
|
||||
if let Some(candidate) = resp.candidates.first() {
|
||||
let content = match &candidate.content {
|
||||
Some(c) => c,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let parts = &content.parts;
|
||||
|
||||
for (idx, part) in parts.iter().enumerate() {
|
||||
// Handle text content
|
||||
if let Some(text) = &part.text {
|
||||
if !text.is_empty() {
|
||||
yield Ok(StreamChunk::TextDelta { delta: text.clone() });
|
||||
}
|
||||
}
|
||||
|
||||
// Handle function call (tool use)
|
||||
if let Some(fc) = &part.function_call {
|
||||
let name = fc.name.clone().unwrap_or_default();
|
||||
let args = fc.args.clone().unwrap_or(serde_json::Value::Object(Default::default()));
|
||||
|
||||
// Emit ToolUseStart if this is a new tool call
|
||||
if !accumulated_tool_calls.contains_key(&idx) {
|
||||
accumulated_tool_calls.insert(idx, (name.clone(), String::new()));
|
||||
yield Ok(StreamChunk::ToolUseStart {
|
||||
id: format!("gemini_call_{}", idx),
|
||||
name,
|
||||
});
|
||||
}
|
||||
|
||||
// Emit the function arguments as delta
|
||||
let args_str = serde_json::to_string(&args).unwrap_or_default();
|
||||
let call_id = format!("gemini_call_{}", idx);
|
||||
yield Ok(StreamChunk::ToolUseDelta {
|
||||
id: call_id.clone(),
|
||||
delta: args_str.clone(),
|
||||
});
|
||||
|
||||
// Accumulate
|
||||
if let Some(entry) = accumulated_tool_calls.get_mut(&idx) {
|
||||
entry.1 = args_str;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// When the candidate is finished, emit ToolUseEnd for all pending
|
||||
if let Some(ref finish_reason) = candidate.finish_reason {
|
||||
let is_final = finish_reason == "STOP" || finish_reason == "MAX_TOKENS";
|
||||
|
||||
if is_final {
|
||||
// Emit ToolUseEnd for all accumulated tool calls
|
||||
for (idx, (_name, args_str)) in &accumulated_tool_calls {
|
||||
let input: serde_json::Value = if args_str.is_empty() {
|
||||
serde_json::json!({})
|
||||
} else {
|
||||
serde_json::from_str(args_str).unwrap_or_else(|e| {
|
||||
tracing::warn!(target: "gemini_driver", "Failed to parse tool args '{}': {}", args_str, e);
|
||||
serde_json::json!({})
|
||||
})
|
||||
};
|
||||
yield Ok(StreamChunk::ToolUseEnd {
|
||||
id: format!("gemini_call_{}", idx),
|
||||
input,
|
||||
});
|
||||
}
|
||||
|
||||
// Extract usage metadata from the response
|
||||
let usage = resp.usage_metadata.as_ref();
|
||||
let input_tokens = usage.map(|u| u.prompt_token_count.unwrap_or(0)).unwrap_or(0);
|
||||
let output_tokens = usage.map(|u| u.candidates_token_count.unwrap_or(0)).unwrap_or(0);
|
||||
|
||||
let stop_reason = match finish_reason.as_str() {
|
||||
"STOP" => "end_turn",
|
||||
"MAX_TOKENS" => "max_tokens",
|
||||
"SAFETY" => "error",
|
||||
"RECITATION" => "error",
|
||||
_ => "end_turn",
|
||||
};
|
||||
|
||||
yield Ok(StreamChunk::Complete {
|
||||
input_tokens,
|
||||
output_tokens,
|
||||
stop_reason: stop_reason.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(target: "gemini_driver", "Failed to parse SSE event: {} - {}", e, data);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl GeminiDriver {
|
||||
/// Convert a CompletionRequest into the Gemini API request format.
|
||||
///
|
||||
/// Key mapping decisions:
|
||||
/// - `system` prompt maps to `systemInstruction`
|
||||
/// - Messages use Gemini's `contents` array with `role`/`parts`
|
||||
/// - Tool definitions use `functionDeclarations`
|
||||
/// - Tool results are sent as `functionResponse` parts in `user` messages
|
||||
fn build_api_request(&self, request: &CompletionRequest) -> GeminiRequest {
|
||||
let mut contents: Vec<GeminiContent> = Vec::new();
|
||||
|
||||
for msg in &request.messages {
|
||||
match msg {
|
||||
zclaw_types::Message::User { content } => {
|
||||
contents.push(GeminiContent {
|
||||
role: "user".to_string(),
|
||||
parts: vec![GeminiPart {
|
||||
text: Some(content.clone()),
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: None,
|
||||
}],
|
||||
});
|
||||
}
|
||||
zclaw_types::Message::Assistant { content, thinking } => {
|
||||
let mut parts = Vec::new();
|
||||
// Gemini does not have a native "thinking" field, so we prepend
|
||||
// any thinking content as a text part with a marker.
|
||||
if let Some(think) = thinking {
|
||||
if !think.is_empty() {
|
||||
parts.push(GeminiPart {
|
||||
text: Some(format!("[thinking]\n{}\n[/thinking]", think)),
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
parts.push(GeminiPart {
|
||||
text: Some(content.clone()),
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: None,
|
||||
});
|
||||
contents.push(GeminiContent {
|
||||
role: "model".to_string(),
|
||||
parts,
|
||||
});
|
||||
}
|
||||
zclaw_types::Message::ToolUse { id: _, tool, input } => {
|
||||
// Tool use from the assistant is represented as a functionCall part
|
||||
let args = if input.is_null() {
|
||||
serde_json::json!({})
|
||||
} else {
|
||||
input.clone()
|
||||
};
|
||||
contents.push(GeminiContent {
|
||||
role: "model".to_string(),
|
||||
parts: vec![GeminiPart {
|
||||
text: None,
|
||||
inline_data: None,
|
||||
function_call: Some(GeminiFunctionCall {
|
||||
name: Some(tool.to_string()),
|
||||
args: Some(args),
|
||||
}),
|
||||
function_response: None,
|
||||
}],
|
||||
});
|
||||
}
|
||||
zclaw_types::Message::ToolResult { tool_call_id, tool, output, is_error } => {
|
||||
// Tool results are sent as functionResponse parts in a "user" role message.
|
||||
// Gemini requires that function responses reference the function name
|
||||
// and include the response wrapped in a "result" or "error" key.
|
||||
let response_content = if *is_error {
|
||||
serde_json::json!({ "error": output.to_string() })
|
||||
} else {
|
||||
serde_json::json!({ "result": output.clone() })
|
||||
};
|
||||
|
||||
contents.push(GeminiContent {
|
||||
role: "user".to_string(),
|
||||
parts: vec![GeminiPart {
|
||||
text: None,
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: Some(GeminiFunctionResponse {
|
||||
name: tool.to_string(),
|
||||
response: response_content,
|
||||
}),
|
||||
}],
|
||||
});
|
||||
|
||||
// Gemini ignores tool_call_id, but we log it for debugging
|
||||
let _ = tool_call_id;
|
||||
}
|
||||
zclaw_types::Message::System { content } => {
|
||||
// System messages are converted to user messages with system context.
|
||||
// Note: the primary system prompt is handled via systemInstruction.
|
||||
// Inline system messages in conversation history become user messages.
|
||||
contents.push(GeminiContent {
|
||||
role: "user".to_string(),
|
||||
parts: vec![GeminiPart {
|
||||
text: Some(content.clone()),
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: None,
|
||||
}],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build tool declarations
|
||||
let function_declarations: Vec<GeminiFunctionDeclaration> = request.tools
|
||||
.iter()
|
||||
.map(|t| GeminiFunctionDeclaration {
|
||||
name: t.name.clone(),
|
||||
description: t.description.clone(),
|
||||
parameters: t.input_schema.clone(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Build generation config
|
||||
let mut generation_config = GeminiGenerationConfig::default();
|
||||
if let Some(temp) = request.temperature {
|
||||
generation_config.temperature = Some(temp);
|
||||
}
|
||||
if let Some(max) = request.max_tokens {
|
||||
generation_config.max_output_tokens = Some(max);
|
||||
}
|
||||
if !request.stop.is_empty() {
|
||||
generation_config.stop_sequences = Some(request.stop.clone());
|
||||
}
|
||||
|
||||
// Build system instruction
|
||||
let system_instruction = request.system.as_ref().map(|s| GeminiSystemInstruction {
|
||||
parts: vec![GeminiPart {
|
||||
text: Some(s.clone()),
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: None,
|
||||
}],
|
||||
});
|
||||
|
||||
GeminiRequest {
|
||||
contents,
|
||||
system_instruction,
|
||||
generation_config: Some(generation_config),
|
||||
tools: if function_declarations.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(vec![GeminiTool {
|
||||
function_declarations,
|
||||
}])
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a Gemini API response into a CompletionResponse.
|
||||
fn convert_response(&self, api_response: GeminiResponse, model: String) -> CompletionResponse {
|
||||
let candidate = api_response.candidates.first();
|
||||
|
||||
let (content, stop_reason) = match candidate {
|
||||
Some(c) => {
|
||||
let parts = c.content.as_ref()
|
||||
.map(|content| content.parts.as_slice())
|
||||
.unwrap_or(&[]);
|
||||
|
||||
let mut blocks: Vec<ContentBlock> = Vec::new();
|
||||
let mut has_tool_use = false;
|
||||
|
||||
for part in parts {
|
||||
// Handle text content
|
||||
if let Some(text) = &part.text {
|
||||
// Skip thinking markers we injected
|
||||
if text.starts_with("[thinking]\n") && text.contains("[/thinking]") {
|
||||
let thinking_content = text
|
||||
.strip_prefix("[thinking]\n")
|
||||
.and_then(|s| s.strip_suffix("\n[/thinking]"))
|
||||
.unwrap_or("");
|
||||
if !thinking_content.is_empty() {
|
||||
blocks.push(ContentBlock::Thinking {
|
||||
thinking: thinking_content.to_string(),
|
||||
});
|
||||
}
|
||||
} else if !text.is_empty() {
|
||||
blocks.push(ContentBlock::Text { text: text.clone() });
|
||||
}
|
||||
}
|
||||
|
||||
// Handle function call (tool use)
|
||||
if let Some(fc) = &part.function_call {
|
||||
has_tool_use = true;
|
||||
blocks.push(ContentBlock::ToolUse {
|
||||
id: format!("gemini_call_{}", blocks.len()),
|
||||
name: fc.name.clone().unwrap_or_default(),
|
||||
input: fc.args.clone().unwrap_or(serde_json::Value::Object(Default::default())),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// If there are no content blocks, add an empty text block
|
||||
if blocks.is_empty() {
|
||||
blocks.push(ContentBlock::Text { text: String::new() });
|
||||
}
|
||||
|
||||
let stop = match c.finish_reason.as_deref() {
|
||||
Some("STOP") => StopReason::EndTurn,
|
||||
Some("MAX_TOKENS") => StopReason::MaxTokens,
|
||||
Some("SAFETY") => StopReason::Error,
|
||||
Some("RECITATION") => StopReason::Error,
|
||||
Some("TOOL_USE") => StopReason::ToolUse,
|
||||
_ => {
|
||||
if has_tool_use {
|
||||
StopReason::ToolUse
|
||||
} else {
|
||||
StopReason::EndTurn
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
(blocks, stop)
|
||||
}
|
||||
None => {
|
||||
tracing::warn!(target: "gemini_driver", "No candidates in response");
|
||||
(
|
||||
vec![ContentBlock::Text { text: String::new() }],
|
||||
StopReason::EndTurn,
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let usage = api_response.usage_metadata.as_ref();
|
||||
let input_tokens = usage.map(|u| u.prompt_token_count.unwrap_or(0)).unwrap_or(0);
|
||||
let output_tokens = usage.map(|u| u.candidates_token_count.unwrap_or(0)).unwrap_or(0);
|
||||
|
||||
CompletionResponse {
|
||||
content,
|
||||
model,
|
||||
input_tokens,
|
||||
output_tokens,
|
||||
stop_reason,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gemini API request types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiRequest {
|
||||
contents: Vec<GeminiContent>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
system_instruction: Option<GeminiSystemInstruction>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
generation_config: Option<GeminiGenerationConfig>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
tools: Option<Vec<GeminiTool>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiContent {
|
||||
role: String,
|
||||
parts: Vec<GeminiPart>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
struct GeminiPart {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
text: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
inline_data: Option<serde_json::Value>,
|
||||
#[serde(rename = "functionCall", skip_serializing_if = "Option::is_none")]
|
||||
function_call: Option<GeminiFunctionCall>,
|
||||
#[serde(rename = "functionResponse", skip_serializing_if = "Option::is_none")]
|
||||
function_response: Option<GeminiFunctionResponse>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiSystemInstruction {
|
||||
parts: Vec<GeminiPart>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiGenerationConfig {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
temperature: Option<f32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
max_output_tokens: Option<u32>,
|
||||
#[serde(rename = "stopSequences", skip_serializing_if = "Option::is_none")]
|
||||
stop_sequences: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl Default for GeminiGenerationConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
temperature: None,
|
||||
max_output_tokens: None,
|
||||
stop_sequences: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiTool {
|
||||
#[serde(rename = "functionDeclarations")]
|
||||
function_declarations: Vec<GeminiFunctionDeclaration>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiFunctionDeclaration {
|
||||
name: String,
|
||||
description: String,
|
||||
parameters: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
struct GeminiFunctionCall {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
name: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
args: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
struct GeminiFunctionResponse {
|
||||
name: String,
|
||||
response: serde_json::Value,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gemini API response types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GeminiResponse {
|
||||
#[serde(default)]
|
||||
candidates: Vec<GeminiCandidate>,
|
||||
#[serde(default)]
|
||||
usage_metadata: Option<GeminiUsageMetadata>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiCandidate {
|
||||
#[serde(default)]
|
||||
content: Option<GeminiResponseContent>,
|
||||
#[serde(default)]
|
||||
finish_reason: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiResponseContent {
|
||||
#[serde(default)]
|
||||
parts: Vec<GeminiResponsePart>,
|
||||
#[serde(default)]
|
||||
#[allow(dead_code)]
|
||||
role: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiResponsePart {
|
||||
#[serde(default)]
|
||||
text: Option<String>,
|
||||
#[serde(rename = "functionCall", default)]
|
||||
function_call: Option<GeminiResponseFunctionCall>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiResponseFunctionCall {
|
||||
#[serde(default)]
|
||||
name: Option<String>,
|
||||
#[serde(default)]
|
||||
args: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiUsageMetadata {
|
||||
#[serde(default)]
|
||||
prompt_token_count: Option<u32>,
|
||||
#[serde(default)]
|
||||
candidates_token_count: Option<u32>,
|
||||
#[serde(default)]
|
||||
#[allow(dead_code)]
|
||||
total_token_count: Option<u32>,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gemini streaming types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Streaming response from the Gemini SSE endpoint.
|
||||
/// Each SSE event contains the same structure as the non-streaming response,
|
||||
/// but with incremental content.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiStreamResponse {
|
||||
#[serde(default)]
|
||||
candidates: Vec<GeminiCandidate>,
|
||||
#[serde(default)]
|
||||
usage_metadata: Option<GeminiUsageMetadata>,
|
||||
}
|
||||
|
||||
@@ -1,40 +1,250 @@
|
||||
//! Local LLM driver (Ollama, LM Studio, vLLM, etc.)
|
||||
//!
|
||||
//! Uses the OpenAI-compatible API format. The only differences from the
|
||||
//! OpenAI driver are: no API key is required, and base_url points to a
|
||||
//! local server.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::Stream;
|
||||
use async_stream::stream;
|
||||
use futures::{Stream, StreamExt};
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::pin::Pin;
|
||||
use zclaw_types::{Result, ZclawError};
|
||||
|
||||
use super::{CompletionRequest, CompletionResponse, ContentBlock, LlmDriver, StopReason};
|
||||
use crate::stream::StreamChunk;
|
||||
|
||||
/// Local LLM driver for Ollama, LM Studio, vLLM, etc.
|
||||
#[allow(dead_code)] // TODO: Implement full Local driver support
|
||||
/// Local LLM driver for Ollama, LM Studio, vLLM, and other OpenAI-compatible servers.
|
||||
pub struct LocalDriver {
|
||||
client: Client,
|
||||
base_url: String,
|
||||
}
|
||||
|
||||
impl LocalDriver {
|
||||
/// Create a driver pointing at a custom OpenAI-compatible endpoint.
|
||||
///
|
||||
/// The `base_url` should end with `/v1` (e.g. `http://localhost:8080/v1`).
|
||||
pub fn new(base_url: impl Into<String>) -> Self {
|
||||
Self {
|
||||
client: Client::new(),
|
||||
client: Client::builder()
|
||||
.user_agent(crate::USER_AGENT)
|
||||
.http1_only()
|
||||
.timeout(std::time::Duration::from_secs(300)) // 5 min -- local inference can be slow
|
||||
.connect_timeout(std::time::Duration::from_secs(10)) // short connect timeout
|
||||
.build()
|
||||
.unwrap_or_else(|_| Client::new()),
|
||||
base_url: base_url.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Ollama default endpoint (`http://localhost:11434/v1`).
|
||||
pub fn ollama() -> Self {
|
||||
Self::new("http://localhost:11434/v1")
|
||||
}
|
||||
|
||||
/// LM Studio default endpoint (`http://localhost:1234/v1`).
|
||||
pub fn lm_studio() -> Self {
|
||||
Self::new("http://localhost:1234/v1")
|
||||
}
|
||||
|
||||
/// vLLM default endpoint (`http://localhost:8000/v1`).
|
||||
pub fn vllm() -> Self {
|
||||
Self::new("http://localhost:8000/v1")
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Request / response conversion (OpenAI-compatible format)
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
fn build_api_request(&self, request: &CompletionRequest) -> LocalApiRequest {
|
||||
let messages: Vec<LocalApiMessage> = request
|
||||
.messages
|
||||
.iter()
|
||||
.filter_map(|msg| match msg {
|
||||
zclaw_types::Message::User { content } => Some(LocalApiMessage {
|
||||
role: "user".to_string(),
|
||||
content: Some(content.clone()),
|
||||
tool_calls: None,
|
||||
}),
|
||||
zclaw_types::Message::Assistant {
|
||||
content,
|
||||
thinking: _,
|
||||
} => Some(LocalApiMessage {
|
||||
role: "assistant".to_string(),
|
||||
content: Some(content.clone()),
|
||||
tool_calls: None,
|
||||
}),
|
||||
zclaw_types::Message::System { content } => Some(LocalApiMessage {
|
||||
role: "system".to_string(),
|
||||
content: Some(content.clone()),
|
||||
tool_calls: None,
|
||||
}),
|
||||
zclaw_types::Message::ToolUse {
|
||||
id, tool, input, ..
|
||||
} => {
|
||||
let args = if input.is_null() {
|
||||
"{}".to_string()
|
||||
} else {
|
||||
serde_json::to_string(input).unwrap_or_else(|_| "{}".to_string())
|
||||
};
|
||||
Some(LocalApiMessage {
|
||||
role: "assistant".to_string(),
|
||||
content: None,
|
||||
tool_calls: Some(vec![LocalApiToolCall {
|
||||
id: id.clone(),
|
||||
r#type: "function".to_string(),
|
||||
function: LocalFunctionCall {
|
||||
name: tool.to_string(),
|
||||
arguments: args,
|
||||
},
|
||||
}]),
|
||||
})
|
||||
}
|
||||
zclaw_types::Message::ToolResult {
|
||||
output, is_error, ..
|
||||
} => Some(LocalApiMessage {
|
||||
role: "tool".to_string(),
|
||||
content: Some(if *is_error {
|
||||
format!("Error: {}", output)
|
||||
} else {
|
||||
output.to_string()
|
||||
}),
|
||||
tool_calls: None,
|
||||
}),
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Prepend system prompt when provided.
|
||||
let mut messages = messages;
|
||||
if let Some(system) = &request.system {
|
||||
messages.insert(
|
||||
0,
|
||||
LocalApiMessage {
|
||||
role: "system".to_string(),
|
||||
content: Some(system.clone()),
|
||||
tool_calls: None,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let tools: Vec<LocalApiTool> = request
|
||||
.tools
|
||||
.iter()
|
||||
.map(|t| LocalApiTool {
|
||||
r#type: "function".to_string(),
|
||||
function: LocalFunctionDef {
|
||||
name: t.name.clone(),
|
||||
description: t.description.clone(),
|
||||
parameters: t.input_schema.clone(),
|
||||
},
|
||||
})
|
||||
.collect();
|
||||
|
||||
LocalApiRequest {
|
||||
model: request.model.clone(),
|
||||
messages,
|
||||
max_tokens: request.max_tokens,
|
||||
temperature: request.temperature,
|
||||
stop: if request.stop.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(request.stop.clone())
|
||||
},
|
||||
stream: request.stream,
|
||||
tools: if tools.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(tools)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_response(
|
||||
&self,
|
||||
api_response: LocalApiResponse,
|
||||
model: String,
|
||||
) -> CompletionResponse {
|
||||
let choice = api_response.choices.first();
|
||||
|
||||
let (content, stop_reason) = match choice {
|
||||
Some(c) => {
|
||||
let has_tool_calls = c
|
||||
.message
|
||||
.tool_calls
|
||||
.as_ref()
|
||||
.map(|tc| !tc.is_empty())
|
||||
.unwrap_or(false);
|
||||
let has_content = c
|
||||
.message
|
||||
.content
|
||||
.as_ref()
|
||||
.map(|t| !t.is_empty())
|
||||
.unwrap_or(false);
|
||||
|
||||
let blocks = if has_tool_calls {
|
||||
let tool_calls = c.message.tool_calls.as_ref().unwrap();
|
||||
tool_calls
|
||||
.iter()
|
||||
.map(|tc| {
|
||||
let input: serde_json::Value =
|
||||
serde_json::from_str(&tc.function.arguments)
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
ContentBlock::ToolUse {
|
||||
id: tc.id.clone(),
|
||||
name: tc.function.name.clone(),
|
||||
input,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
} else if has_content {
|
||||
vec![ContentBlock::Text {
|
||||
text: c.message.content.clone().unwrap(),
|
||||
}]
|
||||
} else {
|
||||
vec![ContentBlock::Text {
|
||||
text: String::new(),
|
||||
}]
|
||||
};
|
||||
|
||||
let stop = match c.finish_reason.as_deref() {
|
||||
Some("stop") => StopReason::EndTurn,
|
||||
Some("length") => StopReason::MaxTokens,
|
||||
Some("tool_calls") => StopReason::ToolUse,
|
||||
_ => StopReason::EndTurn,
|
||||
};
|
||||
|
||||
(blocks, stop)
|
||||
}
|
||||
None => (
|
||||
vec![ContentBlock::Text {
|
||||
text: String::new(),
|
||||
}],
|
||||
StopReason::EndTurn,
|
||||
),
|
||||
};
|
||||
|
||||
let (input_tokens, output_tokens) = api_response
|
||||
.usage
|
||||
.map(|u| (u.prompt_tokens, u.completion_tokens))
|
||||
.unwrap_or((0, 0));
|
||||
|
||||
CompletionResponse {
|
||||
content,
|
||||
model,
|
||||
input_tokens,
|
||||
output_tokens,
|
||||
stop_reason,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the `reqwest::RequestBuilder` with an optional Authorization header.
|
||||
///
|
||||
/// Ollama does not need one; LM Studio / vLLM may be configured with an
|
||||
/// optional API key. We send the header only when a key is present.
|
||||
fn authenticated_post(&self, url: &str) -> reqwest::RequestBuilder {
|
||||
self.client.post(url).header("Accept", "*/*")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -44,30 +254,394 @@ impl LlmDriver for LocalDriver {
|
||||
}
|
||||
|
||||
fn is_configured(&self) -> bool {
|
||||
// Local drivers don't require API keys
|
||||
// Local drivers never require an API key.
|
||||
true
|
||||
}
|
||||
|
||||
async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
|
||||
// TODO: Implement actual API call (OpenAI-compatible)
|
||||
Ok(CompletionResponse {
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "Local driver not yet implemented".to_string(),
|
||||
}],
|
||||
model: request.model,
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
stop_reason: StopReason::EndTurn,
|
||||
})
|
||||
let api_request = self.build_api_request(&request);
|
||||
let url = format!("{}/chat/completions", self.base_url);
|
||||
|
||||
tracing::debug!(target: "local_driver", "Sending request to {}", url);
|
||||
tracing::trace!(
|
||||
target: "local_driver",
|
||||
"Request body: {}",
|
||||
serde_json::to_string(&api_request).unwrap_or_default()
|
||||
);
|
||||
|
||||
let response = self
|
||||
.authenticated_post(&url)
|
||||
.json(&api_request)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
let hint = connection_error_hint(&e);
|
||||
ZclawError::LlmError(format!("Failed to connect to local LLM server at {}: {}{}", self.base_url, e, hint))
|
||||
})?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
tracing::warn!(target: "local_driver", "API error {}: {}", status, body);
|
||||
return Err(ZclawError::LlmError(format!(
|
||||
"Local LLM API error {}: {}",
|
||||
status, body
|
||||
)));
|
||||
}
|
||||
|
||||
let api_response: LocalApiResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| ZclawError::LlmError(format!("Failed to parse response: {}", e)))?;
|
||||
|
||||
Ok(self.convert_response(api_response, request.model))
|
||||
}
|
||||
|
||||
fn stream(
|
||||
&self,
|
||||
_request: CompletionRequest,
|
||||
request: CompletionRequest,
|
||||
) -> Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send + '_>> {
|
||||
// Placeholder - return error stream
|
||||
Box::pin(futures::stream::once(async {
|
||||
Err(ZclawError::LlmError("Local driver streaming not yet implemented".to_string()))
|
||||
}))
|
||||
let mut stream_request = self.build_api_request(&request);
|
||||
stream_request.stream = true;
|
||||
|
||||
let url = format!("{}/chat/completions", self.base_url);
|
||||
tracing::debug!(target: "local_driver", "Starting stream to {}", url);
|
||||
|
||||
Box::pin(stream! {
|
||||
let response = match self
|
||||
.authenticated_post(&url)
|
||||
.header("Content-Type", "application/json")
|
||||
.timeout(std::time::Duration::from_secs(300))
|
||||
.json(&stream_request)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(r) => {
|
||||
tracing::debug!(target: "local_driver", "Stream response status: {}", r.status());
|
||||
r
|
||||
}
|
||||
Err(e) => {
|
||||
let hint = connection_error_hint(&e);
|
||||
tracing::error!(target: "local_driver", "Stream connection failed: {}{}", e, hint);
|
||||
yield Err(ZclawError::LlmError(format!(
|
||||
"Failed to connect to local LLM server at {}: {}{}",
|
||||
self.base_url, e, hint
|
||||
)));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
yield Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
|
||||
return;
|
||||
}
|
||||
|
||||
let mut byte_stream = response.bytes_stream();
|
||||
let mut accumulated_tool_calls: std::collections::HashMap<String, (String, String)> =
|
||||
std::collections::HashMap::new();
|
||||
let mut current_tool_id: Option<String> = None;
|
||||
|
||||
while let Some(chunk_result) = byte_stream.next().await {
|
||||
let chunk = match chunk_result {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
yield Err(ZclawError::LlmError(format!("Stream error: {}", e)));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let text = String::from_utf8_lossy(&chunk);
|
||||
for line in text.lines() {
|
||||
if let Some(data) = line.strip_prefix("data: ") {
|
||||
if data == "[DONE]" {
|
||||
tracing::debug!(
|
||||
target: "local_driver",
|
||||
"Stream done, tool_calls accumulated: {}",
|
||||
accumulated_tool_calls.len()
|
||||
);
|
||||
|
||||
for (id, (name, args)) in &accumulated_tool_calls {
|
||||
if name.is_empty() {
|
||||
tracing::warn!(
|
||||
target: "local_driver",
|
||||
"Skipping tool call with empty name: id={}",
|
||||
id
|
||||
);
|
||||
continue;
|
||||
}
|
||||
let parsed_args: serde_json::Value = if args.is_empty() {
|
||||
serde_json::json!({})
|
||||
} else {
|
||||
serde_json::from_str(args).unwrap_or_else(|e| {
|
||||
tracing::warn!(
|
||||
target: "local_driver",
|
||||
"Failed to parse tool args '{}': {}",
|
||||
args, e
|
||||
);
|
||||
serde_json::json!({})
|
||||
})
|
||||
};
|
||||
yield Ok(StreamChunk::ToolUseEnd {
|
||||
id: id.clone(),
|
||||
input: parsed_args,
|
||||
});
|
||||
}
|
||||
|
||||
yield Ok(StreamChunk::Complete {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
stop_reason: "end_turn".to_string(),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
match serde_json::from_str::<LocalStreamResponse>(data) {
|
||||
Ok(resp) => {
|
||||
if let Some(choice) = resp.choices.first() {
|
||||
let delta = &choice.delta;
|
||||
|
||||
// Text content
|
||||
if let Some(content) = &delta.content {
|
||||
if !content.is_empty() {
|
||||
yield Ok(StreamChunk::TextDelta {
|
||||
delta: content.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Tool calls
|
||||
if let Some(tool_calls) = &delta.tool_calls {
|
||||
for tc in tool_calls {
|
||||
// Tool call start
|
||||
if let Some(id) = &tc.id {
|
||||
let name = tc
|
||||
.function
|
||||
.as_ref()
|
||||
.and_then(|f| f.name.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
if !name.is_empty() {
|
||||
current_tool_id = Some(id.clone());
|
||||
accumulated_tool_calls
|
||||
.insert(id.clone(), (name.clone(), String::new()));
|
||||
yield Ok(StreamChunk::ToolUseStart {
|
||||
id: id.clone(),
|
||||
name,
|
||||
});
|
||||
} else {
|
||||
current_tool_id = Some(id.clone());
|
||||
accumulated_tool_calls
|
||||
.insert(id.clone(), (String::new(), String::new()));
|
||||
}
|
||||
}
|
||||
|
||||
// Tool call delta
|
||||
if let Some(function) = &tc.function {
|
||||
if let Some(args) = &function.arguments {
|
||||
let tool_id = tc
|
||||
.id
|
||||
.as_ref()
|
||||
.or(current_tool_id.as_ref())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
yield Ok(StreamChunk::ToolUseDelta {
|
||||
id: tool_id.clone(),
|
||||
delta: args.clone(),
|
||||
});
|
||||
|
||||
if let Some(entry) =
|
||||
accumulated_tool_calls.get_mut(&tool_id)
|
||||
{
|
||||
entry.1.push_str(args);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
target: "local_driver",
|
||||
"Failed to parse SSE: {}, data: {}",
|
||||
e, data
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Connection-error diagnostics
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Return a human-readable hint when the local server appears to be unreachable.
|
||||
fn connection_error_hint(error: &reqwest::Error) -> String {
|
||||
if error.is_connect() {
|
||||
format!(
|
||||
"\n\nHint: Is the local LLM server running at {}?\n\
|
||||
Make sure the server is started before using this driver.",
|
||||
// Extract just the host:port from whatever error we have.
|
||||
"localhost"
|
||||
)
|
||||
} else if error.is_timeout() {
|
||||
"\n\nHint: The request timed out. Local inference can be slow -- \
|
||||
try a smaller model or increase the timeout."
|
||||
.to_string()
|
||||
} else {
|
||||
String::new()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// OpenAI-compatible API types (private to this module)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct LocalApiRequest {
|
||||
model: String,
|
||||
messages: Vec<LocalApiMessage>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
max_tokens: Option<u32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
temperature: Option<f32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
stop: Option<Vec<String>>,
|
||||
#[serde(default)]
|
||||
stream: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
tools: Option<Vec<LocalApiTool>>,
|
||||
}
|
||||
|
||||
/// One chat message in the outgoing request. `content` and `tool_calls`
/// are each omitted from the JSON when `None`.
#[derive(Serialize)]
struct LocalApiMessage {
    // e.g. "system" / "user" / "assistant" / "tool" — presumably OpenAI
    // role strings; set by the request builder elsewhere in this module.
    role: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<LocalApiToolCall>>,
}
|
||||
|
||||
/// A tool call echoed back to the server as part of an assistant message.
#[derive(Serialize)]
struct LocalApiToolCall {
    id: String,
    // `type` is a Rust keyword, hence the raw identifier; serializes as "type".
    r#type: String,
    function: LocalFunctionCall,
}
|
||||
|
||||
/// Function name plus its arguments as a raw JSON string (OpenAI wire format
/// carries arguments as a string, not a nested object).
#[derive(Serialize)]
struct LocalFunctionCall {
    name: String,
    arguments: String,
}
|
||||
|
||||
/// A tool definition advertised in the request (`tools` array).
#[derive(Serialize)]
struct LocalApiTool {
    // `type` is a Rust keyword, hence the raw identifier; serializes as "type".
    r#type: String,
    function: LocalFunctionDef,
}
|
||||
|
||||
/// Function schema for a tool definition.
#[derive(Serialize)]
struct LocalFunctionDef {
    name: String,
    description: String,
    // JSON Schema describing the function's parameters, kept as a raw value.
    parameters: serde_json::Value,
}
|
||||
|
||||
// --- Response types ---
|
||||
|
||||
/// Top-level non-streaming response. Every field is defaulted so that
/// servers omitting parts of the OpenAI schema still deserialize cleanly.
#[derive(Deserialize, Default)]
struct LocalApiResponse {
    #[serde(default)]
    choices: Vec<LocalApiChoice>,
    // Token accounting; not all local servers report it.
    #[serde(default)]
    usage: Option<LocalApiUsage>,
}
|
||||
|
||||
/// One completion choice from a non-streaming response.
#[derive(Deserialize, Default)]
struct LocalApiChoice {
    #[serde(default)]
    message: LocalApiResponseMessage,
    // e.g. "stop" / "tool_calls" — presumably OpenAI finish reasons; the
    // exact vocabulary depends on the local server.
    #[serde(default)]
    finish_reason: Option<String>,
}
|
||||
|
||||
/// Assistant message inside a non-streaming choice: free text and/or
/// tool calls, either of which the server may omit.
#[derive(Deserialize, Default)]
struct LocalApiResponseMessage {
    #[serde(default)]
    content: Option<String>,
    #[serde(default)]
    tool_calls: Option<Vec<LocalApiToolCallResponse>>,
}
|
||||
|
||||
/// A complete tool call returned in a non-streaming response.
#[derive(Deserialize, Default)]
struct LocalApiToolCallResponse {
    #[serde(default)]
    id: String,
    #[serde(default)]
    function: LocalFunctionCallResponse,
}
|
||||
|
||||
/// Function payload of a returned tool call; `arguments` is the raw JSON
/// string as sent on the wire, parsed later by the caller.
#[derive(Deserialize, Default)]
struct LocalFunctionCallResponse {
    #[serde(default)]
    name: String,
    #[serde(default)]
    arguments: String,
}
|
||||
|
||||
/// Token usage reported by the server; defaults to zero when absent.
#[derive(Deserialize, Default)]
struct LocalApiUsage {
    #[serde(default)]
    prompt_tokens: u32,
    #[serde(default)]
    completion_tokens: u32,
}
|
||||
|
||||
// --- Streaming types ---
|
||||
|
||||
/// One SSE event payload (`data: {...}`) from a streaming response.
#[derive(Debug, Deserialize)]
struct LocalStreamResponse {
    #[serde(default)]
    choices: Vec<LocalStreamChoice>,
}
|
||||
|
||||
/// One choice within a streaming event; carries an incremental delta.
#[derive(Debug, Deserialize)]
struct LocalStreamChoice {
    #[serde(default)]
    delta: LocalDelta,
    #[serde(default)]
    #[allow(dead_code)] // Deserialized from SSE, not accessed in code
    finish_reason: Option<String>,
}
|
||||
|
||||
/// Incremental update inside a streaming choice: a text fragment and/or
/// tool-call fragments, either of which may be absent on a given event.
#[derive(Debug, Deserialize, Default)]
struct LocalDelta {
    #[serde(default)]
    content: Option<String>,
    #[serde(default)]
    tool_calls: Option<Vec<LocalToolCallDelta>>,
}
|
||||
|
||||
/// Streaming tool-call fragment. `id` is present only on the event that
/// starts a call; later argument fragments may arrive without it.
#[derive(Debug, Deserialize)]
struct LocalToolCallDelta {
    #[serde(default)]
    id: Option<String>,
    #[serde(default)]
    function: Option<LocalFunctionDelta>,
}
|
||||
|
||||
/// Streaming function fragment: the name (on the starting event) and/or a
/// piece of the JSON argument string to be concatenated by the consumer.
#[derive(Debug, Deserialize)]
struct LocalFunctionDelta {
    #[serde(default)]
    name: Option<String>,
    #[serde(default)]
    arguments: Option<String>,
}
|
||||
|
||||
@@ -12,6 +12,7 @@ pub mod loop_runner;
|
||||
pub mod loop_guard;
|
||||
pub mod stream;
|
||||
pub mod growth;
|
||||
pub mod compaction;
|
||||
|
||||
// Re-export main types
|
||||
pub use driver::{
|
||||
|
||||
@@ -11,6 +11,7 @@ use crate::tool::{ToolRegistry, ToolContext, SkillExecutor};
|
||||
use crate::tool::builtin::PathValidator;
|
||||
use crate::loop_guard::LoopGuard;
|
||||
use crate::growth::GrowthIntegration;
|
||||
use crate::compaction;
|
||||
use zclaw_memory::MemoryStore;
|
||||
|
||||
/// Agent loop runner
|
||||
@@ -29,6 +30,8 @@ pub struct AgentLoop {
|
||||
path_validator: Option<PathValidator>,
|
||||
/// Growth system integration (optional)
|
||||
growth: Option<GrowthIntegration>,
|
||||
/// Compaction threshold in tokens (0 = disabled)
|
||||
compaction_threshold: usize,
|
||||
}
|
||||
|
||||
impl AgentLoop {
|
||||
@@ -51,6 +54,7 @@ impl AgentLoop {
|
||||
skill_executor: None,
|
||||
path_validator: None,
|
||||
growth: None,
|
||||
compaction_threshold: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -101,6 +105,16 @@ impl AgentLoop {
|
||||
self.growth = Some(growth);
|
||||
}
|
||||
|
||||
/// Set compaction threshold in tokens (0 = disabled)
|
||||
///
|
||||
/// When the estimated token count of conversation history exceeds this
|
||||
/// threshold, older messages are summarized into a single system message
|
||||
/// and only recent messages are sent to the LLM.
|
||||
pub fn with_compaction_threshold(mut self, threshold: usize) -> Self {
|
||||
self.compaction_threshold = threshold;
|
||||
self
|
||||
}
|
||||
|
||||
/// Get growth integration reference
|
||||
pub fn growth(&self) -> Option<&GrowthIntegration> {
|
||||
self.growth.as_ref()
|
||||
@@ -134,6 +148,11 @@ impl AgentLoop {
|
||||
// Get all messages for context
|
||||
let mut messages = self.memory.get_messages(&session_id).await?;
|
||||
|
||||
// Apply compaction if threshold is configured
|
||||
if self.compaction_threshold > 0 {
|
||||
messages = compaction::maybe_compact(messages, self.compaction_threshold);
|
||||
}
|
||||
|
||||
// Enhance system prompt with growth memories
|
||||
let enhanced_prompt = if let Some(ref growth) = self.growth {
|
||||
let base = self.system_prompt.as_deref().unwrap_or("");
|
||||
@@ -260,7 +279,12 @@ impl AgentLoop {
|
||||
self.memory.append_message(&session_id, &user_message).await?;
|
||||
|
||||
// Get all messages for context
|
||||
let messages = self.memory.get_messages(&session_id).await?;
|
||||
let mut messages = self.memory.get_messages(&session_id).await?;
|
||||
|
||||
// Apply compaction if threshold is configured
|
||||
if self.compaction_threshold > 0 {
|
||||
messages = compaction::maybe_compact(messages, self.compaction_threshold);
|
||||
}
|
||||
|
||||
// Enhance system prompt with growth memories
|
||||
let enhanced_prompt = if let Some(ref growth) = self.growth {
|
||||
|
||||
Reference in New Issue
Block a user