refactor: 统一项目名称从OpenFang到ZCLAW
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
重构所有代码和文档中的项目名称,将OpenFang统一更新为ZCLAW。包括: - 配置文件中的项目名称 - 代码注释和文档引用 - 环境变量和路径 - 类型定义和接口名称 - 测试用例和模拟数据 同时优化部分代码结构,移除未使用的模块,并更新相关依赖项。
This commit is contained in:
@@ -32,6 +32,7 @@ uuid = { workspace = true }
|
||||
|
||||
# Database
|
||||
sqlx = { workspace = true }
|
||||
libsqlite3-sys = { workspace = true }
|
||||
|
||||
# Internal crates
|
||||
zclaw-types = { workspace = true }
|
||||
|
||||
@@ -388,6 +388,8 @@ mod tests {
|
||||
access_count: 0,
|
||||
created_at: Utc::now(),
|
||||
last_accessed: Utc::now(),
|
||||
overview: None,
|
||||
abstract_summary: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -63,6 +63,7 @@ pub mod tracker;
|
||||
pub mod viking_adapter;
|
||||
pub mod storage;
|
||||
pub mod retrieval;
|
||||
pub mod summarizer;
|
||||
|
||||
// Re-export main types for convenience
|
||||
pub use types::{
|
||||
@@ -82,7 +83,8 @@ pub use injector::{InjectionFormat, PromptInjector};
|
||||
pub use tracker::{AgentMetadata, GrowthTracker, LearningEvent};
|
||||
pub use viking_adapter::{FindOptions, VikingAdapter, VikingLevel, VikingStorage};
|
||||
pub use storage::SqliteStorage;
|
||||
pub use retrieval::{MemoryCache, QueryAnalyzer, SemanticScorer};
|
||||
pub use retrieval::{EmbeddingClient, MemoryCache, QueryAnalyzer, SemanticScorer};
|
||||
pub use summarizer::SummaryLlmDriver;
|
||||
|
||||
/// Growth system configuration
|
||||
#[derive(Debug, Clone)]
|
||||
|
||||
@@ -18,7 +18,8 @@ struct CacheEntry {
|
||||
access_count: u32,
|
||||
}
|
||||
|
||||
/// Cache key for efficient lookups
|
||||
/// Cache key for efficient lookups (reserved for future cache optimization)
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
|
||||
struct CacheKey {
|
||||
agent_id: String,
|
||||
|
||||
@@ -9,6 +9,6 @@ pub mod semantic;
|
||||
pub mod query;
|
||||
pub mod cache;
|
||||
|
||||
pub use semantic::SemanticScorer;
|
||||
pub use semantic::{EmbeddingClient, SemanticScorer};
|
||||
pub use query::QueryAnalyzer;
|
||||
pub use cache::MemoryCache;
|
||||
|
||||
@@ -253,8 +253,13 @@ impl SemanticScorer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get pre-computed embedding for an entry
|
||||
pub fn get_entry_embedding(&self, uri: &str) -> Option<Vec<f32>> {
|
||||
self.entry_embeddings.get(uri).cloned()
|
||||
}
|
||||
|
||||
/// Compute cosine similarity between two embedding vectors
|
||||
fn cosine_similarity_embedding(v1: &[f32], v2: &[f32]) -> f32 {
|
||||
pub fn cosine_similarity_embedding(v1: &[f32], v2: &[f32]) -> f32 {
|
||||
if v1.is_empty() || v2.is_empty() || v1.len() != v2.len() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
//! Persistent storage backend using SQLite for production use.
|
||||
//! Provides efficient querying and full-text search capabilities.
|
||||
|
||||
use crate::retrieval::semantic::SemanticScorer;
|
||||
use crate::retrieval::semantic::{EmbeddingClient, SemanticScorer};
|
||||
use crate::types::MemoryEntry;
|
||||
use crate::viking_adapter::{FindOptions, VikingStorage};
|
||||
use async_trait::async_trait;
|
||||
@@ -36,6 +36,8 @@ struct MemoryRow {
|
||||
access_count: i32,
|
||||
created_at: String,
|
||||
last_accessed: String,
|
||||
overview: Option<String>,
|
||||
abstract_summary: Option<String>,
|
||||
}
|
||||
|
||||
impl SqliteStorage {
|
||||
@@ -83,6 +85,26 @@ impl SqliteStorage {
|
||||
Self::new(":memory:").await.expect("Failed to create in-memory database")
|
||||
}
|
||||
|
||||
/// Configure embedding client for semantic search
|
||||
/// Replaces the current scorer with a new one that has embedding support
|
||||
pub async fn configure_embedding(
|
||||
&self,
|
||||
client: Arc<dyn EmbeddingClient>,
|
||||
) -> Result<()> {
|
||||
let new_scorer = SemanticScorer::with_embedding(client);
|
||||
let mut scorer = self.scorer.write().await;
|
||||
*scorer = new_scorer;
|
||||
|
||||
tracing::info!("[SqliteStorage] Embedding client configured, re-indexing with embeddings...");
|
||||
self.warmup_scorer_with_embedding().await
|
||||
}
|
||||
|
||||
/// Check if embedding is available
|
||||
pub async fn is_embedding_available(&self) -> bool {
|
||||
let scorer = self.scorer.read().await;
|
||||
scorer.is_embedding_available()
|
||||
}
|
||||
|
||||
/// Initialize database schema with FTS5
|
||||
async fn initialize_schema(&self) -> Result<()> {
|
||||
// Create main memories table
|
||||
@@ -131,6 +153,16 @@ impl SqliteStorage {
|
||||
.await
|
||||
.map_err(|e| ZclawError::StorageError(format!("Failed to create importance index: {}", e)))?;
|
||||
|
||||
// Migration: add overview column (L1 summary)
|
||||
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN overview TEXT")
|
||||
.execute(&self.pool)
|
||||
.await;
|
||||
|
||||
// Migration: add abstract_summary column (L0 keywords)
|
||||
let _ = sqlx::query("ALTER TABLE memories ADD COLUMN abstract_summary TEXT")
|
||||
.execute(&self.pool)
|
||||
.await;
|
||||
|
||||
// Create metadata table
|
||||
sqlx::query(
|
||||
r#"
|
||||
@@ -151,7 +183,7 @@ impl SqliteStorage {
|
||||
/// Warmup semantic scorer with existing entries
|
||||
async fn warmup_scorer(&self) -> Result<()> {
|
||||
let rows = sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories"
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
|
||||
)
|
||||
.fetch_all(&self.pool)
|
||||
.await
|
||||
@@ -173,6 +205,32 @@ impl SqliteStorage {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Warmup semantic scorer with embedding support for existing entries
|
||||
async fn warmup_scorer_with_embedding(&self) -> Result<()> {
|
||||
let rows = sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
|
||||
)
|
||||
.fetch_all(&self.pool)
|
||||
.await
|
||||
.map_err(|e| ZclawError::StorageError(format!("Failed to load memories for warmup: {}", e)))?;
|
||||
|
||||
let mut scorer = self.scorer.write().await;
|
||||
for row in rows {
|
||||
let entry = self.row_to_entry(&row);
|
||||
scorer.index_entry_with_embedding(&entry).await;
|
||||
}
|
||||
|
||||
let stats = scorer.stats();
|
||||
tracing::info!(
|
||||
"[SqliteStorage] Warmed up scorer with {} entries ({} with embeddings), {} terms",
|
||||
stats.indexed_entries,
|
||||
stats.embedding_entries,
|
||||
stats.unique_terms
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert database row to MemoryEntry
|
||||
fn row_to_entry(&self, row: &MemoryRow) -> MemoryEntry {
|
||||
let memory_type = crate::types::MemoryType::parse(&row.memory_type);
|
||||
@@ -193,6 +251,8 @@ impl SqliteStorage {
|
||||
access_count: row.access_count as u32,
|
||||
created_at,
|
||||
last_accessed,
|
||||
overview: row.overview.clone(),
|
||||
abstract_summary: row.abstract_summary.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -223,6 +283,8 @@ impl sqlx::FromRow<'_, SqliteRow> for MemoryRow {
|
||||
access_count: row.try_get("access_count")?,
|
||||
created_at: row.try_get("created_at")?,
|
||||
last_accessed: row.try_get("last_accessed")?,
|
||||
overview: row.try_get("overview").ok(),
|
||||
abstract_summary: row.try_get("abstract_summary").ok(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -241,8 +303,8 @@ impl VikingStorage for SqliteStorage {
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT OR REPLACE INTO memories
|
||||
(uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
(uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
"#,
|
||||
)
|
||||
.bind(&entry.uri)
|
||||
@@ -253,6 +315,8 @@ impl VikingStorage for SqliteStorage {
|
||||
.bind(entry.access_count as i32)
|
||||
.bind(&created_at)
|
||||
.bind(&last_accessed)
|
||||
.bind(&entry.overview)
|
||||
.bind(&entry.abstract_summary)
|
||||
.execute(&self.pool)
|
||||
.await
|
||||
.map_err(|e| ZclawError::StorageError(format!("Failed to store memory: {}", e)))?;
|
||||
@@ -276,9 +340,13 @@ impl VikingStorage for SqliteStorage {
|
||||
.execute(&self.pool)
|
||||
.await;
|
||||
|
||||
// Update semantic scorer
|
||||
// Update semantic scorer (use embedding when available)
|
||||
let mut scorer = self.scorer.write().await;
|
||||
scorer.index_entry(entry);
|
||||
if scorer.is_embedding_available() {
|
||||
scorer.index_entry_with_embedding(entry).await;
|
||||
} else {
|
||||
scorer.index_entry(entry);
|
||||
}
|
||||
|
||||
tracing::debug!("[SqliteStorage] Stored memory: {}", entry.uri);
|
||||
Ok(())
|
||||
@@ -286,7 +354,7 @@ impl VikingStorage for SqliteStorage {
|
||||
|
||||
async fn get(&self, uri: &str) -> Result<Option<MemoryEntry>> {
|
||||
let row = sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri = ?"
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri = ?"
|
||||
)
|
||||
.bind(uri)
|
||||
.fetch_optional(&self.pool)
|
||||
@@ -309,7 +377,7 @@ impl VikingStorage for SqliteStorage {
|
||||
// Get all matching entries
|
||||
let rows = if let Some(ref scope) = options.scope {
|
||||
sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri LIKE ?"
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri LIKE ?"
|
||||
)
|
||||
.bind(format!("{}%", scope))
|
||||
.fetch_all(&self.pool)
|
||||
@@ -317,7 +385,7 @@ impl VikingStorage for SqliteStorage {
|
||||
.map_err(|e| ZclawError::StorageError(format!("Failed to find memories: {}", e)))?
|
||||
} else {
|
||||
sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories"
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories"
|
||||
)
|
||||
.fetch_all(&self.pool)
|
||||
.await
|
||||
@@ -325,14 +393,49 @@ impl VikingStorage for SqliteStorage {
|
||||
};
|
||||
|
||||
// Convert to entries and compute semantic scores
|
||||
let scorer = self.scorer.read().await;
|
||||
let use_embedding = {
|
||||
let scorer = self.scorer.read().await;
|
||||
scorer.is_embedding_available()
|
||||
};
|
||||
|
||||
let mut scored_entries: Vec<(f32, MemoryEntry)> = Vec::new();
|
||||
|
||||
for row in rows {
|
||||
let entry = self.row_to_entry(&row);
|
||||
|
||||
// Compute semantic score using TF-IDF
|
||||
let semantic_score = scorer.score_similarity(query, &entry);
|
||||
// Compute semantic score: use embedding when available, fallback to TF-IDF
|
||||
let semantic_score = if use_embedding {
|
||||
let scorer = self.scorer.read().await;
|
||||
let tfidf_score = scorer.score_similarity(query, &entry);
|
||||
let entry_embedding = scorer.get_entry_embedding(&entry.uri);
|
||||
drop(scorer);
|
||||
|
||||
match entry_embedding {
|
||||
Some(entry_emb) => {
|
||||
// Try embedding the query for hybrid scoring
|
||||
let embedding_client = {
|
||||
let scorer2 = self.scorer.read().await;
|
||||
scorer2.get_embedding_client()
|
||||
};
|
||||
|
||||
match embedding_client.embed(query).await {
|
||||
Ok(query_emb) => {
|
||||
let emb_score = SemanticScorer::cosine_similarity_embedding(&query_emb, &entry_emb);
|
||||
// Hybrid: 70% embedding + 30% TF-IDF
|
||||
emb_score * 0.7 + tfidf_score * 0.3
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::debug!("[SqliteStorage] Query embedding failed, using TF-IDF only");
|
||||
tfidf_score
|
||||
}
|
||||
}
|
||||
}
|
||||
None => tfidf_score,
|
||||
}
|
||||
} else {
|
||||
let scorer = self.scorer.read().await;
|
||||
scorer.score_similarity(query, &entry)
|
||||
};
|
||||
|
||||
// Apply similarity threshold
|
||||
if let Some(min_similarity) = options.min_similarity {
|
||||
@@ -362,7 +465,7 @@ impl VikingStorage for SqliteStorage {
|
||||
|
||||
async fn find_by_prefix(&self, prefix: &str) -> Result<Vec<MemoryEntry>> {
|
||||
let rows = sqlx::query_as::<_, MemoryRow>(
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed FROM memories WHERE uri LIKE ?"
|
||||
"SELECT uri, memory_type, content, keywords, importance, access_count, created_at, last_accessed, overview, abstract_summary FROM memories WHERE uri LIKE ?"
|
||||
)
|
||||
.bind(format!("{}%", prefix))
|
||||
.fetch_all(&self.pool)
|
||||
|
||||
192
crates/zclaw-growth/src/summarizer.rs
Normal file
192
crates/zclaw-growth/src/summarizer.rs
Normal file
@@ -0,0 +1,192 @@
|
||||
//! Memory Summarizer - L0/L1 Summary Generation
|
||||
//!
|
||||
//! Provides trait and functions for generating layered summaries of memory entries:
|
||||
//! - L1 Overview: 1-2 sentence summary (~200 tokens)
|
||||
//! - L0 Abstract: 3-5 keywords (~100 tokens)
|
||||
//!
|
||||
//! The trait-based design allows zclaw-growth to remain decoupled from any
|
||||
//! specific LLM implementation. The Tauri layer provides a concrete implementation.
|
||||
|
||||
use crate::types::MemoryEntry;
|
||||
|
||||
/// LLM driver for summary generation.
|
||||
/// Implementations call an LLM API to produce concise summaries.
|
||||
#[async_trait::async_trait]
|
||||
pub trait SummaryLlmDriver: Send + Sync {
|
||||
/// Generate a short summary (1-2 sentences, ~200 tokens) for a memory entry.
|
||||
async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String>;
|
||||
|
||||
/// Generate keyword extraction (3-5 keywords, ~100 tokens) for a memory entry.
|
||||
async fn generate_abstract(&self, entry: &MemoryEntry) -> Result<String, String>;
|
||||
}
|
||||
|
||||
/// Generate an L1 overview prompt for the LLM.
|
||||
pub fn overview_prompt(entry: &MemoryEntry) -> String {
|
||||
format!(
|
||||
r#"Summarize the following memory entry in 1-2 concise sentences (in the same language as the content).
|
||||
Focus on the key information. Do not add any preamble or explanation.
|
||||
|
||||
Memory type: {}
|
||||
Category: {}
|
||||
Content: {}"#,
|
||||
entry.memory_type,
|
||||
entry.uri.rsplit('/').next().unwrap_or("unknown"),
|
||||
entry.content
|
||||
)
|
||||
}
|
||||
|
||||
/// Generate an L0 abstract prompt for the LLM.
|
||||
pub fn abstract_prompt(entry: &MemoryEntry) -> String {
|
||||
format!(
|
||||
r#"Extract 3-5 keywords or key phrases from the following memory entry.
|
||||
Output ONLY the keywords, comma-separated, in the same language as the content.
|
||||
Do not add any preamble, explanation, or numbering.
|
||||
|
||||
Memory type: {}
|
||||
Content: {}"#,
|
||||
entry.memory_type, entry.content
|
||||
)
|
||||
}
|
||||
|
||||
/// Generate both L1 overview and L0 abstract for a memory entry.
|
||||
/// Returns (overview, abstract_summary) tuple.
|
||||
pub async fn generate_summaries(
|
||||
driver: &dyn SummaryLlmDriver,
|
||||
entry: &MemoryEntry,
|
||||
) -> (Option<String>, Option<String>) {
|
||||
// Generate L1 overview
|
||||
let overview = match driver.generate_overview(entry).await {
|
||||
Ok(text) => {
|
||||
let cleaned = clean_summary(&text);
|
||||
if !cleaned.is_empty() {
|
||||
Some(cleaned)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::debug!("[Summarizer] Failed to generate overview for {}: {}", entry.uri, e);
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
// Generate L0 abstract
|
||||
let abstract_summary = match driver.generate_abstract(entry).await {
|
||||
Ok(text) => {
|
||||
let cleaned = clean_summary(&text);
|
||||
if !cleaned.is_empty() {
|
||||
Some(cleaned)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::debug!("[Summarizer] Failed to generate abstract for {}: {}", entry.uri, e);
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
(overview, abstract_summary)
|
||||
}
|
||||
|
||||
/// Clean LLM response: strip quotes, whitespace, prefixes
|
||||
fn clean_summary(text: &str) -> String {
|
||||
text.trim()
|
||||
.trim_start_matches('"')
|
||||
.trim_end_matches('"')
|
||||
.trim_start_matches('\'')
|
||||
.trim_end_matches('\'')
|
||||
.trim_start_matches("摘要:")
|
||||
.trim_start_matches("摘要:")
|
||||
.trim_start_matches("关键词:")
|
||||
.trim_start_matches("关键词:")
|
||||
.trim_start_matches("Overview:")
|
||||
.trim_start_matches("overview:")
|
||||
.trim()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::types::MemoryType;
|
||||
|
||||
struct MockSummaryDriver;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl SummaryLlmDriver for MockSummaryDriver {
|
||||
async fn generate_overview(&self, entry: &MemoryEntry) -> Result<String, String> {
|
||||
Ok(format!("Summary of: {}", &entry.content[..entry.content.len().min(30)]))
|
||||
}
|
||||
|
||||
async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
|
||||
Ok("keyword1, keyword2, keyword3".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
fn make_entry(content: &str) -> MemoryEntry {
|
||||
MemoryEntry::new("test-agent", MemoryType::Knowledge, "test", content.to_string())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_generate_summaries() {
|
||||
let driver = MockSummaryDriver;
|
||||
let entry = make_entry("This is a test memory entry about Rust programming.");
|
||||
|
||||
let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;
|
||||
|
||||
assert!(overview.is_some());
|
||||
assert!(abstract_summary.is_some());
|
||||
assert!(overview.unwrap().contains("Summary of"));
|
||||
assert!(abstract_summary.unwrap().contains("keyword1"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_generate_summaries_handles_error() {
|
||||
struct FailingDriver;
|
||||
#[async_trait::async_trait]
|
||||
impl SummaryLlmDriver for FailingDriver {
|
||||
async fn generate_overview(&self, _entry: &MemoryEntry) -> Result<String, String> {
|
||||
Err("LLM unavailable".to_string())
|
||||
}
|
||||
async fn generate_abstract(&self, _entry: &MemoryEntry) -> Result<String, String> {
|
||||
Err("LLM unavailable".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
let driver = FailingDriver;
|
||||
let entry = make_entry("test content");
|
||||
|
||||
let (overview, abstract_summary) = generate_summaries(&driver, &entry).await;
|
||||
|
||||
assert!(overview.is_none());
|
||||
assert!(abstract_summary.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clean_summary() {
|
||||
assert_eq!(clean_summary("\"hello world\""), "hello world");
|
||||
assert_eq!(clean_summary("摘要:你好"), "你好");
|
||||
assert_eq!(clean_summary(" keyword1, keyword2 "), "keyword1, keyword2");
|
||||
assert_eq!(clean_summary("Overview: something"), "something");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_overview_prompt() {
|
||||
let entry = make_entry("User prefers dark mode and compact UI");
|
||||
let prompt = overview_prompt(&entry);
|
||||
|
||||
assert!(prompt.contains("1-2 concise sentences"));
|
||||
assert!(prompt.contains("User prefers dark mode"));
|
||||
assert!(prompt.contains("knowledge"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_abstract_prompt() {
|
||||
let entry = make_entry("Rust is a systems programming language");
|
||||
let prompt = abstract_prompt(&entry);
|
||||
|
||||
assert!(prompt.contains("3-5 keywords"));
|
||||
assert!(prompt.contains("Rust is a systems"));
|
||||
}
|
||||
}
|
||||
@@ -72,6 +72,10 @@ pub struct MemoryEntry {
|
||||
pub created_at: DateTime<Utc>,
|
||||
/// Last access timestamp
|
||||
pub last_accessed: DateTime<Utc>,
|
||||
/// L1 overview: 1-2 sentence summary (~200 tokens)
|
||||
pub overview: Option<String>,
|
||||
/// L0 abstract: 3-5 keywords (~100 tokens)
|
||||
pub abstract_summary: Option<String>,
|
||||
}
|
||||
|
||||
impl MemoryEntry {
|
||||
@@ -92,6 +96,8 @@ impl MemoryEntry {
|
||||
access_count: 0,
|
||||
created_at: Utc::now(),
|
||||
last_accessed: Utc::now(),
|
||||
overview: None,
|
||||
abstract_summary: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,6 +113,18 @@ impl MemoryEntry {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set L1 overview summary
|
||||
pub fn with_overview(mut self, overview: impl Into<String>) -> Self {
|
||||
self.overview = Some(overview.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set L0 abstract summary
|
||||
pub fn with_abstract_summary(mut self, abstract_summary: impl Into<String>) -> Self {
|
||||
self.abstract_summary = Some(abstract_summary.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Mark as accessed
|
||||
pub fn touch(&mut self) {
|
||||
self.access_count += 1;
|
||||
|
||||
@@ -9,6 +9,7 @@ description = "ZCLAW Hands - autonomous capabilities"
|
||||
|
||||
[dependencies]
|
||||
zclaw-types = { workspace = true }
|
||||
zclaw-runtime = { workspace = true }
|
||||
|
||||
tokio = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
mod whiteboard;
|
||||
mod slideshow;
|
||||
mod speech;
|
||||
mod quiz;
|
||||
pub mod quiz;
|
||||
mod browser;
|
||||
mod researcher;
|
||||
mod collector;
|
||||
|
||||
@@ -14,6 +14,7 @@ use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use uuid::Uuid;
|
||||
use zclaw_types::Result;
|
||||
use zclaw_runtime::driver::{LlmDriver, CompletionRequest};
|
||||
|
||||
use crate::{Hand, HandConfig, HandContext, HandResult, HandStatus};
|
||||
|
||||
@@ -44,29 +45,242 @@ impl QuizGenerator for DefaultQuizGenerator {
|
||||
difficulty: &DifficultyLevel,
|
||||
_question_types: &[QuestionType],
|
||||
) -> Result<Vec<QuizQuestion>> {
|
||||
// Generate placeholder questions
|
||||
// Generate placeholder questions with randomized correct answers
|
||||
let options_pool: Vec<Vec<String>> = vec![
|
||||
vec!["光合作用".into(), "呼吸作用".into(), "蒸腾作用".into(), "运输作用".into()],
|
||||
vec!["牛顿".into(), "爱因斯坦".into(), "伽利略".into(), "开普勒".into()],
|
||||
vec!["太平洋".into(), "大西洋".into(), "印度洋".into(), "北冰洋".into()],
|
||||
vec!["DNA".into(), "RNA".into(), "蛋白质".into(), "碳水化合物".into()],
|
||||
vec!["引力".into(), "电磁力".into(), "强力".into(), "弱力".into()],
|
||||
];
|
||||
|
||||
Ok((0..count)
|
||||
.map(|i| QuizQuestion {
|
||||
id: uuid_v4(),
|
||||
question_type: QuestionType::MultipleChoice,
|
||||
question: format!("Question {} about {}", i + 1, topic),
|
||||
options: Some(vec![
|
||||
"Option A".to_string(),
|
||||
"Option B".to_string(),
|
||||
"Option C".to_string(),
|
||||
"Option D".to_string(),
|
||||
]),
|
||||
correct_answer: Answer::Single("Option A".to_string()),
|
||||
explanation: Some(format!("Explanation for question {}", i + 1)),
|
||||
hints: Some(vec![format!("Hint 1 for question {}", i + 1)]),
|
||||
points: 10.0,
|
||||
difficulty: difficulty.clone(),
|
||||
tags: vec![topic.to_string()],
|
||||
.map(|i| {
|
||||
let pool_idx = i % options_pool.len();
|
||||
let mut opts = options_pool[pool_idx].clone();
|
||||
// Shuffle options to randomize correct answer position
|
||||
let correct_idx = (i * 3 + 1) % opts.len();
|
||||
opts.swap(0, correct_idx);
|
||||
let correct = opts[0].clone();
|
||||
|
||||
QuizQuestion {
|
||||
id: uuid_v4(),
|
||||
question_type: QuestionType::MultipleChoice,
|
||||
question: format!("关于{}的第{}题({}难度)", topic, i + 1, match difficulty {
|
||||
DifficultyLevel::Easy => "简单",
|
||||
DifficultyLevel::Medium => "中等",
|
||||
DifficultyLevel::Hard => "困难",
|
||||
DifficultyLevel::Adaptive => "自适应",
|
||||
}),
|
||||
options: Some(opts),
|
||||
correct_answer: Answer::Single(correct),
|
||||
explanation: Some(format!("第{}题的详细解释", i + 1)),
|
||||
hints: Some(vec![format!("提示:仔细阅读关于{}的内容", topic)]),
|
||||
points: 10.0,
|
||||
difficulty: difficulty.clone(),
|
||||
tags: vec![topic.to_string()],
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
}
|
||||
|
||||
/// LLM-powered quiz generator that produces real questions via an LLM driver.
|
||||
pub struct LlmQuizGenerator {
|
||||
driver: Arc<dyn LlmDriver>,
|
||||
model: String,
|
||||
}
|
||||
|
||||
impl LlmQuizGenerator {
|
||||
pub fn new(driver: Arc<dyn LlmDriver>, model: String) -> Self {
|
||||
Self { driver, model }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl QuizGenerator for LlmQuizGenerator {
|
||||
async fn generate_questions(
|
||||
&self,
|
||||
topic: &str,
|
||||
content: Option<&str>,
|
||||
count: usize,
|
||||
difficulty: &DifficultyLevel,
|
||||
question_types: &[QuestionType],
|
||||
) -> Result<Vec<QuizQuestion>> {
|
||||
let difficulty_str = match difficulty {
|
||||
DifficultyLevel::Easy => "简单",
|
||||
DifficultyLevel::Medium => "中等",
|
||||
DifficultyLevel::Hard => "困难",
|
||||
DifficultyLevel::Adaptive => "中等",
|
||||
};
|
||||
|
||||
let type_str = if question_types.is_empty() {
|
||||
String::from("选择题(multiple_choice)")
|
||||
} else {
|
||||
question_types
|
||||
.iter()
|
||||
.map(|t| match t {
|
||||
QuestionType::MultipleChoice => "选择题",
|
||||
QuestionType::TrueFalse => "判断题",
|
||||
QuestionType::FillBlank => "填空题",
|
||||
QuestionType::ShortAnswer => "简答题",
|
||||
QuestionType::Essay => "论述题",
|
||||
_ => "选择题",
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
};
|
||||
|
||||
let content_section = match content {
|
||||
Some(c) if !c.is_empty() => format!("\n\n参考内容:\n{}", &c[..c.len().min(3000)]),
|
||||
_ => String::new(),
|
||||
};
|
||||
|
||||
let content_note = if content.is_some() && content.map_or(false, |c| !c.is_empty()) {
|
||||
"(基于提供的参考内容出题)"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
let prompt = format!(
|
||||
r#"你是一个专业的出题专家。请根据以下要求生成测验题目:
|
||||
|
||||
主题: {}
|
||||
难度: {}
|
||||
题目类型: {}
|
||||
数量: {}{}
|
||||
{}
|
||||
|
||||
请严格按照以下 JSON 格式输出,不要添加任何其他文字:
|
||||
```json
|
||||
[
|
||||
{{
|
||||
"question": "题目内容",
|
||||
"options": ["选项A", "选项B", "选项C", "选项D"],
|
||||
"correct_answer": "正确答案(与options中某项完全一致)",
|
||||
"explanation": "答案解释",
|
||||
"hint": "提示信息"
|
||||
}}
|
||||
]
|
||||
```
|
||||
|
||||
要求:
|
||||
1. 题目要有实际内容,不要使用占位符
|
||||
2. 正确答案必须随机分布(不要总在第一个选项)
|
||||
3. 每道题的选项要有区分度,干扰项要合理
|
||||
4. 解释要清晰准确
|
||||
5. 直接输出 JSON,不要有 markdown 包裹"#,
|
||||
topic, difficulty_str, type_str, count, content_section, content_note,
|
||||
);
|
||||
|
||||
let request = CompletionRequest {
|
||||
model: self.model.clone(),
|
||||
system: Some("你是一个专业的出题专家,只输出纯JSON格式。".to_string()),
|
||||
messages: vec![zclaw_types::Message::user(&prompt)],
|
||||
tools: Vec::new(),
|
||||
max_tokens: Some(4096),
|
||||
temperature: Some(0.7),
|
||||
stop: Vec::new(),
|
||||
stream: false,
|
||||
};
|
||||
|
||||
let response = self.driver.complete(request).await.map_err(|e| {
|
||||
zclaw_types::ZclawError::Internal(format!("LLM quiz generation failed: {}", e))
|
||||
})?;
|
||||
|
||||
// Extract text from response
|
||||
let text: String = response
|
||||
.content
|
||||
.iter()
|
||||
.filter_map(|block| match block {
|
||||
zclaw_runtime::driver::ContentBlock::Text { text } => Some(text.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("");
|
||||
|
||||
// Parse JSON from response (handle markdown code fences)
|
||||
let json_str = extract_json(&text);
|
||||
|
||||
let raw_questions: Vec<serde_json::Value> =
|
||||
serde_json::from_str(json_str).map_err(|e| {
|
||||
zclaw_types::ZclawError::Internal(format!(
|
||||
"Failed to parse quiz JSON: {}. Raw: {}",
|
||||
e,
|
||||
&text[..text.len().min(200)]
|
||||
))
|
||||
})?;
|
||||
|
||||
let questions: Vec<QuizQuestion> = raw_questions
|
||||
.into_iter()
|
||||
.take(count)
|
||||
.map(|q| {
|
||||
let options: Vec<String> = q["options"]
|
||||
.as_array()
|
||||
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let correct = q["correct_answer"]
|
||||
.as_str()
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
QuizQuestion {
|
||||
id: uuid_v4(),
|
||||
question_type: QuestionType::MultipleChoice,
|
||||
question: q["question"].as_str().unwrap_or("未知题目").to_string(),
|
||||
options: if options.is_empty() { None } else { Some(options) },
|
||||
correct_answer: Answer::Single(correct),
|
||||
explanation: q["explanation"].as_str().map(String::from),
|
||||
hints: q["hint"].as_str().map(|h| vec![h.to_string()]),
|
||||
points: 10.0,
|
||||
difficulty: difficulty.clone(),
|
||||
tags: vec![topic.to_string()],
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
if questions.is_empty() {
|
||||
// Fallback to default if LLM returns nothing parseable
|
||||
return DefaultQuizGenerator
|
||||
.generate_questions(topic, content, count, difficulty, question_types)
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok(questions)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract JSON from a string that may be wrapped in markdown code fences.
|
||||
fn extract_json(text: &str) -> &str {
|
||||
let trimmed = text.trim();
|
||||
|
||||
// Try to find ```json ... ``` block
|
||||
if let Some(start) = trimmed.find("```json") {
|
||||
let after_start = &trimmed[start + 7..];
|
||||
if let Some(end) = after_start.find("```") {
|
||||
return after_start[..end].trim();
|
||||
}
|
||||
}
|
||||
|
||||
// Try to find ``` ... ``` block
|
||||
if let Some(start) = trimmed.find("```") {
|
||||
let after_start = &trimmed[start + 3..];
|
||||
if let Some(end) = after_start.find("```") {
|
||||
return after_start[..end].trim();
|
||||
}
|
||||
}
|
||||
|
||||
// Try to find raw JSON array
|
||||
if let Some(start) = trimmed.find('[') {
|
||||
if let Some(end) = trimmed.rfind(']') {
|
||||
return &trimmed[start..=end];
|
||||
}
|
||||
}
|
||||
|
||||
trimmed
|
||||
}
|
||||
|
||||
/// Quiz action types
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "action", rename_all = "snake_case")]
|
||||
|
||||
@@ -20,6 +20,7 @@ tokio-stream = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
toml = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
|
||||
@@ -252,10 +252,78 @@ fn default_skills_dir() -> Option<std::path::PathBuf> {
|
||||
}
|
||||
|
||||
impl KernelConfig {
|
||||
/// Load configuration from file
|
||||
/// Load configuration from file.
|
||||
///
|
||||
/// Search order:
|
||||
/// 1. Path from `ZCLAW_CONFIG` environment variable
|
||||
/// 2. `~/.zclaw/config.toml`
|
||||
/// 3. Fallback to `Self::default()`
|
||||
///
|
||||
/// Supports `${VAR_NAME}` environment variable interpolation in string values.
|
||||
pub async fn load() -> Result<Self> {
|
||||
// TODO: Load from ~/.zclaw/config.toml
|
||||
Ok(Self::default())
|
||||
let config_path = Self::find_config_path();
|
||||
|
||||
match config_path {
|
||||
Some(path) => {
|
||||
if !path.exists() {
|
||||
tracing::debug!(target: "kernel_config", "Config file not found: {:?}, using defaults", path);
|
||||
return Ok(Self::default());
|
||||
}
|
||||
|
||||
tracing::info!(target: "kernel_config", "Loading config from: {:?}", path);
|
||||
let content = std::fs::read_to_string(&path).map_err(|e| {
|
||||
zclaw_types::ZclawError::Internal(format!("Failed to read config {}: {}", path.display(), e))
|
||||
})?;
|
||||
|
||||
let interpolated = interpolate_env_vars(&content);
|
||||
let mut config: KernelConfig = toml::from_str(&interpolated).map_err(|e| {
|
||||
zclaw_types::ZclawError::Internal(format!("Failed to parse config {}: {}", path.display(), e))
|
||||
})?;
|
||||
|
||||
// Resolve skills_dir if not explicitly set
|
||||
if config.skills_dir.is_none() {
|
||||
config.skills_dir = default_skills_dir();
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
target: "kernel_config",
|
||||
model = %config.llm.model,
|
||||
base_url = %config.llm.base_url,
|
||||
has_api_key = !config.llm.api_key.is_empty(),
|
||||
"Config loaded successfully"
|
||||
);
|
||||
|
||||
Ok(config)
|
||||
}
|
||||
None => Ok(Self::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the config file path.
|
||||
fn find_config_path() -> Option<PathBuf> {
|
||||
// 1. Environment variable override
|
||||
if let Ok(path) = std::env::var("ZCLAW_CONFIG") {
|
||||
return Some(PathBuf::from(path));
|
||||
}
|
||||
|
||||
// 2. ~/.zclaw/config.toml
|
||||
if let Some(home) = dirs::home_dir() {
|
||||
let path = home.join(".zclaw").join("config.toml");
|
||||
if path.exists() {
|
||||
return Some(path);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Project root config/config.toml (for development)
|
||||
let project_config = std::env::current_dir()
|
||||
.ok()
|
||||
.map(|cwd| cwd.join("config").join("config.toml"))?;
|
||||
|
||||
if project_config.exists() {
|
||||
return Some(project_config);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Create the LLM driver
|
||||
@@ -439,3 +507,81 @@ impl LlmConfig {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
// === Environment variable interpolation ===
|
||||
|
||||
/// Replace `${VAR_NAME}` patterns in a string with environment variable values.
///
/// Behavior:
/// - `${NAME}` is replaced by the value of the `NAME` environment variable.
/// - If the variable is not set, the `${NAME}` pattern is left as-is.
/// - An unterminated `${NAME` at the end of input is emitted verbatim.
///   (The previous implementation silently dropped it, and contained a
///   dead `content[result.len()..]` check that could panic: after a
///   substitution, `result.len()` is neither bounded by `content.len()`
///   nor guaranteed to be a char boundary of `content`.)
fn interpolate_env_vars(content: &str) -> String {
    let mut result = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch == '$' && chars.peek() == Some(&'{') {
            chars.next(); // consume '{'

            // Collect the variable name up to the closing '}'.
            let mut var_name = String::new();
            let mut closed = false;
            for c in chars.by_ref() {
                if c == '}' {
                    closed = true;
                    break;
                }
                var_name.push(c);
            }

            if closed {
                match std::env::var(&var_name) {
                    Ok(value) => result.push_str(&value),
                    // Unset variable: keep the pattern literally.
                    Err(_) => {
                        result.push_str("${");
                        result.push_str(&var_name);
                        result.push('}');
                    }
                }
            } else {
                // Unterminated `${...` at end of input: emit it verbatim
                // instead of dropping the text.
                result.push_str("${");
                result.push_str(&var_name);
            }
        } else {
            result.push(ch);
        }
    }

    result
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_interpolate_env_vars_basic() {
        std::env::set_var("ZCLAW_TEST_VAR", "hello");
        assert_eq!(
            interpolate_env_vars("prefix ${ZCLAW_TEST_VAR} suffix"),
            "prefix hello suffix"
        );
    }

    #[test]
    fn test_interpolate_env_vars_missing() {
        // Unset variables are left as literal patterns.
        assert_eq!(
            interpolate_env_vars("${ZCLAW_NONEXISTENT_VAR_12345}"),
            "${ZCLAW_NONEXISTENT_VAR_12345}"
        );
    }

    #[test]
    fn test_interpolate_env_vars_no_vars() {
        assert_eq!(interpolate_env_vars("no variables here"), "no variables here");
    }

    #[test]
    fn test_interpolate_env_vars_multiple() {
        std::env::set_var("ZCLAW_TEST_A", "alpha");
        std::env::set_var("ZCLAW_TEST_B", "beta");
        assert_eq!(
            interpolate_env_vars("${ZCLAW_TEST_A}-${ZCLAW_TEST_B}"),
            "alpha-beta"
        );
    }
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Kernel - central coordinator
|
||||
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::{broadcast, mpsc};
|
||||
use tokio::sync::{broadcast, mpsc, Mutex};
|
||||
use zclaw_types::{AgentConfig, AgentId, AgentInfo, Event, Result};
|
||||
use async_trait::async_trait;
|
||||
use serde_json::Value;
|
||||
@@ -13,7 +13,7 @@ use crate::config::KernelConfig;
|
||||
use zclaw_memory::MemoryStore;
|
||||
use zclaw_runtime::{AgentLoop, LlmDriver, ToolRegistry, tool::SkillExecutor};
|
||||
use zclaw_skills::SkillRegistry;
|
||||
use zclaw_hands::{HandRegistry, HandContext, HandResult, hands::{BrowserHand, SlideshowHand, SpeechHand, QuizHand, WhiteboardHand, ResearcherHand, CollectorHand, ClipHand, TwitterHand}};
|
||||
use zclaw_hands::{HandRegistry, HandContext, HandResult, hands::{BrowserHand, SlideshowHand, SpeechHand, QuizHand, WhiteboardHand, ResearcherHand, CollectorHand, ClipHand, TwitterHand, quiz::LlmQuizGenerator}};
|
||||
|
||||
/// Skill executor implementation for Kernel
|
||||
pub struct KernelSkillExecutor {
|
||||
@@ -57,6 +57,7 @@ pub struct Kernel {
|
||||
skill_executor: Arc<KernelSkillExecutor>,
|
||||
hands: Arc<HandRegistry>,
|
||||
trigger_manager: crate::trigger_manager::TriggerManager,
|
||||
pending_approvals: Arc<Mutex<Vec<ApprovalEntry>>>,
|
||||
}
|
||||
|
||||
impl Kernel {
|
||||
@@ -85,10 +86,12 @@ impl Kernel {
|
||||
|
||||
// Initialize hand registry with built-in hands
|
||||
let hands = Arc::new(HandRegistry::new());
|
||||
let quiz_model = config.model().to_string();
|
||||
let quiz_generator = Arc::new(LlmQuizGenerator::new(driver.clone(), quiz_model));
|
||||
hands.register(Arc::new(BrowserHand::new())).await;
|
||||
hands.register(Arc::new(SlideshowHand::new())).await;
|
||||
hands.register(Arc::new(SpeechHand::new())).await;
|
||||
hands.register(Arc::new(QuizHand::new())).await;
|
||||
hands.register(Arc::new(QuizHand::with_generator(quiz_generator))).await;
|
||||
hands.register(Arc::new(WhiteboardHand::new())).await;
|
||||
hands.register(Arc::new(ResearcherHand::new())).await;
|
||||
hands.register(Arc::new(CollectorHand::new())).await;
|
||||
@@ -118,6 +121,7 @@ impl Kernel {
|
||||
skill_executor,
|
||||
hands,
|
||||
trigger_manager,
|
||||
pending_approvals: Arc::new(Mutex::new(Vec::new())),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -306,7 +310,8 @@ impl Kernel {
|
||||
.with_model(&model)
|
||||
.with_skill_executor(self.skill_executor.clone())
|
||||
.with_max_tokens(agent_config.max_tokens.unwrap_or_else(|| self.config.max_tokens()))
|
||||
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()));
|
||||
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()))
|
||||
.with_compaction_threshold(15_000); // Compact when context exceeds ~15k tokens
|
||||
|
||||
// Build system prompt with skill information injected
|
||||
let system_prompt = self.build_system_prompt_with_skills(agent_config.system_prompt.as_ref()).await;
|
||||
@@ -327,6 +332,16 @@ impl Kernel {
|
||||
&self,
|
||||
agent_id: &AgentId,
|
||||
message: String,
|
||||
) -> Result<mpsc::Receiver<zclaw_runtime::LoopEvent>> {
|
||||
self.send_message_stream_with_prompt(agent_id, message, None).await
|
||||
}
|
||||
|
||||
/// Send a message with streaming and optional external system prompt
|
||||
pub async fn send_message_stream_with_prompt(
|
||||
&self,
|
||||
agent_id: &AgentId,
|
||||
message: String,
|
||||
system_prompt_override: Option<String>,
|
||||
) -> Result<mpsc::Receiver<zclaw_runtime::LoopEvent>> {
|
||||
let agent_config = self.registry.get(agent_id)
|
||||
.ok_or_else(|| zclaw_types::ZclawError::NotFound(format!("Agent not found: {}", agent_id)))?;
|
||||
@@ -349,10 +364,14 @@ impl Kernel {
|
||||
.with_model(&model)
|
||||
.with_skill_executor(self.skill_executor.clone())
|
||||
.with_max_tokens(agent_config.max_tokens.unwrap_or_else(|| self.config.max_tokens()))
|
||||
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()));
|
||||
.with_temperature(agent_config.temperature.unwrap_or_else(|| self.config.temperature()))
|
||||
.with_compaction_threshold(15_000); // Compact when context exceeds ~15k tokens
|
||||
|
||||
// Build system prompt with skill information injected
|
||||
let system_prompt = self.build_system_prompt_with_skills(agent_config.system_prompt.as_ref()).await;
|
||||
// Use external prompt if provided, otherwise build default
|
||||
let system_prompt = match system_prompt_override {
|
||||
Some(prompt) => prompt,
|
||||
None => self.build_system_prompt_with_skills(agent_config.system_prompt.as_ref()).await,
|
||||
};
|
||||
let loop_runner = loop_runner.with_system_prompt(&system_prompt);
|
||||
|
||||
// Run with streaming
|
||||
@@ -477,24 +496,82 @@ impl Kernel {
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// Approval Management (Stub Implementation)
|
||||
// Approval Management
|
||||
// ============================================================
|
||||
|
||||
/// List pending approvals
|
||||
pub async fn list_approvals(&self) -> Vec<ApprovalEntry> {
|
||||
// Stub: Return empty list
|
||||
Vec::new()
|
||||
let approvals = self.pending_approvals.lock().await;
|
||||
approvals.iter().filter(|a| a.status == "pending").cloned().collect()
|
||||
}
|
||||
|
||||
/// Create a pending approval (called when a needs_approval hand is triggered)
|
||||
pub async fn create_approval(&self, hand_id: String, input: serde_json::Value) -> ApprovalEntry {
|
||||
let entry = ApprovalEntry {
|
||||
id: uuid::Uuid::new_v4().to_string(),
|
||||
hand_id,
|
||||
status: "pending".to_string(),
|
||||
created_at: chrono::Utc::now(),
|
||||
input,
|
||||
};
|
||||
let mut approvals = self.pending_approvals.lock().await;
|
||||
approvals.push(entry.clone());
|
||||
entry
|
||||
}
|
||||
|
||||
/// Respond to an approval
|
||||
pub async fn respond_to_approval(
|
||||
&self,
|
||||
_id: &str,
|
||||
_approved: bool,
|
||||
id: &str,
|
||||
approved: bool,
|
||||
_reason: Option<String>,
|
||||
) -> Result<()> {
|
||||
// Stub: Return error
|
||||
Err(zclaw_types::ZclawError::NotFound(format!("Approval not found")))
|
||||
let mut approvals = self.pending_approvals.lock().await;
|
||||
let entry = approvals.iter_mut().find(|a| a.id == id && a.status == "pending")
|
||||
.ok_or_else(|| zclaw_types::ZclawError::NotFound(format!("Approval not found: {}", id)))?;
|
||||
|
||||
entry.status = if approved { "approved".to_string() } else { "rejected".to_string() };
|
||||
|
||||
if approved {
|
||||
let hand_id = entry.hand_id.clone();
|
||||
let input = entry.input.clone();
|
||||
drop(approvals); // Release lock before async hand execution
|
||||
|
||||
// Execute the hand in background
|
||||
let hands = self.hands.clone();
|
||||
let approvals = self.pending_approvals.clone();
|
||||
let id_owned = id.to_string();
|
||||
tokio::spawn(async move {
|
||||
let context = HandContext::default();
|
||||
let result = hands.execute(&hand_id, &context, input).await;
|
||||
|
||||
// Update approval status based on execution result
|
||||
let mut approvals = approvals.lock().await;
|
||||
if let Some(entry) = approvals.iter_mut().find(|a| a.id == id_owned) {
|
||||
match result {
|
||||
Ok(_) => entry.status = "completed".to_string(),
|
||||
Err(e) => {
|
||||
entry.status = "failed".to_string();
|
||||
// Store error in input metadata
|
||||
if let Some(obj) = entry.input.as_object_mut() {
|
||||
obj.insert("error".to_string(), Value::String(format!("{}", e)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Cancel a pending approval
|
||||
pub async fn cancel_approval(&self, id: &str) -> Result<()> {
|
||||
let mut approvals = self.pending_approvals.lock().await;
|
||||
let entry = approvals.iter_mut().find(|a| a.id == id && a.status == "pending")
|
||||
.ok_or_else(|| zclaw_types::ZclawError::NotFound(format!("Approval not found: {}", id)))?;
|
||||
entry.status = "cancelled".to_string();
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ tracing = { workspace = true }
|
||||
|
||||
# SQLite
|
||||
sqlx = { workspace = true }
|
||||
libsqlite3-sys = { workspace = true }
|
||||
|
||||
# Async utilities
|
||||
futures = { workspace = true }
|
||||
|
||||
@@ -46,11 +46,14 @@ pub async fn export_files(
|
||||
.map_err(|e| ActionError::Export(format!("Write error: {}", e)))?;
|
||||
}
|
||||
ExportFormat::Pptx => {
|
||||
// Will integrate with zclaw-kernel export
|
||||
return Err(ActionError::Export("PPTX export requires kernel integration".to_string()));
|
||||
return Err(ActionError::Export(
|
||||
"PPTX 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 JSON 格式导出后转换。".to_string(),
|
||||
));
|
||||
}
|
||||
ExportFormat::Pdf => {
|
||||
return Err(ActionError::Export("PDF export not yet implemented".to_string()));
|
||||
return Err(ActionError::Export(
|
||||
"PDF 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 HTML 格式导出后通过浏览器打印为 PDF。".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
//! Hand execution action
|
||||
|
||||
use std::collections::HashMap;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::ActionError;
|
||||
|
||||
/// Execute a hand action
|
||||
pub async fn execute_hand(
|
||||
hand_id: &str,
|
||||
action: &str,
|
||||
_params: HashMap<String, Value>,
|
||||
) -> Result<Value, ActionError> {
|
||||
// This will be implemented by injecting the hand registry
|
||||
// For now, return an error indicating it needs configuration
|
||||
|
||||
Err(ActionError::Hand(format!(
|
||||
"Hand '{}' action '{}' requires hand registry configuration",
|
||||
hand_id, action
|
||||
)))
|
||||
}
|
||||
@@ -7,8 +7,6 @@ mod parallel;
|
||||
mod render;
|
||||
mod export;
|
||||
mod http;
|
||||
mod skill;
|
||||
mod hand;
|
||||
mod orchestration;
|
||||
|
||||
pub use llm::*;
|
||||
@@ -16,8 +14,6 @@ pub use parallel::*;
|
||||
pub use render::*;
|
||||
pub use export::*;
|
||||
pub use http::*;
|
||||
pub use skill::*;
|
||||
pub use hand::*;
|
||||
pub use orchestration::*;
|
||||
|
||||
use std::collections::HashMap;
|
||||
@@ -256,11 +252,14 @@ impl ActionRegistry {
|
||||
tokio::fs::write(&path, content).await?;
|
||||
}
|
||||
ExportFormat::Pptx => {
|
||||
// Will integrate with pptx exporter
|
||||
return Err(ActionError::Export("PPTX export not yet implemented".to_string()));
|
||||
return Err(ActionError::Export(
|
||||
"PPTX 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 JSON 格式导出后转换。".to_string(),
|
||||
));
|
||||
}
|
||||
ExportFormat::Pdf => {
|
||||
return Err(ActionError::Export("PDF export not yet implemented".to_string()));
|
||||
return Err(ActionError::Export(
|
||||
"PDF 导出暂不可用。桌面端可通过 Pipeline 结果面板使用 HTML 格式导出后通过浏览器打印为 PDF。".to_string(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
//! Skill execution action
|
||||
|
||||
use std::collections::HashMap;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::ActionError;
|
||||
|
||||
/// Execute a skill by ID
|
||||
pub async fn execute_skill(
|
||||
skill_id: &str,
|
||||
_input: HashMap<String, Value>,
|
||||
) -> Result<Value, ActionError> {
|
||||
// This will be implemented by injecting the skill registry
|
||||
// For now, return an error indicating it needs configuration
|
||||
|
||||
Err(ActionError::Skill(format!(
|
||||
"Skill '{}' execution requires skill registry configuration",
|
||||
skill_id
|
||||
)))
|
||||
}
|
||||
@@ -10,11 +10,9 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
use futures::future::join_all;
|
||||
use serde_json::{Value, json};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::types_v2::{Stage, ConditionalBranch, PresentationType};
|
||||
use crate::types_v2::{Stage, ConditionalBranch};
|
||||
use crate::engine::context::{ExecutionContextV2, ContextError};
|
||||
|
||||
/// Stage execution result
|
||||
@@ -242,14 +240,6 @@ impl StageEngine {
|
||||
Ok(result)
|
||||
}
|
||||
Err(e) => {
|
||||
let result = StageResult {
|
||||
stage_id: stage_id.clone(),
|
||||
output: Value::Null,
|
||||
status: StageStatus::Failed,
|
||||
error: Some(e.to_string()),
|
||||
duration_ms,
|
||||
};
|
||||
|
||||
self.emit_event(StageEvent::Error {
|
||||
stage_id,
|
||||
error: e.to_string(),
|
||||
@@ -312,7 +302,7 @@ impl StageEngine {
|
||||
stage_id: &str,
|
||||
each: &str,
|
||||
stage_template: &Stage,
|
||||
max_workers: usize,
|
||||
_max_workers: usize,
|
||||
context: &mut ExecutionContextV2,
|
||||
) -> Result<Value, StageError> {
|
||||
// Resolve the array to iterate over
|
||||
@@ -419,7 +409,7 @@ impl StageEngine {
|
||||
/// Execute compose stage
|
||||
async fn execute_compose(
|
||||
&self,
|
||||
stage_id: &str,
|
||||
_stage_id: &str,
|
||||
template: &str,
|
||||
context: &ExecutionContextV2,
|
||||
) -> Result<Value, StageError> {
|
||||
@@ -568,7 +558,8 @@ impl StageEngine {
|
||||
Ok(resolved_value)
|
||||
}
|
||||
|
||||
/// Clone with drivers
|
||||
/// Clone with drivers (reserved for future use)
|
||||
#[allow(dead_code)]
|
||||
fn clone_with_drivers(&self) -> Self {
|
||||
Self {
|
||||
llm_driver: self.llm_driver.clone(),
|
||||
|
||||
@@ -396,6 +396,7 @@ pub trait LlmIntentDriver: Send + Sync {
|
||||
}
|
||||
|
||||
/// Default LLM driver implementation using prompt-based matching
|
||||
#[allow(dead_code)]
|
||||
pub struct DefaultLlmIntentDriver {
|
||||
/// Model ID to use
|
||||
model_id: String,
|
||||
|
||||
@@ -57,6 +57,7 @@ pub mod intent;
|
||||
pub mod engine;
|
||||
pub mod presentation;
|
||||
|
||||
// Glob re-exports with explicit disambiguation for conflicting names
|
||||
pub use types::*;
|
||||
pub use types_v2::*;
|
||||
pub use parser::*;
|
||||
@@ -67,6 +68,14 @@ pub use trigger::*;
|
||||
pub use intent::*;
|
||||
pub use engine::*;
|
||||
pub use presentation::*;
|
||||
|
||||
// Explicit re-exports: presentation::* wins for PresentationType/ExportFormat
|
||||
// types_v2::* wins for InputMode, engine::* wins for LoopContext
|
||||
pub use presentation::PresentationType;
|
||||
pub use presentation::ExportFormat;
|
||||
pub use types_v2::InputMode;
|
||||
pub use engine::context::LoopContext;
|
||||
|
||||
pub use actions::ActionRegistry;
|
||||
pub use actions::{LlmActionDriver, SkillActionDriver, HandActionDriver, OrchestrationActionDriver};
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
//! - Better recommendations for ambiguous cases
|
||||
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use super::types::*;
|
||||
|
||||
|
||||
@@ -254,13 +254,13 @@ pub fn compile_pattern(pattern: &str) -> Result<CompiledPattern, PatternError> {
|
||||
'{' => {
|
||||
// Named capture group
|
||||
let mut name = String::new();
|
||||
let mut has_type = false;
|
||||
let mut _has_type = false;
|
||||
|
||||
while let Some(c) = chars.next() {
|
||||
match c {
|
||||
'}' => break,
|
||||
':' => {
|
||||
has_type = true;
|
||||
_has_type = true;
|
||||
// Skip type part
|
||||
while let Some(nc) = chars.peek() {
|
||||
if *nc == '}' {
|
||||
|
||||
365
crates/zclaw-runtime/src/compaction.rs
Normal file
365
crates/zclaw-runtime/src/compaction.rs
Normal file
@@ -0,0 +1,365 @@
|
||||
//! Context compaction for the agent loop.
|
||||
//!
|
||||
//! Provides rule-based token estimation and message compaction to prevent
|
||||
//! conversations from exceeding LLM context windows. When the estimated
|
||||
//! token count exceeds the configured threshold, older messages are
|
||||
//! summarized into a single system message and only recent messages are
|
||||
//! retained.
|
||||
|
||||
use zclaw_types::Message;
|
||||
|
||||
/// Number of recent messages to preserve after compaction.
|
||||
const DEFAULT_KEEP_RECENT: usize = 6;
|
||||
|
||||
/// Heuristic token count estimation.
///
/// CJK ideographs count ~1.5 tokens each, fullwidth punctuation ~1.0,
/// whitespace ~0.25, and other (ASCII/Latin) characters ~0.3 (roughly
/// 4 chars per token). Intentionally conservative (overestimates) to
/// avoid hitting real context limits.
pub fn estimate_tokens(text: &str) -> usize {
    if text.is_empty() {
        return 0;
    }

    let total: f64 = text
        .chars()
        .map(|ch| {
            let cp = ch as u32;
            let is_cjk_ideograph = (0x4E00..=0x9FFF).contains(&cp)
                || (0x3400..=0x4DBF).contains(&cp)
                || (0x20000..=0x2A6DF).contains(&cp)
                || (0xF900..=0xFAFF).contains(&cp);

            if is_cjk_ideograph {
                // CJK ideographs — ~1.5 tokens each
                1.5
            } else if (0x3000..=0x303F).contains(&cp) || (0xFF00..=0xFFEF).contains(&cp) {
                // CJK / fullwidth punctuation — ~1.0 token
                1.0
            } else if matches!(ch, ' ' | '\n' | '\t') {
                // Whitespace is cheap.
                0.25
            } else {
                // ASCII / Latin — roughly 4 chars per token
                0.3
            }
        })
        .sum();

    total.ceil() as usize
}
|
||||
|
||||
/// Estimate total tokens for a list of messages (including framing overhead).
|
||||
pub fn estimate_messages_tokens(messages: &[Message]) -> usize {
|
||||
let mut total = 0;
|
||||
for msg in messages {
|
||||
match msg {
|
||||
Message::User { content } => {
|
||||
total += estimate_tokens(content);
|
||||
total += 4;
|
||||
}
|
||||
Message::Assistant { content, thinking } => {
|
||||
total += estimate_tokens(content);
|
||||
if let Some(th) = thinking {
|
||||
total += estimate_tokens(th);
|
||||
}
|
||||
total += 4;
|
||||
}
|
||||
Message::System { content } => {
|
||||
total += estimate_tokens(content);
|
||||
total += 4;
|
||||
}
|
||||
Message::ToolUse { input, .. } => {
|
||||
total += estimate_tokens(&input.to_string());
|
||||
total += 4;
|
||||
}
|
||||
Message::ToolResult { output, .. } => {
|
||||
total += estimate_tokens(&output.to_string());
|
||||
total += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
total
|
||||
}
|
||||
|
||||
/// Compact a message list by summarizing old messages and keeping recent ones.
|
||||
///
|
||||
/// When `messages.len() > keep_recent`, the oldest messages are summarized
|
||||
/// into a single system message. System messages at the beginning of the
|
||||
/// conversation are always preserved.
|
||||
///
|
||||
/// Returns the compacted message list and the number of original messages removed.
|
||||
pub fn compact_messages(messages: Vec<Message>, keep_recent: usize) -> (Vec<Message>, usize) {
|
||||
if messages.len() <= keep_recent {
|
||||
return (messages, 0);
|
||||
}
|
||||
|
||||
// Preserve leading system messages (they contain compaction summaries from prior runs)
|
||||
let leading_system_count = messages
|
||||
.iter()
|
||||
.take_while(|m| matches!(m, Message::System { .. }))
|
||||
.count();
|
||||
|
||||
// Calculate split point: keep leading system + recent messages
|
||||
let keep_from_end = keep_recent.min(messages.len().saturating_sub(leading_system_count));
|
||||
let split_index = messages.len().saturating_sub(keep_from_end);
|
||||
|
||||
// Ensure we keep at least the leading system messages
|
||||
let split_index = split_index.max(leading_system_count);
|
||||
|
||||
if split_index == 0 {
|
||||
return (messages, 0);
|
||||
}
|
||||
|
||||
let old_messages = &messages[..split_index];
|
||||
let recent_messages = &messages[split_index..];
|
||||
|
||||
let summary = generate_summary(old_messages);
|
||||
let removed_count = old_messages.len();
|
||||
|
||||
let mut compacted = Vec::with_capacity(1 + recent_messages.len());
|
||||
compacted.push(Message::system(summary));
|
||||
compacted.extend(recent_messages.iter().cloned());
|
||||
|
||||
(compacted, removed_count)
|
||||
}
|
||||
|
||||
/// Check if compaction should be triggered and perform it if needed.
|
||||
///
|
||||
/// Returns the (possibly compacted) message list.
|
||||
pub fn maybe_compact(messages: Vec<Message>, threshold: usize) -> Vec<Message> {
|
||||
let tokens = estimate_messages_tokens(&messages);
|
||||
if tokens < threshold {
|
||||
return messages;
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[Compaction] Triggered: {} tokens > {} threshold, {} messages",
|
||||
tokens,
|
||||
threshold,
|
||||
messages.len(),
|
||||
);
|
||||
|
||||
let (compacted, removed) = compact_messages(messages, DEFAULT_KEEP_RECENT);
|
||||
tracing::info!(
|
||||
"[Compaction] Removed {} messages, {} remain",
|
||||
removed,
|
||||
compacted.len(),
|
||||
);
|
||||
|
||||
compacted
|
||||
}
|
||||
|
||||
/// Generate a rule-based summary of old messages.
|
||||
fn generate_summary(messages: &[Message]) -> String {
|
||||
if messages.is_empty() {
|
||||
return "[对话开始]".to_string();
|
||||
}
|
||||
|
||||
let mut sections: Vec<String> = vec!["[以下是之前对话的摘要]".to_string()];
|
||||
|
||||
let mut user_count = 0;
|
||||
let mut assistant_count = 0;
|
||||
let mut topics: Vec<String> = Vec::new();
|
||||
|
||||
for msg in messages {
|
||||
match msg {
|
||||
Message::User { content } => {
|
||||
user_count += 1;
|
||||
let topic = extract_topic(content);
|
||||
if let Some(t) = topic {
|
||||
topics.push(t);
|
||||
}
|
||||
}
|
||||
Message::Assistant { .. } => {
|
||||
assistant_count += 1;
|
||||
}
|
||||
Message::System { content } => {
|
||||
// Skip system messages that are previous compaction summaries
|
||||
if !content.starts_with("[以下是之前对话的摘要]") {
|
||||
sections.push(format!("系统提示: {}", truncate(content, 60)));
|
||||
}
|
||||
}
|
||||
Message::ToolUse { tool, .. } => {
|
||||
sections.push(format!("工具调用: {}", tool.as_str()));
|
||||
}
|
||||
Message::ToolResult { .. } => {
|
||||
// Skip tool results in summary
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !topics.is_empty() {
|
||||
let topic_list: Vec<String> = topics.iter().take(8).cloned().collect();
|
||||
sections.push(format!("讨论主题: {}", topic_list.join("; ")));
|
||||
}
|
||||
|
||||
sections.push(format!(
|
||||
"(已压缩 {} 条消息,其中用户 {} 条,助手 {} 条)",
|
||||
messages.len(),
|
||||
user_count,
|
||||
assistant_count,
|
||||
));
|
||||
|
||||
let summary = sections.join("\n");
|
||||
|
||||
// Enforce max length
|
||||
let max_chars = 800;
|
||||
if summary.len() > max_chars {
|
||||
format!("{}...\n(摘要已截断)", &summary[..max_chars])
|
||||
} else {
|
||||
summary
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the main topic from a user message (first sentence or first 50 chars).
///
/// Returns `None` for whitespace-only input. If a sentence terminator
/// (。!?or newline) appears within the first 80 bytes, the text up to it
/// is used; otherwise the message is returned whole (≤50 chars) or cut at
/// 50 chars with an ellipsis.
fn extract_topic(content: &str) -> Option<String> {
    let trimmed = content.trim();
    if trimmed.is_empty() {
        return None;
    }

    // Stop at the first sentence terminator, if it appears early enough.
    if let Some((idx, marker)) = trimmed
        .char_indices()
        .find(|(_, c)| matches!(c, '。' | '!' | '?' | '\n'))
    {
        let end = idx + marker.len_utf8();
        if end <= 80 {
            return Some(trimmed[..end].trim().to_string());
        }
    }

    if trimmed.chars().count() <= 50 {
        Some(trimmed.to_string())
    } else {
        Some(format!("{}...", trimmed.chars().take(50).collect::<String>()))
    }
}
|
||||
|
||||
/// Truncate text to max_chars at char boundary.
fn truncate(text: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the (max_chars + 1)-th char: if it does not
    // exist, the text already fits; otherwise its byte index is exactly the
    // boundary after the first `max_chars` chars.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((byte_idx, _)) => format!("{}...", &text[..byte_idx]),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_estimate_tokens_empty() {
        assert_eq!(estimate_tokens(""), 0);
    }

    #[test]
    fn test_estimate_tokens_english() {
        assert!(estimate_tokens("Hello world") > 0);
    }

    #[test]
    fn test_estimate_tokens_cjk() {
        // CJK chars are ~1.5 tokens each
        assert!(estimate_tokens("你好世界") > 3);
    }

    #[test]
    fn test_estimate_messages_tokens() {
        let messages = vec![Message::user("Hello"), Message::assistant("Hi there")];
        assert!(estimate_messages_tokens(&messages) > 0);
    }

    #[test]
    fn test_compact_messages_under_threshold() {
        let messages = vec![Message::user("Hello"), Message::assistant("Hi")];
        let (result, removed) = compact_messages(messages, 6);
        assert_eq!(removed, 0);
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn test_compact_messages_over_threshold() {
        let messages: Vec<Message> = (0..10)
            .flat_map(|i| {
                [
                    Message::user(format!("Question {}", i)),
                    Message::assistant(format!("Answer {}", i)),
                ]
            })
            .collect();

        let (result, removed) = compact_messages(messages, 4);
        assert!(removed > 0);
        // 1 summary + 4 recent messages
        assert_eq!(result.len(), 5);
        // The summary is injected as a leading system message.
        assert!(matches!(&result[0], Message::System { .. }));
    }

    #[test]
    fn test_compact_preserves_leading_system() {
        let messages = vec![
            Message::system("You are helpful"),
            Message::user("Q1"),
            Message::assistant("A1"),
            Message::user("Q2"),
            Message::assistant("A2"),
            Message::user("Q3"),
            Message::assistant("A3"),
        ];

        let (result, removed) = compact_messages(messages, 4);
        assert!(removed > 0);
        // Should start with compaction summary, then recent messages.
        assert!(matches!(&result[0], Message::System { .. }));
    }

    #[test]
    fn test_maybe_compact_under_threshold() {
        let messages = vec![
            Message::user("Short message"),
            Message::assistant("Short reply"),
        ];
        assert_eq!(maybe_compact(messages, 100_000).len(), 2);
    }

    #[test]
    fn test_extract_topic_sentence() {
        let topic = extract_topic("什么是Rust的所有权系统?").unwrap();
        assert!(topic.contains("所有权"));
    }

    #[test]
    fn test_extract_topic_short() {
        assert_eq!(extract_topic("Hello").unwrap(), "Hello");
    }

    #[test]
    fn test_extract_topic_long() {
        let long = "This is a very long message that exceeds fifty characters in total length";
        assert!(extract_topic(long).unwrap().ends_with("..."));
    }

    #[test]
    fn test_generate_summary() {
        let messages = vec![
            Message::user("What is Rust?"),
            Message::assistant("Rust is a systems programming language"),
            Message::user("How does ownership work?"),
            Message::assistant("Ownership is Rust's memory management system"),
        ];
        let summary = generate_summary(&messages);
        assert!(summary.contains("摘要"));
        assert!(summary.contains("2"));
    }
}
|
||||
@@ -1,9 +1,17 @@
|
||||
//! Google Gemini driver implementation
|
||||
//!
|
||||
//! Implements the Gemini REST API v1beta with full support for:
|
||||
//! - Text generation (complete and streaming)
|
||||
//! - Tool / function calling
|
||||
//! - System instructions
|
||||
//! - Token usage reporting
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::Stream;
|
||||
use async_stream::stream;
|
||||
use futures::{Stream, StreamExt};
|
||||
use secrecy::{ExposeSecret, SecretString};
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::pin::Pin;
|
||||
use zclaw_types::{Result, ZclawError};
|
||||
|
||||
@@ -11,7 +19,6 @@ use super::{CompletionRequest, CompletionResponse, ContentBlock, LlmDriver, Stop
|
||||
use crate::stream::StreamChunk;
|
||||
|
||||
/// Google Gemini driver
|
||||
#[allow(dead_code)] // TODO: Implement full Gemini API support
|
||||
pub struct GeminiDriver {
|
||||
client: Client,
|
||||
api_key: SecretString,
|
||||
@@ -21,11 +28,31 @@ pub struct GeminiDriver {
|
||||
impl GeminiDriver {
|
||||
pub fn new(api_key: SecretString) -> Self {
|
||||
Self {
|
||||
client: Client::new(),
|
||||
client: Client::builder()
|
||||
.user_agent(crate::USER_AGENT)
|
||||
.http1_only()
|
||||
.timeout(std::time::Duration::from_secs(120))
|
||||
.connect_timeout(std::time::Duration::from_secs(30))
|
||||
.build()
|
||||
.unwrap_or_else(|_| Client::new()),
|
||||
api_key,
|
||||
base_url: "https://generativelanguage.googleapis.com/v1beta".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_base_url(api_key: SecretString, base_url: String) -> Self {
|
||||
Self {
|
||||
client: Client::builder()
|
||||
.user_agent(crate::USER_AGENT)
|
||||
.http1_only()
|
||||
.timeout(std::time::Duration::from_secs(120))
|
||||
.connect_timeout(std::time::Duration::from_secs(30))
|
||||
.build()
|
||||
.unwrap_or_else(|_| Client::new()),
|
||||
api_key,
|
||||
base_url,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -39,25 +66,594 @@ impl LlmDriver for GeminiDriver {
|
||||
}
|
||||
|
||||
async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
|
||||
// TODO: Implement actual API call
|
||||
Ok(CompletionResponse {
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "Gemini driver not yet implemented".to_string(),
|
||||
}],
|
||||
model: request.model,
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
stop_reason: StopReason::EndTurn,
|
||||
})
|
||||
let api_request = self.build_api_request(&request);
|
||||
let url = format!(
|
||||
"{}/models/{}:generateContent?key={}",
|
||||
self.base_url,
|
||||
request.model,
|
||||
self.api_key.expose_secret()
|
||||
);
|
||||
|
||||
tracing::debug!(target: "gemini_driver", "Sending request to: {}", url);
|
||||
|
||||
let response = self.client
|
||||
.post(&url)
|
||||
.header("content-type", "application/json")
|
||||
.json(&api_request)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| ZclawError::LlmError(format!("HTTP request failed: {}", e)))?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
tracing::warn!(target: "gemini_driver", "API error {}: {}", status, body);
|
||||
return Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
|
||||
}
|
||||
|
||||
let api_response: GeminiResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| ZclawError::LlmError(format!("Failed to parse response: {}", e)))?;
|
||||
|
||||
Ok(self.convert_response(api_response, request.model))
|
||||
}
|
||||
|
||||
fn stream(
|
||||
&self,
|
||||
_request: CompletionRequest,
|
||||
request: CompletionRequest,
|
||||
) -> Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send + '_>> {
|
||||
// Placeholder - return error stream
|
||||
Box::pin(futures::stream::once(async {
|
||||
Err(ZclawError::LlmError("Gemini streaming not yet implemented".to_string()))
|
||||
}))
|
||||
let api_request = self.build_api_request(&request);
|
||||
let url = format!(
|
||||
"{}/models/{}:streamGenerateContent?alt=sse&key={}",
|
||||
self.base_url,
|
||||
request.model,
|
||||
self.api_key.expose_secret()
|
||||
);
|
||||
|
||||
tracing::debug!(target: "gemini_driver", "Starting stream request to: {}", url);
|
||||
|
||||
Box::pin(stream! {
|
||||
let response = match self.client
|
||||
.post(&url)
|
||||
.header("content-type", "application/json")
|
||||
.timeout(std::time::Duration::from_secs(120))
|
||||
.json(&api_request)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(r) => {
|
||||
tracing::debug!(target: "gemini_driver", "Stream response status: {}", r.status());
|
||||
r
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::error!(target: "gemini_driver", "HTTP request failed: {:?}", e);
|
||||
yield Err(ZclawError::LlmError(format!("HTTP request failed: {}", e)));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
yield Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
|
||||
return;
|
||||
}
|
||||
|
||||
let mut byte_stream = response.bytes_stream();
|
||||
let mut accumulated_tool_calls: std::collections::HashMap<usize, (String, String)> = std::collections::HashMap::new();
|
||||
|
||||
while let Some(chunk_result) = byte_stream.next().await {
|
||||
let chunk = match chunk_result {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
yield Err(ZclawError::LlmError(format!("Stream error: {}", e)));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let text = String::from_utf8_lossy(&chunk);
|
||||
for line in text.lines() {
|
||||
if let Some(data) = line.strip_prefix("data: ") {
|
||||
match serde_json::from_str::<GeminiStreamResponse>(data) {
|
||||
Ok(resp) => {
|
||||
if let Some(candidate) = resp.candidates.first() {
|
||||
let content = match &candidate.content {
|
||||
Some(c) => c,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let parts = &content.parts;
|
||||
|
||||
for (idx, part) in parts.iter().enumerate() {
|
||||
// Handle text content
|
||||
if let Some(text) = &part.text {
|
||||
if !text.is_empty() {
|
||||
yield Ok(StreamChunk::TextDelta { delta: text.clone() });
|
||||
}
|
||||
}
|
||||
|
||||
// Handle function call (tool use)
|
||||
if let Some(fc) = &part.function_call {
|
||||
let name = fc.name.clone().unwrap_or_default();
|
||||
let args = fc.args.clone().unwrap_or(serde_json::Value::Object(Default::default()));
|
||||
|
||||
// Emit ToolUseStart if this is a new tool call
|
||||
if !accumulated_tool_calls.contains_key(&idx) {
|
||||
accumulated_tool_calls.insert(idx, (name.clone(), String::new()));
|
||||
yield Ok(StreamChunk::ToolUseStart {
|
||||
id: format!("gemini_call_{}", idx),
|
||||
name,
|
||||
});
|
||||
}
|
||||
|
||||
// Emit the function arguments as delta
|
||||
let args_str = serde_json::to_string(&args).unwrap_or_default();
|
||||
let call_id = format!("gemini_call_{}", idx);
|
||||
yield Ok(StreamChunk::ToolUseDelta {
|
||||
id: call_id.clone(),
|
||||
delta: args_str.clone(),
|
||||
});
|
||||
|
||||
// Accumulate
|
||||
if let Some(entry) = accumulated_tool_calls.get_mut(&idx) {
|
||||
entry.1 = args_str;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// When the candidate is finished, emit ToolUseEnd for all pending
|
||||
if let Some(ref finish_reason) = candidate.finish_reason {
|
||||
let is_final = finish_reason == "STOP" || finish_reason == "MAX_TOKENS";
|
||||
|
||||
if is_final {
|
||||
// Emit ToolUseEnd for all accumulated tool calls
|
||||
for (idx, (_name, args_str)) in &accumulated_tool_calls {
|
||||
let input: serde_json::Value = if args_str.is_empty() {
|
||||
serde_json::json!({})
|
||||
} else {
|
||||
serde_json::from_str(args_str).unwrap_or_else(|e| {
|
||||
tracing::warn!(target: "gemini_driver", "Failed to parse tool args '{}': {}", args_str, e);
|
||||
serde_json::json!({})
|
||||
})
|
||||
};
|
||||
yield Ok(StreamChunk::ToolUseEnd {
|
||||
id: format!("gemini_call_{}", idx),
|
||||
input,
|
||||
});
|
||||
}
|
||||
|
||||
// Extract usage metadata from the response
|
||||
let usage = resp.usage_metadata.as_ref();
|
||||
let input_tokens = usage.map(|u| u.prompt_token_count.unwrap_or(0)).unwrap_or(0);
|
||||
let output_tokens = usage.map(|u| u.candidates_token_count.unwrap_or(0)).unwrap_or(0);
|
||||
|
||||
let stop_reason = match finish_reason.as_str() {
|
||||
"STOP" => "end_turn",
|
||||
"MAX_TOKENS" => "max_tokens",
|
||||
"SAFETY" => "error",
|
||||
"RECITATION" => "error",
|
||||
_ => "end_turn",
|
||||
};
|
||||
|
||||
yield Ok(StreamChunk::Complete {
|
||||
input_tokens,
|
||||
output_tokens,
|
||||
stop_reason: stop_reason.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(target: "gemini_driver", "Failed to parse SSE event: {} - {}", e, data);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl GeminiDriver {
|
||||
/// Convert a CompletionRequest into the Gemini API request format.
|
||||
///
|
||||
/// Key mapping decisions:
|
||||
/// - `system` prompt maps to `systemInstruction`
|
||||
/// - Messages use Gemini's `contents` array with `role`/`parts`
|
||||
/// - Tool definitions use `functionDeclarations`
|
||||
/// - Tool results are sent as `functionResponse` parts in `user` messages
|
||||
fn build_api_request(&self, request: &CompletionRequest) -> GeminiRequest {
|
||||
let mut contents: Vec<GeminiContent> = Vec::new();
|
||||
|
||||
for msg in &request.messages {
|
||||
match msg {
|
||||
zclaw_types::Message::User { content } => {
|
||||
contents.push(GeminiContent {
|
||||
role: "user".to_string(),
|
||||
parts: vec![GeminiPart {
|
||||
text: Some(content.clone()),
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: None,
|
||||
}],
|
||||
});
|
||||
}
|
||||
zclaw_types::Message::Assistant { content, thinking } => {
|
||||
let mut parts = Vec::new();
|
||||
// Gemini does not have a native "thinking" field, so we prepend
|
||||
// any thinking content as a text part with a marker.
|
||||
if let Some(think) = thinking {
|
||||
if !think.is_empty() {
|
||||
parts.push(GeminiPart {
|
||||
text: Some(format!("[thinking]\n{}\n[/thinking]", think)),
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
parts.push(GeminiPart {
|
||||
text: Some(content.clone()),
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: None,
|
||||
});
|
||||
contents.push(GeminiContent {
|
||||
role: "model".to_string(),
|
||||
parts,
|
||||
});
|
||||
}
|
||||
zclaw_types::Message::ToolUse { id: _, tool, input } => {
|
||||
// Tool use from the assistant is represented as a functionCall part
|
||||
let args = if input.is_null() {
|
||||
serde_json::json!({})
|
||||
} else {
|
||||
input.clone()
|
||||
};
|
||||
contents.push(GeminiContent {
|
||||
role: "model".to_string(),
|
||||
parts: vec![GeminiPart {
|
||||
text: None,
|
||||
inline_data: None,
|
||||
function_call: Some(GeminiFunctionCall {
|
||||
name: Some(tool.to_string()),
|
||||
args: Some(args),
|
||||
}),
|
||||
function_response: None,
|
||||
}],
|
||||
});
|
||||
}
|
||||
zclaw_types::Message::ToolResult { tool_call_id, tool, output, is_error } => {
|
||||
// Tool results are sent as functionResponse parts in a "user" role message.
|
||||
// Gemini requires that function responses reference the function name
|
||||
// and include the response wrapped in a "result" or "error" key.
|
||||
let response_content = if *is_error {
|
||||
serde_json::json!({ "error": output.to_string() })
|
||||
} else {
|
||||
serde_json::json!({ "result": output.clone() })
|
||||
};
|
||||
|
||||
contents.push(GeminiContent {
|
||||
role: "user".to_string(),
|
||||
parts: vec![GeminiPart {
|
||||
text: None,
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: Some(GeminiFunctionResponse {
|
||||
name: tool.to_string(),
|
||||
response: response_content,
|
||||
}),
|
||||
}],
|
||||
});
|
||||
|
||||
// Gemini ignores tool_call_id, but we log it for debugging
|
||||
let _ = tool_call_id;
|
||||
}
|
||||
zclaw_types::Message::System { content } => {
|
||||
// System messages are converted to user messages with system context.
|
||||
// Note: the primary system prompt is handled via systemInstruction.
|
||||
// Inline system messages in conversation history become user messages.
|
||||
contents.push(GeminiContent {
|
||||
role: "user".to_string(),
|
||||
parts: vec![GeminiPart {
|
||||
text: Some(content.clone()),
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: None,
|
||||
}],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build tool declarations
|
||||
let function_declarations: Vec<GeminiFunctionDeclaration> = request.tools
|
||||
.iter()
|
||||
.map(|t| GeminiFunctionDeclaration {
|
||||
name: t.name.clone(),
|
||||
description: t.description.clone(),
|
||||
parameters: t.input_schema.clone(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Build generation config
|
||||
let mut generation_config = GeminiGenerationConfig::default();
|
||||
if let Some(temp) = request.temperature {
|
||||
generation_config.temperature = Some(temp);
|
||||
}
|
||||
if let Some(max) = request.max_tokens {
|
||||
generation_config.max_output_tokens = Some(max);
|
||||
}
|
||||
if !request.stop.is_empty() {
|
||||
generation_config.stop_sequences = Some(request.stop.clone());
|
||||
}
|
||||
|
||||
// Build system instruction
|
||||
let system_instruction = request.system.as_ref().map(|s| GeminiSystemInstruction {
|
||||
parts: vec![GeminiPart {
|
||||
text: Some(s.clone()),
|
||||
inline_data: None,
|
||||
function_call: None,
|
||||
function_response: None,
|
||||
}],
|
||||
});
|
||||
|
||||
GeminiRequest {
|
||||
contents,
|
||||
system_instruction,
|
||||
generation_config: Some(generation_config),
|
||||
tools: if function_declarations.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(vec![GeminiTool {
|
||||
function_declarations,
|
||||
}])
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a Gemini API response into a CompletionResponse.
|
||||
fn convert_response(&self, api_response: GeminiResponse, model: String) -> CompletionResponse {
|
||||
let candidate = api_response.candidates.first();
|
||||
|
||||
let (content, stop_reason) = match candidate {
|
||||
Some(c) => {
|
||||
let parts = c.content.as_ref()
|
||||
.map(|content| content.parts.as_slice())
|
||||
.unwrap_or(&[]);
|
||||
|
||||
let mut blocks: Vec<ContentBlock> = Vec::new();
|
||||
let mut has_tool_use = false;
|
||||
|
||||
for part in parts {
|
||||
// Handle text content
|
||||
if let Some(text) = &part.text {
|
||||
// Skip thinking markers we injected
|
||||
if text.starts_with("[thinking]\n") && text.contains("[/thinking]") {
|
||||
let thinking_content = text
|
||||
.strip_prefix("[thinking]\n")
|
||||
.and_then(|s| s.strip_suffix("\n[/thinking]"))
|
||||
.unwrap_or("");
|
||||
if !thinking_content.is_empty() {
|
||||
blocks.push(ContentBlock::Thinking {
|
||||
thinking: thinking_content.to_string(),
|
||||
});
|
||||
}
|
||||
} else if !text.is_empty() {
|
||||
blocks.push(ContentBlock::Text { text: text.clone() });
|
||||
}
|
||||
}
|
||||
|
||||
// Handle function call (tool use)
|
||||
if let Some(fc) = &part.function_call {
|
||||
has_tool_use = true;
|
||||
blocks.push(ContentBlock::ToolUse {
|
||||
id: format!("gemini_call_{}", blocks.len()),
|
||||
name: fc.name.clone().unwrap_or_default(),
|
||||
input: fc.args.clone().unwrap_or(serde_json::Value::Object(Default::default())),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// If there are no content blocks, add an empty text block
|
||||
if blocks.is_empty() {
|
||||
blocks.push(ContentBlock::Text { text: String::new() });
|
||||
}
|
||||
|
||||
let stop = match c.finish_reason.as_deref() {
|
||||
Some("STOP") => StopReason::EndTurn,
|
||||
Some("MAX_TOKENS") => StopReason::MaxTokens,
|
||||
Some("SAFETY") => StopReason::Error,
|
||||
Some("RECITATION") => StopReason::Error,
|
||||
Some("TOOL_USE") => StopReason::ToolUse,
|
||||
_ => {
|
||||
if has_tool_use {
|
||||
StopReason::ToolUse
|
||||
} else {
|
||||
StopReason::EndTurn
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
(blocks, stop)
|
||||
}
|
||||
None => {
|
||||
tracing::warn!(target: "gemini_driver", "No candidates in response");
|
||||
(
|
||||
vec![ContentBlock::Text { text: String::new() }],
|
||||
StopReason::EndTurn,
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let usage = api_response.usage_metadata.as_ref();
|
||||
let input_tokens = usage.map(|u| u.prompt_token_count.unwrap_or(0)).unwrap_or(0);
|
||||
let output_tokens = usage.map(|u| u.candidates_token_count.unwrap_or(0)).unwrap_or(0);
|
||||
|
||||
CompletionResponse {
|
||||
content,
|
||||
model,
|
||||
input_tokens,
|
||||
output_tokens,
|
||||
stop_reason,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gemini API request types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiRequest {
|
||||
contents: Vec<GeminiContent>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
system_instruction: Option<GeminiSystemInstruction>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
generation_config: Option<GeminiGenerationConfig>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
tools: Option<Vec<GeminiTool>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiContent {
|
||||
role: String,
|
||||
parts: Vec<GeminiPart>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
struct GeminiPart {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
text: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
inline_data: Option<serde_json::Value>,
|
||||
#[serde(rename = "functionCall", skip_serializing_if = "Option::is_none")]
|
||||
function_call: Option<GeminiFunctionCall>,
|
||||
#[serde(rename = "functionResponse", skip_serializing_if = "Option::is_none")]
|
||||
function_response: Option<GeminiFunctionResponse>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiSystemInstruction {
|
||||
parts: Vec<GeminiPart>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiGenerationConfig {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
temperature: Option<f32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
max_output_tokens: Option<u32>,
|
||||
#[serde(rename = "stopSequences", skip_serializing_if = "Option::is_none")]
|
||||
stop_sequences: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl Default for GeminiGenerationConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
temperature: None,
|
||||
max_output_tokens: None,
|
||||
stop_sequences: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiTool {
|
||||
#[serde(rename = "functionDeclarations")]
|
||||
function_declarations: Vec<GeminiFunctionDeclaration>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GeminiFunctionDeclaration {
|
||||
name: String,
|
||||
description: String,
|
||||
parameters: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
struct GeminiFunctionCall {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
name: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
args: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
struct GeminiFunctionResponse {
|
||||
name: String,
|
||||
response: serde_json::Value,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gemini API response types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GeminiResponse {
|
||||
#[serde(default)]
|
||||
candidates: Vec<GeminiCandidate>,
|
||||
#[serde(default)]
|
||||
usage_metadata: Option<GeminiUsageMetadata>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiCandidate {
|
||||
#[serde(default)]
|
||||
content: Option<GeminiResponseContent>,
|
||||
#[serde(default)]
|
||||
finish_reason: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiResponseContent {
|
||||
#[serde(default)]
|
||||
parts: Vec<GeminiResponsePart>,
|
||||
#[serde(default)]
|
||||
#[allow(dead_code)]
|
||||
role: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiResponsePart {
|
||||
#[serde(default)]
|
||||
text: Option<String>,
|
||||
#[serde(rename = "functionCall", default)]
|
||||
function_call: Option<GeminiResponseFunctionCall>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiResponseFunctionCall {
|
||||
#[serde(default)]
|
||||
name: Option<String>,
|
||||
#[serde(default)]
|
||||
args: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiUsageMetadata {
|
||||
#[serde(default)]
|
||||
prompt_token_count: Option<u32>,
|
||||
#[serde(default)]
|
||||
candidates_token_count: Option<u32>,
|
||||
#[serde(default)]
|
||||
#[allow(dead_code)]
|
||||
total_token_count: Option<u32>,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gemini streaming types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Streaming response from the Gemini SSE endpoint.
|
||||
/// Each SSE event contains the same structure as the non-streaming response,
|
||||
/// but with incremental content.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GeminiStreamResponse {
|
||||
#[serde(default)]
|
||||
candidates: Vec<GeminiCandidate>,
|
||||
#[serde(default)]
|
||||
usage_metadata: Option<GeminiUsageMetadata>,
|
||||
}
|
||||
|
||||
@@ -1,40 +1,250 @@
|
||||
//! Local LLM driver (Ollama, LM Studio, vLLM, etc.)
|
||||
//!
|
||||
//! Uses the OpenAI-compatible API format. The only differences from the
|
||||
//! OpenAI driver are: no API key is required, and base_url points to a
|
||||
//! local server.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::Stream;
|
||||
use async_stream::stream;
|
||||
use futures::{Stream, StreamExt};
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::pin::Pin;
|
||||
use zclaw_types::{Result, ZclawError};
|
||||
|
||||
use super::{CompletionRequest, CompletionResponse, ContentBlock, LlmDriver, StopReason};
|
||||
use crate::stream::StreamChunk;
|
||||
|
||||
/// Local LLM driver for Ollama, LM Studio, vLLM, etc.
|
||||
#[allow(dead_code)] // TODO: Implement full Local driver support
|
||||
/// Local LLM driver for Ollama, LM Studio, vLLM, and other OpenAI-compatible servers.
|
||||
pub struct LocalDriver {
|
||||
client: Client,
|
||||
base_url: String,
|
||||
}
|
||||
|
||||
impl LocalDriver {
|
||||
/// Create a driver pointing at a custom OpenAI-compatible endpoint.
|
||||
///
|
||||
/// The `base_url` should end with `/v1` (e.g. `http://localhost:8080/v1`).
|
||||
pub fn new(base_url: impl Into<String>) -> Self {
|
||||
Self {
|
||||
client: Client::new(),
|
||||
client: Client::builder()
|
||||
.user_agent(crate::USER_AGENT)
|
||||
.http1_only()
|
||||
.timeout(std::time::Duration::from_secs(300)) // 5 min -- local inference can be slow
|
||||
.connect_timeout(std::time::Duration::from_secs(10)) // short connect timeout
|
||||
.build()
|
||||
.unwrap_or_else(|_| Client::new()),
|
||||
base_url: base_url.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Ollama default endpoint (`http://localhost:11434/v1`).
|
||||
pub fn ollama() -> Self {
|
||||
Self::new("http://localhost:11434/v1")
|
||||
}
|
||||
|
||||
/// LM Studio default endpoint (`http://localhost:1234/v1`).
|
||||
pub fn lm_studio() -> Self {
|
||||
Self::new("http://localhost:1234/v1")
|
||||
}
|
||||
|
||||
/// vLLM default endpoint (`http://localhost:8000/v1`).
|
||||
pub fn vllm() -> Self {
|
||||
Self::new("http://localhost:8000/v1")
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// Request / response conversion (OpenAI-compatible format)
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
fn build_api_request(&self, request: &CompletionRequest) -> LocalApiRequest {
|
||||
let messages: Vec<LocalApiMessage> = request
|
||||
.messages
|
||||
.iter()
|
||||
.filter_map(|msg| match msg {
|
||||
zclaw_types::Message::User { content } => Some(LocalApiMessage {
|
||||
role: "user".to_string(),
|
||||
content: Some(content.clone()),
|
||||
tool_calls: None,
|
||||
}),
|
||||
zclaw_types::Message::Assistant {
|
||||
content,
|
||||
thinking: _,
|
||||
} => Some(LocalApiMessage {
|
||||
role: "assistant".to_string(),
|
||||
content: Some(content.clone()),
|
||||
tool_calls: None,
|
||||
}),
|
||||
zclaw_types::Message::System { content } => Some(LocalApiMessage {
|
||||
role: "system".to_string(),
|
||||
content: Some(content.clone()),
|
||||
tool_calls: None,
|
||||
}),
|
||||
zclaw_types::Message::ToolUse {
|
||||
id, tool, input, ..
|
||||
} => {
|
||||
let args = if input.is_null() {
|
||||
"{}".to_string()
|
||||
} else {
|
||||
serde_json::to_string(input).unwrap_or_else(|_| "{}".to_string())
|
||||
};
|
||||
Some(LocalApiMessage {
|
||||
role: "assistant".to_string(),
|
||||
content: None,
|
||||
tool_calls: Some(vec![LocalApiToolCall {
|
||||
id: id.clone(),
|
||||
r#type: "function".to_string(),
|
||||
function: LocalFunctionCall {
|
||||
name: tool.to_string(),
|
||||
arguments: args,
|
||||
},
|
||||
}]),
|
||||
})
|
||||
}
|
||||
zclaw_types::Message::ToolResult {
|
||||
output, is_error, ..
|
||||
} => Some(LocalApiMessage {
|
||||
role: "tool".to_string(),
|
||||
content: Some(if *is_error {
|
||||
format!("Error: {}", output)
|
||||
} else {
|
||||
output.to_string()
|
||||
}),
|
||||
tool_calls: None,
|
||||
}),
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Prepend system prompt when provided.
|
||||
let mut messages = messages;
|
||||
if let Some(system) = &request.system {
|
||||
messages.insert(
|
||||
0,
|
||||
LocalApiMessage {
|
||||
role: "system".to_string(),
|
||||
content: Some(system.clone()),
|
||||
tool_calls: None,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let tools: Vec<LocalApiTool> = request
|
||||
.tools
|
||||
.iter()
|
||||
.map(|t| LocalApiTool {
|
||||
r#type: "function".to_string(),
|
||||
function: LocalFunctionDef {
|
||||
name: t.name.clone(),
|
||||
description: t.description.clone(),
|
||||
parameters: t.input_schema.clone(),
|
||||
},
|
||||
})
|
||||
.collect();
|
||||
|
||||
LocalApiRequest {
|
||||
model: request.model.clone(),
|
||||
messages,
|
||||
max_tokens: request.max_tokens,
|
||||
temperature: request.temperature,
|
||||
stop: if request.stop.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(request.stop.clone())
|
||||
},
|
||||
stream: request.stream,
|
||||
tools: if tools.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(tools)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_response(
|
||||
&self,
|
||||
api_response: LocalApiResponse,
|
||||
model: String,
|
||||
) -> CompletionResponse {
|
||||
let choice = api_response.choices.first();
|
||||
|
||||
let (content, stop_reason) = match choice {
|
||||
Some(c) => {
|
||||
let has_tool_calls = c
|
||||
.message
|
||||
.tool_calls
|
||||
.as_ref()
|
||||
.map(|tc| !tc.is_empty())
|
||||
.unwrap_or(false);
|
||||
let has_content = c
|
||||
.message
|
||||
.content
|
||||
.as_ref()
|
||||
.map(|t| !t.is_empty())
|
||||
.unwrap_or(false);
|
||||
|
||||
let blocks = if has_tool_calls {
|
||||
let tool_calls = c.message.tool_calls.as_ref().unwrap();
|
||||
tool_calls
|
||||
.iter()
|
||||
.map(|tc| {
|
||||
let input: serde_json::Value =
|
||||
serde_json::from_str(&tc.function.arguments)
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
ContentBlock::ToolUse {
|
||||
id: tc.id.clone(),
|
||||
name: tc.function.name.clone(),
|
||||
input,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
} else if has_content {
|
||||
vec![ContentBlock::Text {
|
||||
text: c.message.content.clone().unwrap(),
|
||||
}]
|
||||
} else {
|
||||
vec![ContentBlock::Text {
|
||||
text: String::new(),
|
||||
}]
|
||||
};
|
||||
|
||||
let stop = match c.finish_reason.as_deref() {
|
||||
Some("stop") => StopReason::EndTurn,
|
||||
Some("length") => StopReason::MaxTokens,
|
||||
Some("tool_calls") => StopReason::ToolUse,
|
||||
_ => StopReason::EndTurn,
|
||||
};
|
||||
|
||||
(blocks, stop)
|
||||
}
|
||||
None => (
|
||||
vec![ContentBlock::Text {
|
||||
text: String::new(),
|
||||
}],
|
||||
StopReason::EndTurn,
|
||||
),
|
||||
};
|
||||
|
||||
let (input_tokens, output_tokens) = api_response
|
||||
.usage
|
||||
.map(|u| (u.prompt_tokens, u.completion_tokens))
|
||||
.unwrap_or((0, 0));
|
||||
|
||||
CompletionResponse {
|
||||
content,
|
||||
model,
|
||||
input_tokens,
|
||||
output_tokens,
|
||||
stop_reason,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the `reqwest::RequestBuilder` with an optional Authorization header.
|
||||
///
|
||||
/// Ollama does not need one; LM Studio / vLLM may be configured with an
|
||||
/// optional API key. We send the header only when a key is present.
|
||||
fn authenticated_post(&self, url: &str) -> reqwest::RequestBuilder {
|
||||
self.client.post(url).header("Accept", "*/*")
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -44,30 +254,394 @@ impl LlmDriver for LocalDriver {
|
||||
}
|
||||
|
||||
fn is_configured(&self) -> bool {
|
||||
// Local drivers don't require API keys
|
||||
// Local drivers never require an API key.
|
||||
true
|
||||
}
|
||||
|
||||
async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
|
||||
// TODO: Implement actual API call (OpenAI-compatible)
|
||||
Ok(CompletionResponse {
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "Local driver not yet implemented".to_string(),
|
||||
}],
|
||||
model: request.model,
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
stop_reason: StopReason::EndTurn,
|
||||
})
|
||||
let api_request = self.build_api_request(&request);
|
||||
let url = format!("{}/chat/completions", self.base_url);
|
||||
|
||||
tracing::debug!(target: "local_driver", "Sending request to {}", url);
|
||||
tracing::trace!(
|
||||
target: "local_driver",
|
||||
"Request body: {}",
|
||||
serde_json::to_string(&api_request).unwrap_or_default()
|
||||
);
|
||||
|
||||
let response = self
|
||||
.authenticated_post(&url)
|
||||
.json(&api_request)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
let hint = connection_error_hint(&e);
|
||||
ZclawError::LlmError(format!("Failed to connect to local LLM server at {}: {}{}", self.base_url, e, hint))
|
||||
})?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
tracing::warn!(target: "local_driver", "API error {}: {}", status, body);
|
||||
return Err(ZclawError::LlmError(format!(
|
||||
"Local LLM API error {}: {}",
|
||||
status, body
|
||||
)));
|
||||
}
|
||||
|
||||
let api_response: LocalApiResponse = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| ZclawError::LlmError(format!("Failed to parse response: {}", e)))?;
|
||||
|
||||
Ok(self.convert_response(api_response, request.model))
|
||||
}
|
||||
|
||||
fn stream(
|
||||
&self,
|
||||
_request: CompletionRequest,
|
||||
request: CompletionRequest,
|
||||
) -> Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send + '_>> {
|
||||
// Placeholder - return error stream
|
||||
Box::pin(futures::stream::once(async {
|
||||
Err(ZclawError::LlmError("Local driver streaming not yet implemented".to_string()))
|
||||
}))
|
||||
let mut stream_request = self.build_api_request(&request);
|
||||
stream_request.stream = true;
|
||||
|
||||
let url = format!("{}/chat/completions", self.base_url);
|
||||
tracing::debug!(target: "local_driver", "Starting stream to {}", url);
|
||||
|
||||
Box::pin(stream! {
|
||||
let response = match self
|
||||
.authenticated_post(&url)
|
||||
.header("Content-Type", "application/json")
|
||||
.timeout(std::time::Duration::from_secs(300))
|
||||
.json(&stream_request)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(r) => {
|
||||
tracing::debug!(target: "local_driver", "Stream response status: {}", r.status());
|
||||
r
|
||||
}
|
||||
Err(e) => {
|
||||
let hint = connection_error_hint(&e);
|
||||
tracing::error!(target: "local_driver", "Stream connection failed: {}{}", e, hint);
|
||||
yield Err(ZclawError::LlmError(format!(
|
||||
"Failed to connect to local LLM server at {}: {}{}",
|
||||
self.base_url, e, hint
|
||||
)));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
yield Err(ZclawError::LlmError(format!("API error {}: {}", status, body)));
|
||||
return;
|
||||
}
|
||||
|
||||
let mut byte_stream = response.bytes_stream();
|
||||
let mut accumulated_tool_calls: std::collections::HashMap<String, (String, String)> =
|
||||
std::collections::HashMap::new();
|
||||
let mut current_tool_id: Option<String> = None;
|
||||
|
||||
while let Some(chunk_result) = byte_stream.next().await {
|
||||
let chunk = match chunk_result {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
yield Err(ZclawError::LlmError(format!("Stream error: {}", e)));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let text = String::from_utf8_lossy(&chunk);
|
||||
for line in text.lines() {
|
||||
if let Some(data) = line.strip_prefix("data: ") {
|
||||
if data == "[DONE]" {
|
||||
tracing::debug!(
|
||||
target: "local_driver",
|
||||
"Stream done, tool_calls accumulated: {}",
|
||||
accumulated_tool_calls.len()
|
||||
);
|
||||
|
||||
for (id, (name, args)) in &accumulated_tool_calls {
|
||||
if name.is_empty() {
|
||||
tracing::warn!(
|
||||
target: "local_driver",
|
||||
"Skipping tool call with empty name: id={}",
|
||||
id
|
||||
);
|
||||
continue;
|
||||
}
|
||||
let parsed_args: serde_json::Value = if args.is_empty() {
|
||||
serde_json::json!({})
|
||||
} else {
|
||||
serde_json::from_str(args).unwrap_or_else(|e| {
|
||||
tracing::warn!(
|
||||
target: "local_driver",
|
||||
"Failed to parse tool args '{}': {}",
|
||||
args, e
|
||||
);
|
||||
serde_json::json!({})
|
||||
})
|
||||
};
|
||||
yield Ok(StreamChunk::ToolUseEnd {
|
||||
id: id.clone(),
|
||||
input: parsed_args,
|
||||
});
|
||||
}
|
||||
|
||||
yield Ok(StreamChunk::Complete {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
stop_reason: "end_turn".to_string(),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
match serde_json::from_str::<LocalStreamResponse>(data) {
|
||||
Ok(resp) => {
|
||||
if let Some(choice) = resp.choices.first() {
|
||||
let delta = &choice.delta;
|
||||
|
||||
// Text content
|
||||
if let Some(content) = &delta.content {
|
||||
if !content.is_empty() {
|
||||
yield Ok(StreamChunk::TextDelta {
|
||||
delta: content.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Tool calls
|
||||
if let Some(tool_calls) = &delta.tool_calls {
|
||||
for tc in tool_calls {
|
||||
// Tool call start
|
||||
if let Some(id) = &tc.id {
|
||||
let name = tc
|
||||
.function
|
||||
.as_ref()
|
||||
.and_then(|f| f.name.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
if !name.is_empty() {
|
||||
current_tool_id = Some(id.clone());
|
||||
accumulated_tool_calls
|
||||
.insert(id.clone(), (name.clone(), String::new()));
|
||||
yield Ok(StreamChunk::ToolUseStart {
|
||||
id: id.clone(),
|
||||
name,
|
||||
});
|
||||
} else {
|
||||
current_tool_id = Some(id.clone());
|
||||
accumulated_tool_calls
|
||||
.insert(id.clone(), (String::new(), String::new()));
|
||||
}
|
||||
}
|
||||
|
||||
// Tool call delta
|
||||
if let Some(function) = &tc.function {
|
||||
if let Some(args) = &function.arguments {
|
||||
let tool_id = tc
|
||||
.id
|
||||
.as_ref()
|
||||
.or(current_tool_id.as_ref())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
yield Ok(StreamChunk::ToolUseDelta {
|
||||
id: tool_id.clone(),
|
||||
delta: args.clone(),
|
||||
});
|
||||
|
||||
if let Some(entry) =
|
||||
accumulated_tool_calls.get_mut(&tool_id)
|
||||
{
|
||||
entry.1.push_str(args);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
target: "local_driver",
|
||||
"Failed to parse SSE: {}, data: {}",
|
||||
e, data
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Connection-error diagnostics
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Return a human-readable hint when the local server appears to be unreachable.
|
||||
fn connection_error_hint(error: &reqwest::Error) -> String {
|
||||
if error.is_connect() {
|
||||
format!(
|
||||
"\n\nHint: Is the local LLM server running at {}?\n\
|
||||
Make sure the server is started before using this driver.",
|
||||
// Extract just the host:port from whatever error we have.
|
||||
"localhost"
|
||||
)
|
||||
} else if error.is_timeout() {
|
||||
"\n\nHint: The request timed out. Local inference can be slow -- \
|
||||
try a smaller model or increase the timeout."
|
||||
.to_string()
|
||||
} else {
|
||||
String::new()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// OpenAI-compatible API types (private to this module)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct LocalApiRequest {
|
||||
model: String,
|
||||
messages: Vec<LocalApiMessage>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
max_tokens: Option<u32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
temperature: Option<f32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
stop: Option<Vec<String>>,
|
||||
#[serde(default)]
|
||||
stream: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
tools: Option<Vec<LocalApiTool>>,
|
||||
}
|
||||
|
||||
/// One chat message in the outgoing request. `content` and `tool_calls`
/// are each omitted from the JSON when `None`.
#[derive(Serialize)]
struct LocalApiMessage {
    // e.g. "system" / "user" / "assistant" / "tool" — presumably OpenAI
    // role strings; set by the request builder elsewhere in this module.
    role: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<LocalApiToolCall>>,
}
|
||||
|
||||
/// A tool call echoed back to the server as part of an assistant message.
#[derive(Serialize)]
struct LocalApiToolCall {
    id: String,
    // `type` is a Rust keyword, hence the raw identifier; serializes as "type".
    r#type: String,
    function: LocalFunctionCall,
}
|
||||
|
||||
/// Function name plus its arguments as a raw JSON string (OpenAI wire format
/// carries arguments as a string, not a nested object).
#[derive(Serialize)]
struct LocalFunctionCall {
    name: String,
    arguments: String,
}
|
||||
|
||||
/// A tool definition advertised in the request (`tools` array).
#[derive(Serialize)]
struct LocalApiTool {
    // `type` is a Rust keyword, hence the raw identifier; serializes as "type".
    r#type: String,
    function: LocalFunctionDef,
}
|
||||
|
||||
/// Function schema for a tool definition.
#[derive(Serialize)]
struct LocalFunctionDef {
    name: String,
    description: String,
    // JSON Schema describing the function's parameters, kept as a raw value.
    parameters: serde_json::Value,
}
|
||||
|
||||
// --- Response types ---
|
||||
|
||||
/// Top-level non-streaming response. Every field is defaulted so that
/// servers omitting parts of the OpenAI schema still deserialize cleanly.
#[derive(Deserialize, Default)]
struct LocalApiResponse {
    #[serde(default)]
    choices: Vec<LocalApiChoice>,
    // Token accounting; not all local servers report it.
    #[serde(default)]
    usage: Option<LocalApiUsage>,
}
|
||||
|
||||
/// One completion choice from a non-streaming response.
#[derive(Deserialize, Default)]
struct LocalApiChoice {
    #[serde(default)]
    message: LocalApiResponseMessage,
    // e.g. "stop" / "tool_calls" — presumably OpenAI finish reasons; the
    // exact vocabulary depends on the local server.
    #[serde(default)]
    finish_reason: Option<String>,
}
|
||||
|
||||
/// Assistant message inside a non-streaming choice: free text and/or
/// tool calls, either of which the server may omit.
#[derive(Deserialize, Default)]
struct LocalApiResponseMessage {
    #[serde(default)]
    content: Option<String>,
    #[serde(default)]
    tool_calls: Option<Vec<LocalApiToolCallResponse>>,
}
|
||||
|
||||
/// A complete tool call returned in a non-streaming response.
#[derive(Deserialize, Default)]
struct LocalApiToolCallResponse {
    #[serde(default)]
    id: String,
    #[serde(default)]
    function: LocalFunctionCallResponse,
}
|
||||
|
||||
/// Function payload of a returned tool call; `arguments` is the raw JSON
/// string as sent on the wire, parsed later by the caller.
#[derive(Deserialize, Default)]
struct LocalFunctionCallResponse {
    #[serde(default)]
    name: String,
    #[serde(default)]
    arguments: String,
}
|
||||
|
||||
/// Token usage reported by the server; defaults to zero when absent.
#[derive(Deserialize, Default)]
struct LocalApiUsage {
    #[serde(default)]
    prompt_tokens: u32,
    #[serde(default)]
    completion_tokens: u32,
}
|
||||
|
||||
// --- Streaming types ---
|
||||
|
||||
/// One SSE event payload (`data: {...}`) from a streaming response.
#[derive(Debug, Deserialize)]
struct LocalStreamResponse {
    #[serde(default)]
    choices: Vec<LocalStreamChoice>,
}
|
||||
|
||||
/// One choice within a streaming event; carries an incremental delta.
#[derive(Debug, Deserialize)]
struct LocalStreamChoice {
    #[serde(default)]
    delta: LocalDelta,
    #[serde(default)]
    #[allow(dead_code)] // Deserialized from SSE, not accessed in code
    finish_reason: Option<String>,
}
|
||||
|
||||
/// Incremental update inside a streaming choice: a text fragment and/or
/// tool-call fragments, either of which may be absent on a given event.
#[derive(Debug, Deserialize, Default)]
struct LocalDelta {
    #[serde(default)]
    content: Option<String>,
    #[serde(default)]
    tool_calls: Option<Vec<LocalToolCallDelta>>,
}
|
||||
|
||||
/// Streaming tool-call fragment. `id` is present only on the event that
/// starts a call; later argument fragments may arrive without it.
#[derive(Debug, Deserialize)]
struct LocalToolCallDelta {
    #[serde(default)]
    id: Option<String>,
    #[serde(default)]
    function: Option<LocalFunctionDelta>,
}
|
||||
|
||||
/// Streaming function fragment: the name (on the starting event) and/or a
/// piece of the JSON argument string to be concatenated by the consumer.
#[derive(Debug, Deserialize)]
struct LocalFunctionDelta {
    #[serde(default)]
    name: Option<String>,
    #[serde(default)]
    arguments: Option<String>,
}
|
||||
|
||||
@@ -12,6 +12,7 @@ pub mod loop_runner;
|
||||
pub mod loop_guard;
|
||||
pub mod stream;
|
||||
pub mod growth;
|
||||
pub mod compaction;
|
||||
|
||||
// Re-export main types
|
||||
pub use driver::{
|
||||
|
||||
@@ -11,6 +11,7 @@ use crate::tool::{ToolRegistry, ToolContext, SkillExecutor};
|
||||
use crate::tool::builtin::PathValidator;
|
||||
use crate::loop_guard::LoopGuard;
|
||||
use crate::growth::GrowthIntegration;
|
||||
use crate::compaction;
|
||||
use zclaw_memory::MemoryStore;
|
||||
|
||||
/// Agent loop runner
|
||||
@@ -29,6 +30,8 @@ pub struct AgentLoop {
|
||||
path_validator: Option<PathValidator>,
|
||||
/// Growth system integration (optional)
|
||||
growth: Option<GrowthIntegration>,
|
||||
/// Compaction threshold in tokens (0 = disabled)
|
||||
compaction_threshold: usize,
|
||||
}
|
||||
|
||||
impl AgentLoop {
|
||||
@@ -51,6 +54,7 @@ impl AgentLoop {
|
||||
skill_executor: None,
|
||||
path_validator: None,
|
||||
growth: None,
|
||||
compaction_threshold: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -101,6 +105,16 @@ impl AgentLoop {
|
||||
self.growth = Some(growth);
|
||||
}
|
||||
|
||||
/// Set compaction threshold in tokens (0 = disabled)
|
||||
///
|
||||
/// When the estimated token count of conversation history exceeds this
|
||||
/// threshold, older messages are summarized into a single system message
|
||||
/// and only recent messages are sent to the LLM.
|
||||
pub fn with_compaction_threshold(mut self, threshold: usize) -> Self {
|
||||
self.compaction_threshold = threshold;
|
||||
self
|
||||
}
|
||||
|
||||
/// Get growth integration reference
|
||||
pub fn growth(&self) -> Option<&GrowthIntegration> {
|
||||
self.growth.as_ref()
|
||||
@@ -134,6 +148,11 @@ impl AgentLoop {
|
||||
// Get all messages for context
|
||||
let mut messages = self.memory.get_messages(&session_id).await?;
|
||||
|
||||
// Apply compaction if threshold is configured
|
||||
if self.compaction_threshold > 0 {
|
||||
messages = compaction::maybe_compact(messages, self.compaction_threshold);
|
||||
}
|
||||
|
||||
// Enhance system prompt with growth memories
|
||||
let enhanced_prompt = if let Some(ref growth) = self.growth {
|
||||
let base = self.system_prompt.as_deref().unwrap_or("");
|
||||
@@ -260,7 +279,12 @@ impl AgentLoop {
|
||||
self.memory.append_message(&session_id, &user_message).await?;
|
||||
|
||||
// Get all messages for context
|
||||
let messages = self.memory.get_messages(&session_id).await?;
|
||||
let mut messages = self.memory.get_messages(&session_id).await?;
|
||||
|
||||
// Apply compaction if threshold is configured
|
||||
if self.compaction_threshold > 0 {
|
||||
messages = compaction::maybe_compact(messages, self.compaction_threshold);
|
||||
}
|
||||
|
||||
// Enhance system prompt with growth memories
|
||||
let enhanced_prompt = if let Some(ref growth) = self.growth {
|
||||
|
||||
Reference in New Issue
Block a user