feat(viking): add local server management for privacy-first deployment

Backend (Rust): - viking_commands.rs: Tauri commands for server status/start/stop/restart - memory/mod.rs: Memory module exports - memory/context_builder.rs: Context building with memory injection - memory/extractor.rs: Memory extraction from conversations - llm/mod.rs: LLM integration for memory summarization Frontend (TypeScript): - context-builder.ts: Context building with OpenViking integration - viking-client.ts: OpenViking API client - viking-local.ts: Local storage fallback when Viking unavailable - viking-memory-adapter.ts: Memory extraction and persistence Features: - Multi-mode adapter (local/sidecar/remote) with auto-detection - Privacy-first: all data stored in ~/.openviking/, server only on 127.0.0.1 - Graceful degradation when local server unavailable - Context compaction with memory flush before compression Tests: 21 passing (viking-adapter.test.ts) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 09:59:14 +08:00
parent 26e64a3fff
commit 134798c430
10 changed files with 3378 additions and 0 deletions
--- a/desktop/src-tauri/src/memory/context_builder.rs
+++ b/desktop/src-tauri/src/memory/context_builder.rs
@@ -0,0 +1,512 @@
+//! Context Builder - L0/L1/L2 Layered Context Loading
+//!
+//! Implements token-efficient context building for agent prompts.
+//! This supplements OpenViking CLI which lacks layered context loading.
+//!
+//! Layers:
+//! - L0 (Quick Scan): Fast vector similarity search, returns overview only
+//! - L1 (Standard): Load overview for top candidates, moderate detail
+//! - L2 (Deep): Load full content for most relevant items
+//!
+//! Reference: ZCLAW_AGENT_INTELLIGENCE_EVOLUTION.md §4.3
+
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+// === Types ===
+
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "UPPERCASE")]
+pub enum ContextLevel {
+    L0, // Quick scan
+    L1, // Standard detail
+    L2, // Full content
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ContextItem {
+    pub uri: String,
+    pub content: String,
+    pub score: f64,
+    pub level: ContextLevel,
+    pub category: String,
+    pub tokens: u32,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct RetrievalStep {
+    pub uri: String,
+    pub score: f64,
+    pub action: String, // "entered" | "skipped" | "matched"
+    pub level: ContextLevel,
+    pub children_explored: Option<u32>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct RetrievalTrace {
+    pub query: String,
+    pub steps: Vec<RetrievalStep>,
+    pub total_tokens_used: u32,
+    pub tokens_by_level: HashMap<String, u32>,
+    pub duration_ms: u64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct EnhancedContext {
+    pub system_prompt_addition: String,
+    pub items: Vec<ContextItem>,
+    pub total_tokens: u32,
+    pub tokens_by_level: HashMap<String, u32>,
+    pub trace: Option<RetrievalTrace>,
+}
+
+#[derive(Debug, Clone)]
+pub struct ContextBuilderConfig {
+    /// Maximum tokens for context
+    pub max_tokens: u32,
+    /// L0 scan limit (number of candidates)
+    pub l0_limit: u32,
+    /// L1 load limit (number of detailed items)
+    pub l1_limit: u32,
+    /// L2 full content limit (number of deep items)
+    pub l2_limit: u32,
+    /// Minimum relevance score (0.0 - 1.0)
+    pub min_score: f64,
+    /// Enable retrieval trace
+    pub enable_trace: bool,
+    /// Token reserve (keep this many tokens free)
+    pub token_reserve: u32,
+}
+
+impl Default for ContextBuilderConfig {
+    fn default() -> Self {
+        Self {
+            max_tokens: 8000,
+            l0_limit: 50,
+            l1_limit: 15,
+            l2_limit: 3,
+            min_score: 0.5,
+            enable_trace: true,
+            token_reserve: 500,
+        }
+    }
+}
+
+// === Context Builder ===
+
+pub struct ContextBuilder {
+    config: ContextBuilderConfig,
+    last_trace: Option<RetrievalTrace>,
+}
+
+impl ContextBuilder {
+    pub fn new(config: ContextBuilderConfig) -> Self {
+        Self {
+            config,
+            last_trace: None,
+        }
+    }
+
+    /// Get the last retrieval trace
+    pub fn get_last_trace(&self) -> Option<&RetrievalTrace> {
+        self.last_trace.as_ref()
+    }
+
+    /// Build enhanced context from a query
+    ///
+    /// This is the main entry point for context building.
+    /// It performs L0 scan, then progressively loads L1/L2 content.
+    pub async fn build_context(
+        &mut self,
+        query: &str,
+        agent_id: &str,
+        viking_find: impl Fn(&str, Option<&str>, u32) -> Result<Vec<FindResult>, String>,
+        viking_read: impl Fn(&str, ContextLevel) -> Result<String, String>,
+    ) -> Result<EnhancedContext, String> {
+        let start_time = std::time::Instant::now();
+        let mut tokens_by_level: HashMap<String, u32> =
+            [("L0".to_string(), 0), ("L1".to_string(), 0), ("L2".to_string(), 0)]
+                .into_iter()
+                .collect();
+
+        let mut trace_steps: Vec<RetrievalStep> = Vec::new();
+        let mut context_items: Vec<ContextItem> = Vec::new();
+
+        // === Phase 1: L0 Quick Scan ===
+        // Fast vector search across user + agent memories
+
+        let user_scope = "viking://user/memories";
+        let agent_scope = &format!("viking://agent/{}/memories", agent_id);
+
+        let user_l0 = viking_find(query, Some(user_scope), self.config.l0_limit)
+            .unwrap_or_default();
+        let agent_l0 = viking_find(query, Some(agent_scope), self.config.l0_limit)
+            .unwrap_or_default();
+
+        // Combine and sort by score
+        let mut all_l0: Vec<FindResult> = [user_l0, agent_l0]
+            .concat()
+            .into_iter()
+            .collect();
+
+        all_l0.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+
+        // Record L0 tokens
+        let l0_tokens: u32 = all_l0.iter().map(|r| estimate_tokens(&r.overview)).sum();
+        *tokens_by_level.get_mut("L0").unwrap() = l0_tokens;
+
+        // Record trace steps for L0
+        for result in &all_l0 {
+            trace_steps.push(RetrievalStep {
+                uri: result.uri.clone(),
+                score: result.score,
+                action: if result.score >= self.config.min_score {
+                    "entered"
+                } else {
+                    "skipped"
+                }
+                .to_string(),
+                level: ContextLevel::L0,
+                children_explored: None,
+            });
+        }
+
+        // === Phase 2: L1 Standard Loading ===
+        // Load overview for top candidates within token budget
+
+        let candidates: Vec<&FindResult> = all_l0
+            .iter()
+            .filter(|r| r.score >= self.config.min_score)
+            .take(self.config.l1_limit as usize)
+            .collect();
+
+        let mut token_budget = self.config.max_tokens.saturating_sub(self.config.token_reserve);
+
+        for candidate in candidates {
+            if token_budget < 200 {
+                break; // Need at least 200 tokens for meaningful content
+            }
+
+            match viking_read(&candidate.uri, ContextLevel::L1) {
+                Ok(content) => {
+                    let tokens = estimate_tokens(&content);
+                    if tokens <= token_budget {
+                        context_items.push(ContextItem {
+                            uri: candidate.uri.clone(),
+                            content,
+                            score: candidate.score,
+                            level: ContextLevel::L1,
+                            category: extract_category(&candidate.uri),
+                            tokens,
+                        });
+                        token_budget -= tokens;
+                        *tokens_by_level.get_mut("L1").unwrap() += tokens;
+                    }
+                }
+                Err(e) => {
+                    eprintln!("[ContextBuilder] Failed to read L1 for {}: {}", candidate.uri, e);
+                }
+            }
+        }
+
+        // === Phase 3: L2 Deep Loading ===
+        // Load full content for top 3 most relevant items
+        // Collect items to upgrade first (avoid borrow conflicts)
+        let deep_candidates: Vec<(String, u32)> = context_items
+            .iter()
+            .filter(|i| i.level == ContextLevel::L1)
+            .take(self.config.l2_limit as usize)
+            .map(|i| (i.uri.clone(), i.tokens))
+            .collect();
+
+        for (uri, old_tokens) in deep_candidates {
+            if token_budget < 500 {
+                break; // Need at least 500 tokens for full content
+            }
+
+            match viking_read(&uri, ContextLevel::L2) {
+                Ok(full_content) => {
+                    let tokens = estimate_tokens(&full_content);
+                    if tokens <= token_budget {
+                        // Update the item with L2 content
+                        if let Some(context_item) = context_items.iter_mut().find(|i| i.uri == uri) {
+                            context_item.content = full_content;
+                            context_item.level = ContextLevel::L2;
+                            context_item.tokens = tokens;
+                            *tokens_by_level.get_mut("L2").unwrap() += tokens;
+                            *tokens_by_level.get_mut("L1").unwrap() -= old_tokens;
+                            token_budget -= tokens.saturating_sub(old_tokens);
+                        }
+                    }
+                }
+                Err(e) => {
+                    eprintln!("[ContextBuilder] Failed to read L2 for {}: {}", uri, e);
+                }
+            }
+        }
+
+        // === Build Output ===
+
+        let total_tokens: u32 = tokens_by_level.values().sum();
+        let system_prompt_addition = format_context_for_prompt(&context_items);
+
+        // Build retrieval trace
+        let duration_ms = start_time.elapsed().as_millis() as u64;
+        let trace = if self.config.enable_trace {
+            Some(RetrievalTrace {
+                query: query.to_string(),
+                steps: trace_steps,
+                total_tokens_used: total_tokens,
+                tokens_by_level: tokens_by_level.clone(),
+                duration_ms,
+            })
+        } else {
+            None
+        };
+
+        self.last_trace = trace.clone();
+
+        Ok(EnhancedContext {
+            system_prompt_addition,
+            items: context_items,
+            total_tokens,
+            tokens_by_level,
+            trace,
+        })
+    }
+
+    /// Build context with pre-fetched L0 results
+    pub fn build_context_from_l0(
+        &mut self,
+        query: &str,
+        l0_results: Vec<FindResult>,
+        viking_read: impl Fn(&str, ContextLevel) -> Result<String, String>,
+    ) -> Result<EnhancedContext, String> {
+        // Similar to build_context but uses pre-fetched L0 results
+        let start_time = std::time::Instant::now();
+        let mut tokens_by_level: HashMap<String, u32> =
+            [("L0".to_string(), 0), ("L1".to_string(), 0), ("L2".to_string(), 0)]
+                .into_iter()
+                .collect();
+
+        let mut trace_steps: Vec<RetrievalStep> = Vec::new();
+        let mut context_items: Vec<ContextItem> = Vec::new();
+
+        // Sort by score
+        let mut all_l0 = l0_results;
+        all_l0.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+
+        // Record L0 tokens
+        let l0_tokens: u32 = all_l0.iter().map(|r| estimate_tokens(&r.overview)).sum();
+        *tokens_by_level.get_mut("L0").unwrap() = l0_tokens;
+
+        // Record trace steps
+        for result in &all_l0 {
+            trace_steps.push(RetrievalStep {
+                uri: result.uri.clone(),
+                score: result.score,
+                action: if result.score >= self.config.min_score {
+                    "entered"
+                } else {
+                    "skipped"
+                }
+                .to_string(),
+                level: ContextLevel::L0,
+                children_explored: None,
+            });
+        }
+
+        // L1 loading
+        let candidates: Vec<&FindResult> = all_l0
+            .iter()
+            .filter(|r| r.score >= self.config.min_score)
+            .take(self.config.l1_limit as usize)
+            .collect();
+
+        let mut token_budget = self.config.max_tokens.saturating_sub(self.config.token_reserve);
+
+        for candidate in candidates {
+            if token_budget < 200 {
+                break;
+            }
+
+            match viking_read(&candidate.uri, ContextLevel::L1) {
+                Ok(content) => {
+                    let tokens = estimate_tokens(&content);
+                    if tokens <= token_budget {
+                        context_items.push(ContextItem {
+                            uri: candidate.uri.clone(),
+                            content,
+                            score: candidate.score,
+                            level: ContextLevel::L1,
+                            category: extract_category(&candidate.uri),
+                            tokens,
+                        });
+                        token_budget -= tokens;
+                        *tokens_by_level.get_mut("L1").unwrap() += tokens;
+                    }
+                }
+                Err(_) => continue,
+            }
+        }
+
+        // L2 loading - collect updates first to avoid borrow conflicts
+        let deep_candidates: Vec<(String, u32)> = context_items
+            .iter()
+            .take(self.config.l2_limit as usize)
+            .map(|item| (item.uri.clone(), item.tokens))
+            .collect();
+
+        for (uri, old_tokens) in deep_candidates {
+            if token_budget < 500 {
+                break;
+            }
+
+            match viking_read(&uri, ContextLevel::L2) {
+                Ok(full_content) => {
+                    let tokens = estimate_tokens(&full_content);
+                    if tokens <= token_budget {
+                        if let Some(context_item) = context_items.iter_mut().find(|i| i.uri == uri) {
+                            context_item.content = full_content;
+                            context_item.level = ContextLevel::L2;
+                            context_item.tokens = tokens;
+                            *tokens_by_level.get_mut("L2").unwrap() += tokens;
+                            *tokens_by_level.get_mut("L1").unwrap() -= old_tokens;
+                        }
+                    }
+                }
+                Err(_) => continue,
+            }
+        }
+
+        let total_tokens: u32 = tokens_by_level.values().sum();
+        let system_prompt_addition = format_context_for_prompt(&context_items);
+        let duration_ms = start_time.elapsed().as_millis() as u64;
+
+        let trace = if self.config.enable_trace {
+            Some(RetrievalTrace {
+                query: query.to_string(),
+                steps: trace_steps,
+                total_tokens_used: total_tokens,
+                tokens_by_level: tokens_by_level.clone(),
+                duration_ms,
+            })
+        } else {
+            None
+        };
+
+        self.last_trace = trace.clone();
+
+        Ok(EnhancedContext {
+            system_prompt_addition,
+            items: context_items,
+            total_tokens,
+            tokens_by_level,
+            trace,
+        })
+    }
+}
+
+// === Helper Functions ===
+
+/// Estimate token count for text
+fn estimate_tokens(text: &str) -> u32 {
+    // ~1.5 tokens per CJK character, ~0.4 tokens per ASCII character
+    let cjk_count = text.chars().filter(|c| ('\u{4E00}'..='\u{9FFF}').contains(c)).count();
+    let other_count = text.chars().count() - cjk_count;
+    ((cjk_count as f32 * 1.5 + other_count as f32 * 0.4).ceil() as u32).max(1)
+}
+
+/// Extract category from URI
+fn extract_category(uri: &str) -> String {
+    let parts: Vec<&str> = uri.strip_prefix("viking://").unwrap_or(uri).split('/').collect();
+    // Return 3rd segment as category (e.g., "preferences" from viking://user/memories/preferences/...)
+    parts.get(2).or(parts.get(1)).unwrap_or(&"unknown").to_string()
+}
+
+/// Format context items for system prompt
+fn format_context_for_prompt(items: &[ContextItem]) -> String {
+    if items.is_empty() {
+        return String::new();
+    }
+
+    let user_items: Vec<&ContextItem> = items
+        .iter()
+        .filter(|i| i.uri.starts_with("viking://user/"))
+        .collect();
+
+    let agent_items: Vec<&ContextItem> = items
+        .iter()
+        .filter(|i| i.uri.starts_with("viking://agent/"))
+        .collect();
+
+    let mut sections: Vec<String> = Vec::new();
+
+    if !user_items.is_empty() {
+        sections.push("## 用户记忆".to_string());
+        for item in user_items {
+            sections.push(format!("- [{}] {}", item.category, item.content));
+        }
+    }
+
+    if !agent_items.is_empty() {
+        sections.push("## Agent 经验".to_string());
+        for item in agent_items {
+            sections.push(format!("- [{}] {}", item.category, item.content));
+        }
+    }
+
+    sections.join("\n")
+}
+
+// === External Types (for viking_find callback) ===
+
+#[derive(Debug, Clone)]
+pub struct FindResult {
+    pub uri: String,
+    pub score: f64,
+    pub overview: String,
+}
+
+// === Tauri Commands ===
+
+#[tauri::command]
+pub fn estimate_content_tokens(content: String) -> u32 {
+    estimate_tokens(&content)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_estimate_tokens() {
+        assert!(estimate_tokens("Hello world") > 0);
+        assert!(estimate_tokens("你好世界") > estimate_tokens("Hello"));
+    }
+
+    #[test]
+    fn test_extract_category() {
+        assert_eq!(
+            extract_category("viking://user/memories/preferences/dark_mode"),
+            "preferences"
+        );
+        assert_eq!(
+            extract_category("viking://agent/main/lessons/lesson1"),
+            "lessons"
+        );
+    }
+
+    #[test]
+    fn test_context_builder_config_default() {
+        let config = ContextBuilderConfig::default();
+        assert_eq!(config.max_tokens, 8000);
+        assert_eq!(config.l0_limit, 50);
+        assert_eq!(config.l1_limit, 15);
+        assert_eq!(config.l2_limit, 3);
+    }
+}
--- a/desktop/src-tauri/src/memory/extractor.rs
+++ b/desktop/src-tauri/src/memory/extractor.rs
@@ -0,0 +1,506 @@
+//! Session Memory Extractor
+//!
+//! Extracts structured memories from conversation sessions using LLM analysis.
+//! This supplements OpenViking CLI which lacks built-in memory extraction.
+//!
+//! Categories:
+//! - user_preference: User's stated preferences and settings
+//! - user_fact: Facts about the user (name, role, projects, etc.)
+//! - agent_lesson: Lessons learned by the agent from interactions
+//! - agent_pattern: Recurring patterns the agent should remember
+//! - task: Task-related information for follow-up
+
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+// === Types ===
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum MemoryCategory {
+    UserPreference,
+    UserFact,
+    AgentLesson,
+    AgentPattern,
+    Task,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ExtractedMemory {
+    pub category: MemoryCategory,
+    pub content: String,
+    pub tags: Vec<String>,
+    pub importance: u8, // 1-10 scale
+    pub suggested_uri: String,
+    pub reasoning: Option<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ExtractionResult {
+    pub memories: Vec<ExtractedMemory>,
+    pub summary: String,
+    pub tokens_saved: Option<u32>,
+    pub extraction_time_ms: u64,
+}
+
+#[derive(Debug, Clone)]
+pub struct ExtractionConfig {
+    /// Maximum memories to extract per session
+    pub max_memories: usize,
+    /// Minimum importance threshold (1-10)
+    pub min_importance: u8,
+    /// Whether to include reasoning in output
+    pub include_reasoning: bool,
+    /// Agent ID for URI generation
+    pub agent_id: String,
+}
+
+impl Default for ExtractionConfig {
+    fn default() -> Self {
+        Self {
+            max_memories: 10,
+            min_importance: 5,
+            include_reasoning: true,
+            agent_id: "zclaw-main".to_string(),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChatMessage {
+    pub role: String,
+    pub content: String,
+    pub timestamp: Option<String>,
+}
+
+// === Session Extractor ===
+
+pub struct SessionExtractor {
+    config: ExtractionConfig,
+    llm_endpoint: Option<String>,
+    api_key: Option<String>,
+}
+
+impl SessionExtractor {
+    pub fn new(config: ExtractionConfig) -> Self {
+        Self {
+            config,
+            llm_endpoint: None,
+            api_key: None,
+        }
+    }
+
+    /// Configure LLM endpoint for extraction
+    pub fn with_llm(mut self, endpoint: String, api_key: String) -> Self {
+        self.llm_endpoint = Some(endpoint);
+        self.api_key = Some(api_key);
+        self
+    }
+
+    /// Extract memories from a conversation session
+    pub async fn extract(&self, messages: &[ChatMessage]) -> Result<ExtractionResult, String> {
+        let start_time = std::time::Instant::now();
+
+        // Build extraction prompt
+        let prompt = self.build_extraction_prompt(messages);
+
+        // Call LLM for extraction
+        let response = self.call_llm(&prompt).await?;
+
+        // Parse LLM response into structured memories
+        let memories = self.parse_extraction(&response)?;
+
+        // Filter by importance and limit
+        let filtered: Vec<ExtractedMemory> = memories
+            .into_iter()
+            .filter(|m| m.importance >= self.config.min_importance)
+            .take(self.config.max_memories)
+            .collect();
+
+        // Generate session summary
+        let summary = self.generate_summary(&filtered);
+
+        let elapsed = start_time.elapsed().as_millis() as u64;
+
+        Ok(ExtractionResult {
+            tokens_saved: Some(self.estimate_tokens_saved(messages, &summary)),
+            memories: filtered,
+            summary,
+            extraction_time_ms: elapsed,
+        })
+    }
+
+    /// Build the extraction prompt for the LLM
+    fn build_extraction_prompt(&self, messages: &[ChatMessage]) -> String {
+        let conversation = messages
+            .iter()
+            .map(|m| format!("[{}]: {}", m.role, m.content))
+            .collect::<Vec<_>>()
+            .join("\n\n");
+
+        format!(
+            r#"Analyze the following conversation and extract structured memories.
+Focus on information that would be useful for future interactions.
+
+## Conversation
+{}
+
+## Extraction Instructions
+Extract memories in these categories:
+- user_preference: User's stated preferences (UI preferences, workflow preferences, tool choices)
+- user_fact: Facts about the user (name, role, projects, skills, constraints)
+- agent_lesson: Lessons the agent learned (what worked, what didn't, corrections needed)
+- agent_pattern: Recurring patterns to remember (common workflows, frequent requests)
+- task: Tasks or follow-ups mentioned (todos, pending work, deadlines)
+
+For each memory, provide:
+1. category: One of the above categories
+2. content: The actual memory content (concise, actionable)
+3. tags: 2-5 relevant tags for retrieval
+4. importance: 1-10 scale (10 = critical, 1 = trivial)
+5. reasoning: Brief explanation of why this is worth remembering
+
+Output as JSON array:
+```json
+[
+  {{
+    "category": "user_preference",
+    "content": "...",
+    "tags": ["tag1", "tag2"],
+    "importance": 7,
+    "reasoning": "..."
+  }}
+]
+```
+
+If no significant memories found, return empty array: []"#,
+            conversation
+        )
+    }
+
+    /// Call LLM for extraction
+    async fn call_llm(&self, prompt: &str) -> Result<String, String> {
+        // If LLM endpoint is configured, use it
+        if let (Some(endpoint), Some(api_key)) = (&self.llm_endpoint, &self.api_key) {
+            return self.call_llm_api(endpoint, api_key, prompt).await;
+        }
+
+        // Otherwise, use rule-based extraction as fallback
+        self.rule_based_extraction(prompt)
+    }
+
+    /// Call external LLM API (doubao, OpenAI, etc.)
+    async fn call_llm_api(
+        &self,
+        endpoint: &str,
+        api_key: &str,
+        prompt: &str,
+    ) -> Result<String, String> {
+        let client = reqwest::Client::new();
+
+        let response = client
+            .post(endpoint)
+            .header("Authorization", format!("Bearer {}", api_key))
+            .header("Content-Type", "application/json")
+            .json(&serde_json::json!({
+                "model": "doubao-pro-32k",
+                "messages": [
+                    {"role": "user", "content": prompt}
+                ],
+                "temperature": 0.3,
+                "max_tokens": 2000
+            }))
+            .send()
+            .await
+            .map_err(|e| format!("LLM API request failed: {}", e))?;
+
+        if !response.status().is_success() {
+            return Err(format!("LLM API error: {}", response.status()));
+        }
+
+        let json: serde_json::Value = response
+            .json()
+            .await
+            .map_err(|e| format!("Failed to parse LLM response: {}", e))?;
+
+        // Extract content from response (adjust based on API format)
+        let content = json
+            .get("choices")
+            .and_then(|c| c.get(0))
+            .and_then(|c| c.get("message"))
+            .and_then(|m| m.get("content"))
+            .and_then(|c| c.as_str())
+            .ok_or("Invalid LLM response format")?
+            .to_string();
+
+        Ok(content)
+    }
+
+    /// Rule-based extraction as fallback when LLM is not available
+    fn rule_based_extraction(&self, prompt: &str) -> Result<String, String> {
+        // Simple pattern matching for common memory patterns
+        let mut memories: Vec<ExtractedMemory> = Vec::new();
+
+        // Pattern: User preferences
+        let pref_patterns = [
+            (r"I prefer (.+)", "user_preference"),
+            (r"My preference is (.+)", "user_preference"),
+            (r"I like (.+)", "user_preference"),
+            (r"I don't like (.+)", "user_preference"),
+        ];
+
+        // Pattern: User facts
+        let fact_patterns = [
+            (r"My name is (.+)", "user_fact"),
+            (r"I work on (.+)", "user_fact"),
+            (r"I'm a (.+)", "user_fact"),
+            (r"My project is (.+)", "user_fact"),
+        ];
+
+        // Extract using regex (simplified implementation)
+        for (pattern, category) in pref_patterns.iter().chain(fact_patterns.iter()) {
+            if let Ok(re) = regex::Regex::new(pattern) {
+                for cap in re.captures_iter(prompt) {
+                    if let Some(content) = cap.get(1) {
+                        let memory = ExtractedMemory {
+                            category: if *category == "user_preference" {
+                                MemoryCategory::UserPreference
+                            } else {
+                                MemoryCategory::UserFact
+                            },
+                            content: content.as_str().to_string(),
+                            tags: vec!["auto-extracted".to_string()],
+                            importance: 6,
+                            suggested_uri: format!(
+                                "viking://user/memories/{}/{}",
+                                category,
+                                chrono::Utc::now().timestamp_millis()
+                            ),
+                            reasoning: Some("Extracted via rule-based pattern matching".to_string()),
+                        };
+                        memories.push(memory);
+                    }
+                }
+            }
+        }
+
+        // Return as JSON
+        serde_json::to_string_pretty(&memories)
+            .map_err(|e| format!("Failed to serialize memories: {}", e))
+    }
+
+    /// Parse LLM response into structured memories
+    fn parse_extraction(&self, response: &str) -> Result<Vec<ExtractedMemory>, String> {
+        // Try to extract JSON from the response
+        let json_start = response.find('[').unwrap_or(0);
+        let json_end = response.rfind(']').map(|i| i + 1).unwrap_or(response.len());
+        let json_str = &response[json_start..json_end];
+
+        // Parse JSON
+        let raw_memories: Vec<serde_json::Value> = serde_json::from_str(json_str)
+            .unwrap_or_default();
+
+        let memories: Vec<ExtractedMemory> = raw_memories
+            .into_iter()
+            .filter_map(|m| self.parse_memory(&m))
+            .collect();
+
+        Ok(memories)
+    }
+
+    /// Parse a single memory from JSON
+    fn parse_memory(&self, value: &serde_json::Value) -> Option<ExtractedMemory> {
+        let category_str = value.get("category")?.as_str()?;
+        let category = match category_str {
+            "user_preference" => MemoryCategory::UserPreference,
+            "user_fact" => MemoryCategory::UserFact,
+            "agent_lesson" => MemoryCategory::AgentLesson,
+            "agent_pattern" => MemoryCategory::AgentPattern,
+            "task" => MemoryCategory::Task,
+            _ => return None,
+        };
+
+        let content = value.get("content")?.as_str()?.to_string();
+        let tags = value
+            .get("tags")
+            .and_then(|t| t.as_array())
+            .map(|arr| {
+                arr.iter()
+                    .filter_map(|v| v.as_str().map(String::from))
+                    .collect()
+            })
+            .unwrap_or_default();
+
+        let importance = value
+            .get("importance")
+            .and_then(|v| v.as_u64())
+            .unwrap_or(5) as u8;
+
+        let reasoning = value
+            .get("reasoning")
+            .and_then(|v| v.as_str())
+            .map(String::from);
+
+        // Generate URI based on category
+        let suggested_uri = self.generate_uri(&category, &content);
+
+        Some(ExtractedMemory {
+            category,
+            content,
+            tags,
+            importance,
+            suggested_uri,
+            reasoning,
+        })
+    }
+
+    /// Generate a URI for the memory
+    fn generate_uri(&self, category: &MemoryCategory, content: &str) -> String {
+        let timestamp = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .map(|d| d.as_millis())
+            .unwrap_or(0);
+
+        let content_hash = &content[..content.len().min(20)]
+            .to_lowercase()
+            .replace(' ', "_")
+            .replace(|c: char| !c.is_alphanumeric() && c != '_', "");
+
+        match category {
+            MemoryCategory::UserPreference => {
+                format!("viking://user/memories/preferences/{}_{}", content_hash, timestamp)
+            }
+            MemoryCategory::UserFact => {
+                format!("viking://user/memories/facts/{}_{}", content_hash, timestamp)
+            }
+            MemoryCategory::AgentLesson => {
+                format!(
+                    "viking://agent/{}/memories/lessons/{}_{}",
+                    self.config.agent_id, content_hash, timestamp
+                )
+            }
+            MemoryCategory::AgentPattern => {
+                format!(
+                    "viking://agent/{}/memories/patterns/{}_{}",
+                    self.config.agent_id, content_hash, timestamp
+                )
+            }
+            MemoryCategory::Task => {
+                format!(
+                    "viking://agent/{}/tasks/{}_{}",
+                    self.config.agent_id, content_hash, timestamp
+                )
+            }
+        }
+    }
+
+    /// Generate a summary of extracted memories
+    fn generate_summary(&self, memories: &[ExtractedMemory]) -> String {
+        if memories.is_empty() {
+            return "No significant memories extracted from this session.".to_string();
+        }
+
+        let mut summary_parts = Vec::new();
+
+        let user_prefs = memories
+            .iter()
+            .filter(|m| matches!(m.category, MemoryCategory::UserPreference))
+            .count();
+        if user_prefs > 0 {
+            summary_parts.push(format!("{} user preferences", user_prefs));
+        }
+
+        let user_facts = memories
+            .iter()
+            .filter(|m| matches!(m.category, MemoryCategory::UserFact))
+            .count();
+        if user_facts > 0 {
+            summary_parts.push(format!("{} user facts", user_facts));
+        }
+
+        let lessons = memories
+            .iter()
+            .filter(|m| matches!(m.category, MemoryCategory::AgentLesson))
+            .count();
+        if lessons > 0 {
+            summary_parts.push(format!("{} agent lessons", lessons));
+        }
+
+        let patterns = memories
+            .iter()
+            .filter(|m| matches!(m.category, MemoryCategory::AgentPattern))
+            .count();
+        if patterns > 0 {
+            summary_parts.push(format!("{} patterns", patterns));
+        }
+
+        let tasks = memories
+            .iter()
+            .filter(|m| matches!(m.category, MemoryCategory::Task))
+            .count();
+        if tasks > 0 {
+            summary_parts.push(format!("{} tasks", tasks));
+        }
+
+        format!(
+            "Extracted {} memories: {}.",
+            memories.len(),
+            summary_parts.join(", ")
+        )
+    }
+
+    /// Estimate tokens saved by extraction
+    fn estimate_tokens_saved(&self, messages: &[ChatMessage], summary: &str) -> u32 {
+        // Rough estimation: original messages vs summary
+        let original_tokens: u32 = messages
+            .iter()
+            .map(|m| (m.content.len() as f32 * 0.4) as u32)
+            .sum();
+
+        let summary_tokens = (summary.len() as f32 * 0.4) as u32;
+
+        original_tokens.saturating_sub(summary_tokens)
+    }
+}
+
+// === Tauri Commands ===
+
+#[tauri::command]
+pub async fn extract_session_memories(
+    messages: Vec<ChatMessage>,
+    agent_id: String,
+) -> Result<ExtractionResult, String> {
+    let config = ExtractionConfig {
+        agent_id,
+        ..Default::default()
+    };
+
+    let extractor = SessionExtractor::new(config);
+    extractor.extract(&messages).await
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_extraction_config_default() {
+        let config = ExtractionConfig::default();
+        assert_eq!(config.max_memories, 10);
+        assert_eq!(config.min_importance, 5);
+    }
+
+    #[test]
+    fn test_uri_generation() {
+        let config = ExtractionConfig::default();
+        let extractor = SessionExtractor::new(config);
+
+        let uri = extractor.generate_uri(
+            &MemoryCategory::UserPreference,
+            "dark mode enabled"
+        );
+        assert!(uri.starts_with("viking://user/memories/preferences/"));
+    }
+}
--- a/desktop/src-tauri/src/memory/mod.rs
+++ b/desktop/src-tauri/src/memory/mod.rs
@@ -0,0 +1,13 @@
+//! Memory Module - OpenViking Supplemental Components
+//!
+//! This module provides functionality that the OpenViking CLI lacks:
+//! - Session extraction: LLM-powered memory extraction from conversations
+//! - Context building: L0/L1/L2 layered context loading
+//!
+//! These components work alongside the OpenViking CLI sidecar.
+
+pub mod extractor;
+pub mod context_builder;
+
+pub use extractor::{SessionExtractor, ExtractedMemory, ExtractionConfig};
+pub use context_builder::{ContextBuilder, EnhancedContext, ContextLevel};