feat(audit): 审计修复第四轮 — 跨会话搜索、LLM压缩集成、Presentation渲染器
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
- S9: MessageSearch 新增 Session/Global 双模式,Global 调用 VikingStorage memory_search - M4b: LLM 压缩器集成到 kernel AgentLoop,支持 use_llm 配置切换 - M4c: 压缩时自动提取记忆到 VikingStorage (runtime + tauri 双路径) - H6: 新增 ChartRenderer(recharts)、Document/Slideshow 完整渲染 - 累计修复 23 项,整体完成度 ~72%,真实可用率 ~80%
This commit is contained in:
@@ -5,8 +5,18 @@
|
||||
//! token count exceeds the configured threshold, older messages are
|
||||
//! summarized into a single system message and only recent messages are
|
||||
//! retained.
|
||||
//!
|
||||
//! Supports two compaction modes:
|
||||
//! - **Rule-based**: Heuristic topic extraction (default, no LLM needed)
|
||||
//! - **LLM-based**: Uses an LLM driver to generate higher-quality summaries
|
||||
//!
|
||||
//! Optionally flushes old messages to the growth/memory system before discarding.
|
||||
|
||||
use zclaw_types::Message;
|
||||
use std::sync::Arc;
|
||||
use zclaw_types::{AgentId, Message, SessionId};
|
||||
|
||||
use crate::driver::{CompletionRequest, ContentBlock, LlmDriver};
|
||||
use crate::growth::GrowthIntegration;
|
||||
|
||||
/// Number of recent messages to preserve after compaction.
///
/// The compactor keeps this many messages from the end of the history
/// (leading system messages are always retained separately); everything
/// older is summarized away.
const DEFAULT_KEEP_RECENT: usize = 6;
|
||||
@@ -146,6 +156,272 @@ pub fn maybe_compact(messages: Vec<Message>, threshold: usize) -> Vec<Message> {
|
||||
compacted
|
||||
}
|
||||
|
||||
/// Tunable knobs controlling compaction behavior.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Use LLM for generating summaries instead of rule-based extraction.
    pub use_llm: bool,
    /// Fall back to rule-based summary if LLM fails.
    pub llm_fallback_to_rules: bool,
    /// Flush memories from old messages before discarding them.
    pub memory_flush_enabled: bool,
    /// Maximum tokens for LLM-generated summary.
    pub summary_max_tokens: u32,
}

impl Default for CompactionConfig {
    /// Conservative defaults: rule-based summaries only and no memory
    /// flushing, with the rules fallback pre-enabled so switching on
    /// `use_llm` later degrades gracefully.
    fn default() -> Self {
        CompactionConfig {
            use_llm: false,
            llm_fallback_to_rules: true,
            memory_flush_enabled: false,
            summary_max_tokens: 500,
        }
    }
}
|
||||
|
||||
/// Outcome of an async compaction operation.
///
/// Produced by `maybe_compact_with_config`. When the token count was
/// below the threshold, `messages` is the untouched input and the
/// counters are zero.
#[derive(Debug, Clone)]
pub struct CompactionOutcome {
    /// The (possibly compacted) message list.
    pub messages: Vec<Message>,
    /// Number of messages removed during compaction.
    pub removed_count: usize,
    /// Number of memories flushed to the growth system.
    pub flushed_memories: usize,
    /// Whether LLM was used for summary generation.
    pub used_llm: bool,
}
|
||||
|
||||
/// Async compaction with optional LLM summary and memory flushing.
|
||||
///
|
||||
/// When `messages` exceed `threshold` tokens:
|
||||
/// 1. If `memory_flush_enabled`, extract memories from old messages via growth system
|
||||
/// 2. Generate summary (LLM or rule-based depending on config)
|
||||
/// 3. Replace old messages with summary + keep recent messages
|
||||
pub async fn maybe_compact_with_config(
|
||||
messages: Vec<Message>,
|
||||
threshold: usize,
|
||||
config: &CompactionConfig,
|
||||
agent_id: &AgentId,
|
||||
session_id: &SessionId,
|
||||
driver: Option<&Arc<dyn LlmDriver>>,
|
||||
growth: Option<&GrowthIntegration>,
|
||||
) -> CompactionOutcome {
|
||||
let tokens = estimate_messages_tokens(&messages);
|
||||
if tokens < threshold {
|
||||
return CompactionOutcome {
|
||||
messages,
|
||||
removed_count: 0,
|
||||
flushed_memories: 0,
|
||||
used_llm: false,
|
||||
};
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
"[Compaction] Triggered: {} tokens > {} threshold, {} messages",
|
||||
tokens,
|
||||
threshold,
|
||||
messages.len(),
|
||||
);
|
||||
|
||||
// Step 1: Flush memories from messages that are about to be compacted
|
||||
let flushed_memories = if config.memory_flush_enabled {
|
||||
if let Some(growth) = growth {
|
||||
match growth
|
||||
.process_conversation(agent_id, &messages, session_id.clone())
|
||||
.await
|
||||
{
|
||||
Ok(count) => {
|
||||
tracing::info!(
|
||||
"[Compaction] Flushed {} memories before compaction",
|
||||
count
|
||||
);
|
||||
count
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("[Compaction] Memory flush failed: {}", e);
|
||||
0
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::debug!("[Compaction] Memory flush requested but no growth integration available");
|
||||
0
|
||||
}
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
// Step 2: Determine split point (same logic as compact_messages)
|
||||
let leading_system_count = messages
|
||||
.iter()
|
||||
.take_while(|m| matches!(m, Message::System { .. }))
|
||||
.count();
|
||||
let keep_from_end = DEFAULT_KEEP_RECENT
|
||||
.min(messages.len().saturating_sub(leading_system_count));
|
||||
let split_index = messages.len().saturating_sub(keep_from_end);
|
||||
let split_index = split_index.max(leading_system_count);
|
||||
|
||||
if split_index == 0 {
|
||||
return CompactionOutcome {
|
||||
messages,
|
||||
removed_count: 0,
|
||||
flushed_memories,
|
||||
used_llm: false,
|
||||
};
|
||||
}
|
||||
|
||||
let old_messages = &messages[..split_index];
|
||||
let recent_messages = &messages[split_index..];
|
||||
let removed_count = old_messages.len();
|
||||
|
||||
// Step 3: Generate summary (LLM or rule-based)
|
||||
let summary = if config.use_llm {
|
||||
if let Some(driver) = driver {
|
||||
match generate_llm_summary(driver, old_messages, config.summary_max_tokens).await {
|
||||
Ok(llm_summary) => {
|
||||
tracing::info!(
|
||||
"[Compaction] Generated LLM summary ({} chars)",
|
||||
llm_summary.len()
|
||||
);
|
||||
llm_summary
|
||||
}
|
||||
Err(e) => {
|
||||
if config.llm_fallback_to_rules {
|
||||
tracing::warn!(
|
||||
"[Compaction] LLM summary failed: {}, falling back to rules",
|
||||
e
|
||||
);
|
||||
generate_summary(old_messages)
|
||||
} else {
|
||||
tracing::warn!(
|
||||
"[Compaction] LLM summary failed: {}, returning original messages",
|
||||
e
|
||||
);
|
||||
return CompactionOutcome {
|
||||
messages,
|
||||
removed_count: 0,
|
||||
flushed_memories,
|
||||
used_llm: false,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::warn!(
|
||||
"[Compaction] LLM compaction requested but no driver available, using rules"
|
||||
);
|
||||
generate_summary(old_messages)
|
||||
}
|
||||
} else {
|
||||
generate_summary(old_messages)
|
||||
};
|
||||
|
||||
let used_llm = config.use_llm && driver.is_some();
|
||||
|
||||
// Step 4: Build compacted message list
|
||||
let mut compacted = Vec::with_capacity(1 + recent_messages.len());
|
||||
compacted.push(Message::system(summary));
|
||||
compacted.extend(recent_messages.iter().cloned());
|
||||
|
||||
tracing::info!(
|
||||
"[Compaction] Removed {} messages, {} remain (llm={})",
|
||||
removed_count,
|
||||
compacted.len(),
|
||||
used_llm,
|
||||
);
|
||||
|
||||
CompactionOutcome {
|
||||
messages: compacted,
|
||||
removed_count,
|
||||
flushed_memories,
|
||||
used_llm,
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a summary using an LLM driver.
|
||||
async fn generate_llm_summary(
|
||||
driver: &Arc<dyn LlmDriver>,
|
||||
messages: &[Message],
|
||||
max_tokens: u32,
|
||||
) -> Result<String, String> {
|
||||
let mut conversation_text = String::new();
|
||||
for msg in messages {
|
||||
match msg {
|
||||
Message::User { content } => {
|
||||
conversation_text.push_str(&format!("用户: {}\n", content))
|
||||
}
|
||||
Message::Assistant { content, .. } => {
|
||||
conversation_text.push_str(&format!("助手: {}\n", content))
|
||||
}
|
||||
Message::System { content } => {
|
||||
if !content.starts_with("[以下是之前对话的摘要]") {
|
||||
conversation_text.push_str(&format!("[系统]: {}\n", content))
|
||||
}
|
||||
}
|
||||
Message::ToolUse { tool, input, .. } => {
|
||||
conversation_text.push_str(&format!(
|
||||
"[工具调用 {}]: {}\n",
|
||||
tool.as_str(),
|
||||
input
|
||||
))
|
||||
}
|
||||
Message::ToolResult { output, .. } => {
|
||||
conversation_text.push_str(&format!("[工具结果]: {}\n", output))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Truncate conversation text if too long for the prompt itself
|
||||
let max_conversation_chars = 8000;
|
||||
if conversation_text.len() > max_conversation_chars {
|
||||
conversation_text.truncate(max_conversation_chars);
|
||||
conversation_text.push_str("\n...(对话已截断)");
|
||||
}
|
||||
|
||||
let prompt = format!(
|
||||
"请用简洁的中文总结以下对话的关键信息。保留重要的讨论主题、决策、结论和待办事项。\
|
||||
输出格式为段落式摘要,不超过200字。\n\n{}",
|
||||
conversation_text
|
||||
);
|
||||
|
||||
let request = CompletionRequest {
|
||||
model: String::new(),
|
||||
system: Some(
|
||||
"你是一个对话摘要助手。只输出摘要内容,不要添加额外解释。".to_string(),
|
||||
),
|
||||
messages: vec![Message::user(&prompt)],
|
||||
tools: Vec::new(),
|
||||
max_tokens: Some(max_tokens),
|
||||
temperature: Some(0.3),
|
||||
stop: Vec::new(),
|
||||
stream: false,
|
||||
};
|
||||
|
||||
let response = driver
|
||||
.complete(request)
|
||||
.await
|
||||
.map_err(|e| format!("{}", e))?;
|
||||
|
||||
// Extract text from content blocks
|
||||
let text_parts: Vec<String> = response
|
||||
.content
|
||||
.iter()
|
||||
.filter_map(|block| match block {
|
||||
ContentBlock::Text { text } => Some(text.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
|
||||
let summary = text_parts.join("");
|
||||
|
||||
if summary.is_empty() {
|
||||
return Err("LLM returned empty response".to_string());
|
||||
}
|
||||
|
||||
Ok(summary)
|
||||
}
|
||||
|
||||
/// Generate a rule-based summary of old messages.
|
||||
fn generate_summary(messages: &[Message]) -> String {
|
||||
if messages.is_empty() {
|
||||
|
||||
@@ -24,3 +24,4 @@ pub use loop_runner::{AgentLoop, AgentLoopResult, LoopEvent};
|
||||
pub use loop_guard::{LoopGuard, LoopGuardConfig, LoopGuardResult};
|
||||
pub use stream::{StreamEvent, StreamSender};
|
||||
pub use growth::GrowthIntegration;
|
||||
pub use compaction::{CompactionConfig, CompactionOutcome};
|
||||
|
||||
@@ -12,7 +12,7 @@ use crate::tool::{ToolRegistry, ToolContext, SkillExecutor};
|
||||
use crate::tool::builtin::PathValidator;
|
||||
use crate::loop_guard::{LoopGuard, LoopGuardResult};
|
||||
use crate::growth::GrowthIntegration;
|
||||
use crate::compaction;
|
||||
use crate::compaction::{self, CompactionConfig};
|
||||
use zclaw_memory::MemoryStore;
|
||||
|
||||
/// Agent loop runner
|
||||
@@ -32,6 +32,8 @@ pub struct AgentLoop {
|
||||
growth: Option<GrowthIntegration>,
|
||||
/// Compaction threshold in tokens (0 = disabled)
|
||||
compaction_threshold: usize,
|
||||
/// Compaction behavior configuration
|
||||
compaction_config: CompactionConfig,
|
||||
}
|
||||
|
||||
impl AgentLoop {
|
||||
@@ -55,6 +57,7 @@ impl AgentLoop {
|
||||
path_validator: None,
|
||||
growth: None,
|
||||
compaction_threshold: 0,
|
||||
compaction_config: CompactionConfig::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -115,6 +118,12 @@ impl AgentLoop {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set compaction configuration (LLM mode, memory flushing, etc.)
|
||||
pub fn with_compaction_config(mut self, config: CompactionConfig) -> Self {
|
||||
self.compaction_config = config;
|
||||
self
|
||||
}
|
||||
|
||||
/// Get growth integration reference
|
||||
pub fn growth(&self) -> Option<&GrowthIntegration> {
|
||||
self.growth.as_ref()
|
||||
@@ -150,7 +159,23 @@ impl AgentLoop {
|
||||
|
||||
// Apply compaction if threshold is configured
|
||||
if self.compaction_threshold > 0 {
|
||||
messages = compaction::maybe_compact(messages, self.compaction_threshold);
|
||||
let needs_async =
|
||||
self.compaction_config.use_llm || self.compaction_config.memory_flush_enabled;
|
||||
if needs_async {
|
||||
let outcome = compaction::maybe_compact_with_config(
|
||||
messages,
|
||||
self.compaction_threshold,
|
||||
&self.compaction_config,
|
||||
&self.agent_id,
|
||||
&session_id,
|
||||
Some(&self.driver),
|
||||
self.growth.as_ref(),
|
||||
)
|
||||
.await;
|
||||
messages = outcome.messages;
|
||||
} else {
|
||||
messages = compaction::maybe_compact(messages, self.compaction_threshold);
|
||||
}
|
||||
}
|
||||
|
||||
// Enhance system prompt with growth memories
|
||||
@@ -316,7 +341,23 @@ impl AgentLoop {
|
||||
|
||||
// Apply compaction if threshold is configured
|
||||
if self.compaction_threshold > 0 {
|
||||
messages = compaction::maybe_compact(messages, self.compaction_threshold);
|
||||
let needs_async =
|
||||
self.compaction_config.use_llm || self.compaction_config.memory_flush_enabled;
|
||||
if needs_async {
|
||||
let outcome = compaction::maybe_compact_with_config(
|
||||
messages,
|
||||
self.compaction_threshold,
|
||||
&self.compaction_config,
|
||||
&self.agent_id,
|
||||
&session_id,
|
||||
Some(&self.driver),
|
||||
self.growth.as_ref(),
|
||||
)
|
||||
.await;
|
||||
messages = outcome.messages;
|
||||
} else {
|
||||
messages = compaction::maybe_compact(messages, self.compaction_threshold);
|
||||
}
|
||||
}
|
||||
|
||||
// Enhance system prompt with growth memories
|
||||
|
||||
Reference in New Issue
Block a user