fix(audit): 第五轮审计修复 — 反思LLM分析、语义路由、并行执行、错误中文化

- P2: 反思引擎接入 LLM 深度行为分析 (analyze_patterns_with_llm)
- P3-M6: 语义路由 RuntimeLlmIntentDriver 真实 LLM 匹配
- P3-L1: V2 Pipeline execute_parallel 改用 buffer_unordered 真正并行
- P3-S10: Rust 用户可见错误提示统一中文化

累计修复 27 项，完成度 ~72% → ~78%
2026-03-27 12:10:48 +08:00
parent 30b2515f07
commit 256dba49db
10 changed files with 393 additions and 84 deletions
--- a/crates/zclaw-pipeline/src/actions/mod.rs
+++ b/crates/zclaw-pipeline/src/actions/mod.rs
@@ -150,7 +150,7 @@ impl ActionRegistry {
                .await
                .map_err(ActionError::Llm)
        } else {
-            Err(ActionError::Llm("LLM driver not configured".to_string()))
+            Err(ActionError::Llm("LLM 驱动未配置，请在设置中配置模型与 API".to_string()))
        }
    }

@@ -165,7 +165,7 @@ impl ActionRegistry {
                .await
                .map_err(ActionError::Skill)
        } else {
-            Err(ActionError::Skill("Skill registry not configured".to_string()))
+            Err(ActionError::Skill("技能注册表未初始化".to_string()))
        }
    }

@@ -181,7 +181,7 @@ impl ActionRegistry {
                .await
                .map_err(ActionError::Hand)
        } else {
-            Err(ActionError::Hand("Hand registry not configured".to_string()))
+            Err(ActionError::Hand("Hand 注册表未初始化".to_string()))
        }
    }

@@ -197,7 +197,7 @@ impl ActionRegistry {
                .await
                .map_err(ActionError::Orchestration)
        } else {
-            Err(ActionError::Orchestration("Orchestration driver not configured".to_string()))
+            Err(ActionError::Orchestration("编排驱动未初始化".to_string()))
        }
    }

--- a/crates/zclaw-pipeline/src/engine/stage.rs
+++ b/crates/zclaw-pipeline/src/engine/stage.rs
@@ -10,6 +10,7 @@
 use std::collections::HashMap;
 use std::sync::Arc;
 use async_trait::async_trait;
+use futures::stream::{self, StreamExt};
 use serde_json::{Value, json};

 use crate::types_v2::{Stage, ConditionalBranch};
@@ -269,7 +270,7 @@ impl StageEngine {

        self.emit_event(StageEvent::Progress {
            stage_id: stage_id.to_string(),
-            message: "Calling LLM...".to_string(),
+            message: "正在调用 LLM...".to_string(),
        });

        let prompt_str = resolved_prompt.as_str()
@@ -302,7 +303,7 @@ impl StageEngine {
        stage_id: &str,
        each: &str,
        stage_template: &Stage,
-        _max_workers: usize,
+        max_workers: usize,
        context: &mut ExecutionContextV2,
    ) -> Result<Value, StageError> {
        // Resolve the array to iterate over
@@ -313,29 +314,58 @@ impl StageEngine {
            return Ok(Value::Array(vec![]));
        }

+        let workers = max_workers.max(1).min(total);
+        let stage_template = stage_template.clone();
+
+        // Clone Arc drivers for concurrent tasks
+        let llm_driver = self.llm_driver.clone();
+        let skill_driver = self.skill_driver.clone();
+        let hand_driver = self.hand_driver.clone();
+        let event_callback = self.event_callback.clone();
+
        self.emit_event(StageEvent::Progress {
            stage_id: stage_id.to_string(),
-            message: format!("Processing {} items", total),
+            message: format!("并行处理 {} 项 (workers={})", total, workers),
        });

-        // Sequential execution with progress tracking
-        // Note: True parallel execution would require Send-safe drivers
-        let mut outputs = Vec::with_capacity(total);
+        // Parallel execution using buffer_unordered
+        let results: Vec<(usize, Result<StageResult, StageError>)> = stream::iter(
+            items.into_iter().enumerate().map(|(index, item)| {
+                let child_ctx = context.child_context(item, index, total);
+                let stage = stage_template.clone();
+                let llm = llm_driver.clone();
+                let skill = skill_driver.clone();
+                let hand = hand_driver.clone();
+                let cb = event_callback.clone();

-        for (index, item) in items.into_iter().enumerate() {
-            let mut child_context = context.child_context(item.clone(), index, total);
+                async move {
+                    let engine = StageEngine {
+                        llm_driver: llm,
+                        skill_driver: skill,
+                        hand_driver: hand,
+                        event_callback: cb,
+                        max_workers: workers,
+                    };
+                    let mut ctx = child_ctx;
+                    let result = engine.execute(&stage, &mut ctx).await;
+                    (index, result)
+                }
+            })
+        )
+        .buffer_unordered(workers)
+        .collect()
+        .await;

-            self.emit_event(StageEvent::ParallelProgress {
-                stage_id: stage_id.to_string(),
-                completed: index,
-                total,
-            });
+        // Sort by original index to preserve order
+        let mut ordered: Vec<_> = results.into_iter().collect();
+        ordered.sort_by_key(|(idx, _)| *idx);

-            match self.execute(stage_template, &mut child_context).await {
-                Ok(result) => outputs.push(result.output),
-                Err(e) => outputs.push(json!({ "error": e.to_string(), "index": index })),
+        let outputs: Vec<Value> = ordered.into_iter().map(|(index, result)| {
+            match result {
+                Ok(sr) => sr.output,
+                Err(e) => json!({ "error": e.to_string(), "index": index }),
            }
-        }
+        }).collect();

        Ok(Value::Array(outputs))
    }
--- a/crates/zclaw-pipeline/src/executor.rs
+++ b/crates/zclaw-pipeline/src/executor.rs
@@ -125,7 +125,7 @@ impl PipelineExecutor {
            return Ok(run.clone());
        }

-        Err(ExecuteError::Action("Run not found after execution".to_string()))
+        Err(ExecuteError::Action("执行后未找到运行记录".to_string()))
    }

    /// Execute pipeline steps
@@ -215,7 +215,7 @@ impl PipelineExecutor {
                Action::Parallel { each, step, max_workers } => {
                    let items = context.resolve(each)?;
                    let items_array = items.as_array()
-                        .ok_or_else(|| ExecuteError::Action("Parallel 'each' must resolve to an array".to_string()))?;
+                        .ok_or_else(|| ExecuteError::Action("并行执行 'each' 必须解析为数组".to_string()))?;

                    let workers = max_workers.unwrap_or(4);
                    let results = self.execute_parallel(step, items_array.clone(), workers, context).await?;
--- a/crates/zclaw-pipeline/src/intent.rs
+++ b/crates/zclaw-pipeline/src/intent.rs
@@ -402,23 +402,25 @@ pub struct DefaultLlmIntentDriver {
    model_id: String,
 }

-impl DefaultLlmIntentDriver {
-    /// Create a new default LLM driver
-    pub fn new(model_id: impl Into<String>) -> Self {
-        Self {
-            model_id: model_id.into(),
-        }
+/// Runtime LLM driver that wraps zclaw-runtime's LlmDriver for actual LLM calls
+pub struct RuntimeLlmIntentDriver {
+    driver: std::sync::Arc<dyn zclaw_runtime::driver::LlmDriver>,
+}
+
+impl RuntimeLlmIntentDriver {
+    /// Create a new runtime LLM intent driver wrapping an existing LLM driver
+    pub fn new(driver: std::sync::Arc<dyn zclaw_runtime::driver::LlmDriver>) -> Self {
+        Self { driver }
    }
 }

 #[async_trait]
-impl LlmIntentDriver for DefaultLlmIntentDriver {
+impl LlmIntentDriver for RuntimeLlmIntentDriver {
    async fn semantic_match(
        &self,
        user_input: &str,
        triggers: &[CompiledTrigger],
    ) -> Option<SemanticMatchResult> {
-        // Build prompt for LLM
        let trigger_descriptions: Vec<String> = triggers
            .iter()
            .map(|t| {
@@ -430,31 +432,42 @@ impl LlmIntentDriver for DefaultLlmIntentDriver {
            })
            .collect();

-        let prompt = format!(
-            r#"分析用户输入，匹配合适的 Pipeline。
+        let system_prompt = r#"分析用户输入，匹配合适的 Pipeline。只返回 JSON，不要其他内容。"#
+            .to_string();

-用户输入: {}
-
-可选 Pipelines:
-{}
-
-返回 JSON 格式:
-{{
-  "pipeline_id": "匹配的 pipeline ID 或 null",
-  "params": {{ "参数名": "值" }},
-  "confidence": 0.0-1.0,
-  "reason": "匹配原因"
-}}
-
-只返回 JSON，不要其他内容。"#,
+        let user_msg = format!(
+            "用户输入: {}\n\n可选 Pipelines:\n{}",
            user_input,
            trigger_descriptions.join("\n")
        );

-        // In a real implementation, this would call the LLM
-        // For now, we return None to indicate semantic matching is not available
-        let _ = prompt; // Suppress unused warning
-        None
+        let request = zclaw_runtime::driver::CompletionRequest {
+            model: self.driver.provider().to_string(),
+            system: Some(system_prompt),
+            messages: vec![zclaw_types::Message::assistant(user_msg)],
+            max_tokens: Some(512),
+            temperature: Some(0.2),
+            stream: false,
+            ..Default::default()
+        };
+
+        match self.driver.complete(request).await {
+            Ok(response) => {
+                let text = response.content.iter()
+                    .filter_map(|block| match block {
+                        zclaw_runtime::driver::ContentBlock::Text { text } => Some(text.as_str()),
+                        _ => None,
+                    })
+                    .collect::<Vec<_>>()
+                    .join("");
+
+                parse_semantic_match_response(&text)
+            }
+            Err(e) => {
+                tracing::warn!("[intent] LLM semantic match failed: {}", e);
+                None
+            }
+        }
    }

    async fn collect_params(
@@ -463,7 +476,10 @@ impl LlmIntentDriver for DefaultLlmIntentDriver {
        missing_params: &[MissingParam],
        _context: &HashMap<String, serde_json::Value>,
    ) -> HashMap<String, serde_json::Value> {
-        // Build prompt to extract parameters from user input
+        if missing_params.is_empty() {
+            return HashMap::new();
+        }
+
        let param_descriptions: Vec<String> = missing_params
            .iter()
            .map(|p| {
@@ -476,30 +492,123 @@ impl LlmIntentDriver for DefaultLlmIntentDriver {
            })
            .collect();

-        let prompt = format!(
-            r#"从用户输入中提取参数值。
+        let system_prompt = r#"从用户输入中提取参数值。如果无法提取，该参数可以省略。只返回 JSON。"#
+            .to_string();

-用户输入: {}
-
-需要提取的参数:
-{}
-
-返回 JSON 格式:
-{{
-  "参数名": "提取的值"
-}}
-
-如果无法提取，该参数可以省略。只返回 JSON。"#,
+        let user_msg = format!(
+            "用户输入: {}\n\n需要提取的参数:\n{}",
            user_input,
            param_descriptions.join("\n")
        );

-        // In a real implementation, this would call the LLM
-        let _ = prompt;
-        HashMap::new()
+        let request = zclaw_runtime::driver::CompletionRequest {
+            model: self.driver.provider().to_string(),
+            system: Some(system_prompt),
+            messages: vec![zclaw_types::Message::assistant(user_msg)],
+            max_tokens: Some(512),
+            temperature: Some(0.1),
+            stream: false,
+            ..Default::default()
+        };
+
+        match self.driver.complete(request).await {
+            Ok(response) => {
+                let text = response.content.iter()
+                    .filter_map(|block| match block {
+                        zclaw_runtime::driver::ContentBlock::Text { text } => Some(text.as_str()),
+                        _ => None,
+                    })
+                    .collect::<Vec<_>>()
+                    .join("");
+
+                parse_params_response(&text)
+            }
+            Err(e) => {
+                tracing::warn!("[intent] LLM param extraction failed: {}", e);
+                HashMap::new()
+            }
+        }
    }
 }

+/// Parse the semantic-match JSON object out of an LLM response.
+/// The JSON text itself is located with `extract_json_from_text`.
+///
+/// Returns `None` when no JSON can be parsed, when `pipeline_id` is missing, not a
+/// string or empty, or when `confidence` is missing, non-numeric or below 0.5.
+/// `params` falls back to its default and `reason` to "" when absent or mistyped.
+fn parse_semantic_match_response(text: &str) -> Option<SemanticMatchResult> {
+    let json_str = extract_json_from_text(text);
+    let parsed: serde_json::Value = serde_json::from_str(&json_str).ok()?;
+
+    let pipeline_id = parsed.get("pipeline_id")?.as_str()?.to_string();
+    // JSON numbers are read as f64 and narrowed to the f32 used from here on.
+    let confidence = parsed.get("confidence")?.as_f64()? as f32;
+
+    // Reject low-confidence matches
+    if confidence < 0.5 || pipeline_id.is_empty() {
+        return None;
+    }
+
+    // All JSON value kinds are carried over unchanged, so one clone per entry is enough.
+    let params = parsed.get("params")
+        .and_then(|v| v.as_object())
+        .map(|obj| {
+            obj.iter()
+                .map(|(k, v)| (k.clone(), v.clone()))
+                .collect()
+        })
+        .unwrap_or_default();
+
+    let reason = parsed.get("reason")
+        .and_then(|v| v.as_str())
+        .unwrap_or("")
+        .to_string();
+
+    Some(SemanticMatchResult {
+        pipeline_id,
+        params,
+        confidence,
+        reason,
+    })
+}
+
+/// Parse the extracted-parameter JSON object from an LLM response.
+///
+/// Returns one entry per key of the object; any reply that does not contain a
+/// parseable JSON object yields an empty map.
+fn parse_params_response(text: &str) -> HashMap<String, serde_json::Value> {
+    let json_str = extract_json_from_text(text);
+    // Match on the owned value so the object's entries are moved out, not cloned.
+    match serde_json::from_str::<serde_json::Value>(&json_str) {
+        Ok(serde_json::Value::Object(obj)) => obj.into_iter().collect(),
+        _ => HashMap::new(),
+    }
+}
+
+/// Extract JSON from an LLM reply: the ```json fenced block, else `{`..`}`, else the trimmed text.
+fn extract_json_from_text(text: &str) -> String {
+    let trimmed = text.trim();
+
+    // Fenced block; split_once avoids mixing offsets of different slices (preamble-safe).
+    if let Some((_, after_fence)) = trimmed.split_once("```json") {
+        if let Some((_, body)) = after_fence.split_once('\n') {
+            if let Some((json, _)) = body.split_once("```") {
+                return json.trim().to_string();
+            }
+        }
+    }
+
+    // Bare JSON: look for `}` only after `{`, so the range can never be inverted.
+    if let Some(start) = trimmed.find('{') {
+        if let Some(end) = trimmed[start..].rfind('}') {
+            return trimmed[start..=start + end].to_string();
+        }
+    }
+
+    trimmed.to_string()
+}
+
 /// Intent analysis result (for debugging/logging)
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]