Files
zclaw_openfang/crates/zclaw-skills/src/orchestration/executor.rs
iven d9b0b4f4f7
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
fix(audit): Batch 7-9 dead_code 标注 + TODO 清理 + 文档同步
Batch 7: dead_code 标注统一 (16 处)
- crates/ 9 处: growth, kernel, pipeline, runtime, saas, skills
- src-tauri/ 7 处: classroom, intelligence, browser, mcp
- 统一格式: #[allow(dead_code)] // @reserved: <原因>

Batch 7+: EvolutionEngine L2/L3 10 个未使用 pub 函数
- 全部标注 @reserved: EvolutionEngine L2/L3, post-release integration

Batch 9: TODO → FUTURE 标记 (4 处)
- html.rs: template-based export
- nl_schedule.rs: LLM-assisted parsing
- knowledge/handlers.rs: category_id from upload
- personality_detector.rs: VikingStorage persistence

Batch 5+: Cargo.lock 更新 (serde_yaml_bw 迁移)

全量测试通过: 719 passed, 0 failed
2026-04-19 08:54:57 +08:00

356 lines
12 KiB
Rust

//! Orchestration executor
//!
//! Executes skill graphs with parallel execution, data passing,
//! error handling, and progress tracking.
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::RwLock;
use serde_json::Value;
use zclaw_types::Result;
use crate::{SkillRegistry, SkillContext};
use super::{
SkillGraph, OrchestrationPlan, OrchestrationResult, NodeResult,
OrchestrationProgress, ErrorStrategy, OrchestrationContext,
planner::OrchestrationPlanner,
};
/// Wrapper to make NodeResult Send for JoinSet
///
/// Pairs a node id with its execution result so both can be returned
/// together from a spawned task and matched back to the graph node.
struct ParallelNodeResult {
    // Id of the graph node this result belongs to.
    node_id: String,
    // Outcome of executing that node's skill.
    result: NodeResult,
}
/// Skill graph executor trait
///
/// Abstraction over engines that run a [`SkillGraph`] (or a pre-built
/// [`OrchestrationPlan`]) to completion, producing an
/// [`OrchestrationResult`].
///
/// NOTE(review): `execute_with_progress` is generic over `F`, which makes
/// this trait not usable as `dyn SkillGraphExecutor` — confirm callers only
/// use it through concrete types or generic bounds.
#[async_trait::async_trait]
pub trait SkillGraphExecutor: Send + Sync {
    /// Execute a skill graph with given inputs
    ///
    /// Implementations typically plan the graph first, then run the plan.
    async fn execute(
        &self,
        graph: &SkillGraph,
        inputs: HashMap<String, Value>,
        context: &SkillContext,
    ) -> Result<OrchestrationResult>;

    /// Execute with progress callback
    ///
    /// `progress_fn` is invoked with snapshots of orchestration state as
    /// execution advances.
    async fn execute_with_progress<F>(
        &self,
        graph: &SkillGraph,
        inputs: HashMap<String, Value>,
        context: &SkillContext,
        progress_fn: F,
    ) -> Result<OrchestrationResult>
    where
        F: Fn(OrchestrationProgress) + Send + Sync;

    /// Execute a pre-built plan
    async fn execute_plan(
        &self,
        plan: &OrchestrationPlan,
        inputs: HashMap<String, Value>,
        context: &SkillContext,
    ) -> Result<OrchestrationResult>;
}
/// Default executor implementation
///
/// Runs skill graphs via the shared [`SkillRegistry`], with cooperative,
/// per-graph-id cancellation.
pub struct DefaultExecutor {
    /// Skill registry for executing skills
    registry: Arc<SkillRegistry>,
    /// Cancellation tokens: graph id -> cancel-requested flag.
    /// NOTE(review): entries are inserted by `cancel` but never removed, so
    /// a cancelled graph id stays cancelled for later runs — confirm intended.
    cancellations: RwLock<HashMap<String, bool>>,
}
impl DefaultExecutor {
pub fn new(registry: Arc<SkillRegistry>) -> Self {
Self {
registry,
cancellations: RwLock::new(HashMap::new()),
}
}
/// Cancel an ongoing orchestration
pub async fn cancel(&self, graph_id: &str) {
let mut cancellations = self.cancellations.write().await;
cancellations.insert(graph_id.to_string(), true);
}
/// Check if cancelled
async fn is_cancelled(&self, graph_id: &str) -> bool {
let cancellations = self.cancellations.read().await;
cancellations.get(graph_id).copied().unwrap_or(false)
}
/// Execute a single node (used by pipeline orchestration action driver)
#[allow(dead_code)] // @reserved: post-release pipeline orchestration action driver
async fn execute_node(
&self,
node: &super::SkillNode,
orch_context: &OrchestrationContext,
skill_context: &SkillContext,
) -> Result<NodeResult> {
let start = Instant::now();
let node_id = node.id.clone();
// Check condition
if let Some(when) = &node.when {
if !orch_context.evaluate_condition(when).unwrap_or(false) {
return Ok(NodeResult {
node_id,
success: true,
output: Value::Null,
error: None,
duration_ms: 0,
retries: 0,
skipped: true,
});
}
}
// Resolve input mappings
let input = orch_context.resolve_node_input(node);
// Execute with retry
let max_attempts = node.retry.as_ref()
.map(|r| r.max_attempts)
.unwrap_or(1);
let delay_ms = node.retry.as_ref()
.map(|r| r.delay_ms)
.unwrap_or(1000);
let mut last_error = None;
let mut attempts = 0;
for attempt in 0..max_attempts {
attempts = attempt + 1;
// Apply timeout if specified
let result = if let Some(timeout_secs) = node.timeout_secs {
tokio::time::timeout(
Duration::from_secs(timeout_secs),
self.registry.execute(&node.skill_id, skill_context, input.clone())
).await
.map_err(|_| zclaw_types::ZclawError::Timeout(format!(
"Node {} timed out after {}s",
node.id, timeout_secs
)))?
} else {
self.registry.execute(&node.skill_id, skill_context, input.clone()).await
};
match result {
Ok(skill_result) if skill_result.success => {
return Ok(NodeResult {
node_id,
success: true,
output: skill_result.output,
error: None,
duration_ms: start.elapsed().as_millis() as u64,
retries: attempt,
skipped: false,
});
}
Ok(skill_result) => {
last_error = skill_result.error;
}
Err(e) => {
last_error = Some(e.to_string());
}
}
// Delay before retry (except last attempt)
if attempt < max_attempts - 1 {
tokio::time::sleep(Duration::from_millis(delay_ms)).await;
}
}
// All retries failed
Ok(NodeResult {
node_id,
success: false,
output: Value::Null,
error: last_error,
duration_ms: start.elapsed().as_millis() as u64,
retries: attempts - 1,
skipped: false,
})
}
}
#[async_trait::async_trait]
impl SkillGraphExecutor for DefaultExecutor {
    /// Plan the graph with the default planner, then run the plan without
    /// progress reporting.
    async fn execute(
        &self,
        graph: &SkillGraph,
        inputs: HashMap<String, Value>,
        context: &SkillContext,
    ) -> Result<OrchestrationResult> {
        // Build plan first
        let plan = super::DefaultPlanner::new().plan(graph)?;
        self.execute_plan(&plan, inputs, context).await
    }

    /// Core execution loop: runs the plan's parallel groups sequentially,
    /// executing the nodes of each group concurrently on a `JoinSet`, and
    /// invokes `progress_fn` before and after every group.
    ///
    /// NOTE(review): unlike `execute_node`, this parallel path does not
    /// evaluate `node.when` conditions nor apply per-node retry/timeout
    /// config — confirm whether that is intentional.
    async fn execute_with_progress<F>(
        &self,
        graph: &SkillGraph,
        inputs: HashMap<String, Value>,
        context: &SkillContext,
        progress_fn: F,
    ) -> Result<OrchestrationResult>
    where
        F: Fn(OrchestrationProgress) + Send + Sync,
    {
        let plan = super::DefaultPlanner::new().plan(graph)?;
        let start = Instant::now();
        // Data-passing context: node outputs become inputs for later groups.
        let mut orch_context = OrchestrationContext::new(graph, inputs);
        let mut node_results: HashMap<String, NodeResult> = HashMap::new();
        let mut progress = OrchestrationProgress::new(&graph.id, graph.nodes.len());
        // Execute parallel groups sequentially, but nodes within each group in parallel
        for group in &plan.parallel_groups {
            // Cancellation is only observed between groups; in-flight nodes
            // are allowed to finish.
            if self.is_cancelled(&graph.id).await {
                return Ok(OrchestrationResult {
                    success: false,
                    output: Value::Null,
                    node_results,
                    duration_ms: start.elapsed().as_millis() as u64,
                    error: Some("Cancelled".to_string()),
                });
            }
            progress.status = format!("Executing group with {} nodes", group.len());
            progress_fn(progress.clone());
            // Execute all nodes in the group concurrently using JoinSet
            let mut join_set = tokio::task::JoinSet::new();
            for node_id in group {
                // Silently skip ids the graph does not contain.
                if let Some(node) = graph.nodes.iter().find(|n| &n.id == node_id) {
                    // Clone everything the task captures: spawned tasks must
                    // be 'static and own their data.
                    let node = node.clone();
                    let node_id = node_id.clone();
                    let orch_ctx = orch_context.clone();
                    let skill_ctx = context.clone();
                    let registry = self.registry.clone();
                    join_set.spawn(async move {
                        let input = orch_ctx.resolve_node_input(&node);
                        let start = Instant::now();
                        let result = registry.execute(&node.skill_id, &skill_ctx, input).await;
                        // Fold the three outcomes (success / skill-reported
                        // failure / execution error) into one NodeResult.
                        let nr = match result {
                            Ok(sr) if sr.success => NodeResult {
                                node_id: node_id.clone(),
                                success: true,
                                output: sr.output,
                                error: None,
                                duration_ms: start.elapsed().as_millis() as u64,
                                retries: 0,
                                skipped: false,
                            },
                            Ok(sr) => NodeResult {
                                node_id: node_id.clone(),
                                success: false,
                                output: Value::Null,
                                error: sr.error,
                                duration_ms: start.elapsed().as_millis() as u64,
                                retries: 0,
                                skipped: false,
                            },
                            Err(e) => NodeResult {
                                node_id: node_id.clone(),
                                success: false,
                                output: Value::Null,
                                error: Some(e.to_string()),
                                duration_ms: start.elapsed().as_millis() as u64,
                                retries: 0,
                                skipped: false,
                            },
                        };
                        ParallelNodeResult { node_id, result: nr }
                    });
                }
            }
            // Collect results as tasks complete
            while let Some(join_result) = join_set.join_next().await {
                match join_result {
                    Ok(parallel_result) => {
                        let ParallelNodeResult { node_id, result } = parallel_result;
                        if result.success {
                            // Publish the output so later groups can map it
                            // into their inputs.
                            orch_context.set_node_output(&node_id, result.output.clone());
                            progress.completed_nodes.push(node_id.clone());
                        } else {
                            progress.failed_nodes.push(node_id.clone());
                            if matches!(graph.on_error, ErrorStrategy::Stop) {
                                // Stop strategy: abort remaining tasks and
                                // return early; results of still-running
                                // siblings are dropped.
                                let error = result.error.clone();
                                node_results.insert(node_id, result);
                                join_set.abort_all();
                                return Ok(OrchestrationResult {
                                    success: false,
                                    output: Value::Null,
                                    node_results,
                                    duration_ms: start.elapsed().as_millis() as u64,
                                    error,
                                });
                            }
                        }
                        node_results.insert(node_id, result);
                    }
                    Err(e) => {
                        // A panicked task is logged and its node simply has no
                        // entry in node_results; it does not fail the run.
                        tracing::warn!("[Orchestration] Task panicked: {}", e);
                    }
                }
            }
            // Update progress
            progress.progress_percent = ((progress.completed_nodes.len() + progress.failed_nodes.len())
                * 100 / graph.nodes.len().max(1)) as u8;
            progress.status = format!("Completed group with {} nodes", group.len());
            progress_fn(progress.clone());
        }
        // Build final output
        let output = orch_context.build_output(&graph.output_mapping);
        let success = progress.failed_nodes.is_empty();
        Ok(OrchestrationResult {
            success,
            output,
            node_results,
            duration_ms: start.elapsed().as_millis() as u64,
            error: if success { None } else { Some("Some nodes failed".to_string()) },
        })
    }

    /// Execute a pre-built plan by delegating to `execute_with_progress`
    /// with a no-op progress callback.
    async fn execute_plan(
        &self,
        plan: &OrchestrationPlan,
        inputs: HashMap<String, Value>,
        context: &SkillContext,
    ) -> Result<OrchestrationResult> {
        self.execute_with_progress(&plan.graph, inputs, context, |_| {}).await
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // A successful NodeResult carries its id and reports success.
    #[test]
    fn test_node_result_success() {
        let node_result = NodeResult {
            node_id: String::from("test"),
            success: true,
            output: serde_json::json!({"data": "value"}),
            error: None,
            duration_ms: 100,
            retries: 0,
            skipped: false,
        };
        assert_eq!(node_result.node_id, "test");
        assert!(node_result.success);
    }
}