Files
zclaw_openfang/crates/zclaw-pipeline/src/trigger.rs
iven 0d4fa96b82
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
refactor: 统一项目名称从OpenFang到ZCLAW
重构所有代码和文档中的项目名称,将OpenFang统一更新为ZCLAW。包括:
- 配置文件中的项目名称
- 代码注释和文档引用
- 环境变量和路径
- 类型定义和接口名称
- 测试用例和模拟数据

同时优化部分代码结构,移除未使用的模块,并更新相关依赖项。
2026-03-27 07:36:03 +08:00

469 lines
14 KiB
Rust

//! Pipeline Trigger System
//!
//! Provides natural language trigger matching for pipelines.
//! Supports keywords, regex patterns, and parameter extraction.
//!
//! # Example
//!
//! ```yaml
//! trigger:
//!   keywords: [课程, 教程, 学习]
//!   patterns:
//!     - "帮我做*课程"
//!     - "生成*教程"
//!     - "我想学习{topic}"
//!   description: "根据用户主题生成完整的互动课程内容"
//!   examples:
//!     - "帮我做一个 Python 入门课程"
//!     - "生成机器学习基础教程"
//! ```
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Trigger definition for a pipeline.
///
/// This is the declarative (serializable) form; `compile_trigger` turns it
/// into a `CompiledTrigger` with ready-to-use regexes. Every field is
/// optional on input and falls back to its `Default` value when omitted.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct Trigger {
/// Keywords for quick matching; `TriggerParser::quick_match` tests them as
/// case-insensitive substrings of the input.
#[serde(default)]
pub keywords: Vec<String>,
/// Glob-style patterns, compiled by `compile_pattern`.
/// Supported placeholders: `*` (any characters) and `{param}` (named capture).
/// Other regex metacharacters are escaped and matched literally.
#[serde(default)]
pub patterns: Vec<String>,
/// Description for LLM semantic matching
#[serde(default)]
pub description: Option<String>,
/// Example inputs (helps LLM understand intent)
#[serde(default)]
pub examples: Vec<String>,
}
/// Compiled trigger for efficient matching.
///
/// Produced by `compile_trigger`: carries the data of a `Trigger` with its
/// pattern strings already compiled, plus the identity of the owning pipeline.
#[derive(Debug, Clone)]
pub struct CompiledTrigger {
/// Pipeline ID this trigger belongs to
pub pipeline_id: String,
/// Pipeline display name
pub display_name: Option<String>,
/// Keywords for quick matching (compared case-insensitively as substrings)
pub keywords: Vec<String>,
/// Compiled regex patterns, tried in order
pub patterns: Vec<CompiledPattern>,
/// Description for semantic matching
pub description: Option<String>,
/// Example inputs
pub examples: Vec<String>,
/// Parameter definitions (from pipeline inputs)
pub param_defs: Vec<TriggerParam>,
}
/// Compiled regex pattern with named captures.
///
/// Built by `compile_pattern` from a glob-style pattern string.
#[derive(Debug, Clone)]
pub struct CompiledPattern {
/// Original (glob-style) pattern string, kept for reporting which pattern matched
pub original: String,
/// Compiled regex, anchored to the whole input (`^...$`)
pub regex: Regex,
/// Named capture group names, in the order they appear in the pattern.
/// `TriggerParser::quick_match` looks each of these up in the regex captures.
pub capture_names: Vec<String>,
}
/// Parameter definition for trigger matching.
///
/// Serialized with camelCase field names; `param_type` is exposed as `type`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TriggerParam {
/// Parameter name
pub name: String,
/// Parameter type; serialized as `type` and defaults to `"string"` when omitted
#[serde(rename = "type", default = "default_param_type")]
pub param_type: String,
/// Is this parameter required? Defaults to `false` when omitted.
#[serde(default)]
pub required: bool,
/// Human-readable label
#[serde(default)]
pub label: Option<String>,
/// Default value (arbitrary JSON)
#[serde(default)]
pub default: Option<serde_json::Value>,
}
/// Serde fallback for [`TriggerParam::param_type`]: parameters without an
/// explicit `type` are treated as strings.
fn default_param_type() -> String {
    String::from("string")
}
/// Result of trigger matching.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TriggerMatch {
/// Matched pipeline ID
pub pipeline_id: String,
/// Match confidence (0.0 - 1.0).
/// `TriggerParser::quick_match` reports 0.7 for keyword hits and 0.85 for pattern hits.
pub confidence: f32,
/// Match type
pub match_type: MatchType,
/// Extracted parameters, keyed by capture name (empty for keyword matches)
pub params: HashMap<String, serde_json::Value>,
/// What matched, if anything: the original pattern string for pattern
/// matches, or the keyword itself for keyword matches
pub matched_pattern: Option<String>,
}
/// Type of match.
///
/// Serialized as a lowercase string (`"keyword"`, `"pattern"`, `"semantic"`, `"none"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum MatchType {
/// Keyword match: the input contains one of the trigger's keywords
/// (case-insensitive substring test, not an exact match)
Keyword,
/// Regex pattern match
Pattern,
/// LLM semantic match (not produced by any code visible in this file)
Semantic,
/// No match
None,
}
/// Trigger parser and matcher.
///
/// Holds the registered pipeline triggers and matches user input against them.
pub struct TriggerParser {
/// Compiled triggers, in registration order (which is also the matching order)
triggers: Vec<CompiledTrigger>,
}
impl TriggerParser {
    /// Create a new empty trigger parser.
    pub fn new() -> Self {
        Self {
            triggers: Vec::new(),
        }
    }

    /// Register a pipeline trigger.
    ///
    /// Triggers are matched in registration order, so earlier registrations
    /// win when several triggers could match the same input.
    pub fn register(&mut self, trigger: CompiledTrigger) {
        self.triggers.push(trigger);
    }

    /// Quick match using keywords and compiled patterns (no LLM involved).
    ///
    /// Triggers are tried in registration order. Within one trigger the
    /// keywords are checked first (case-insensitive substring test,
    /// confidence 0.7), then the patterns (confidence 0.85, with named
    /// captures extracted into `params`). The first hit is returned.
    ///
    /// Blank keywords are ignored: an empty string is a substring of every
    /// input, so a single misconfigured keyword would otherwise make its
    /// trigger match everything.
    ///
    /// Returns `None` when no registered trigger matches.
    pub fn quick_match(&self, input: &str) -> Option<TriggerMatch> {
        let input_lower = input.to_lowercase();

        for trigger in &self.triggers {
            // Keywords take precedence over patterns within a trigger.
            let keyword_hit = trigger.keywords.iter().find(|keyword| {
                !keyword.trim().is_empty() && input_lower.contains(&keyword.to_lowercase())
            });
            if let Some(keyword) = keyword_hit {
                return Some(TriggerMatch {
                    pipeline_id: trigger.pipeline_id.clone(),
                    confidence: 0.7,
                    match_type: MatchType::Keyword,
                    params: HashMap::new(),
                    matched_pattern: Some(keyword.clone()),
                });
            }

            // Patterns are matched against the raw (not lowercased) input.
            for pattern in &trigger.patterns {
                if let Some(captures) = pattern.regex.captures(input) {
                    // Collect every named capture that participated in the match.
                    let params: HashMap<String, serde_json::Value> = pattern
                        .capture_names
                        .iter()
                        .filter_map(|name| {
                            captures.name(name).map(|value| {
                                (
                                    name.clone(),
                                    serde_json::Value::String(value.as_str().to_string()),
                                )
                            })
                        })
                        .collect();

                    return Some(TriggerMatch {
                        pipeline_id: trigger.pipeline_id.clone(),
                        confidence: 0.85,
                        match_type: MatchType::Pattern,
                        params,
                        matched_pattern: Some(pattern.original.clone()),
                    });
                }
            }
        }

        None
    }

    /// Get all registered triggers, in registration order.
    pub fn triggers(&self) -> &[CompiledTrigger] {
        &self.triggers
    }

    /// Get the first registered trigger with the given pipeline ID, if any.
    pub fn get_trigger(&self, pipeline_id: &str) -> Option<&CompiledTrigger> {
        self.triggers.iter().find(|t| t.pipeline_id == pipeline_id)
    }
}
impl Default for TriggerParser {
fn default() -> Self {
Self::new()
}
}
/// Compile a glob-style pattern to an anchored regex.
///
/// Supports:
/// - `*` - unnamed capture matching any characters (greedy, may be empty)
/// - `{name}` - named capture group matching one or more characters
/// - `{name:type}` - typed capture; the `:type` suffix is parsed and ignored
/// - `{}` / `{:type}` - unnamed capture matching one or more characters
///
/// All other regex metacharacters are escaped and matched literally, and the
/// result is anchored with `^...$` so the pattern must cover the whole input.
///
/// Capture names are sanitized for use as regex group names (every
/// non-alphanumeric character becomes `_`). The sanitized name is what gets
/// recorded in `capture_names`, so looking a recorded name up in the regex
/// captures always finds the group.
///
/// A `{` without a closing `}` is treated as a placeholder running to the end
/// of the pattern.
///
/// Examples:
/// - "帮我做*课程" -> "^帮我做(.*)课程$"
/// - "我想学习{topic}" -> "^我想学习(?P<topic>.+)$"
///
/// # Errors
///
/// Returns [`PatternError::InvalidRegex`] when the generated expression is
/// rejected by the regex engine (for example a duplicated capture name).
pub fn compile_pattern(pattern: &str) -> Result<CompiledPattern, PatternError> {
    let mut regex_str = String::with_capacity(pattern.len() + 2);
    regex_str.push('^');
    let mut capture_names = Vec::new();
    let mut chars = pattern.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '*' => {
                // Greedy match any characters
                regex_str.push_str("(.*)");
            }
            '{' => {
                // Consume the placeholder body up to (and including) the closing brace.
                let placeholder: String = chars.by_ref().take_while(|&c| c != '}').collect();
                // Anything after the first ':' is a type annotation, which is not used.
                let raw_name = placeholder
                    .split_once(':')
                    .map_or(placeholder.as_str(), |(name, _type)| name);

                if raw_name.is_empty() {
                    regex_str.push_str("(.+)");
                } else {
                    // Record the same (sanitized) name that is emitted into the
                    // regex; recording the raw name would make lookups of names
                    // such as `my-topic` miss the `my_topic` group.
                    let group_name = regex_escape(raw_name);
                    regex_str.push_str("(?P<");
                    regex_str.push_str(&group_name);
                    regex_str.push_str(">.+)");
                    capture_names.push(group_name);
                }
            }
            '[' | ']' | '(' | ')' | '\\' | '^' | '$' | '.' | '|' | '?' | '+' => {
                // Escape regex special characters
                regex_str.push('\\');
                regex_str.push(ch);
            }
            _ => {
                regex_str.push(ch);
            }
        }
    }
    regex_str.push('$');

    let regex = Regex::new(&regex_str).map_err(|e| PatternError::InvalidRegex {
        pattern: pattern.to_string(),
        error: e.to_string(),
    })?;

    Ok(CompiledPattern {
        original: pattern.to_string(),
        regex,
        capture_names,
    })
}
/// Sanitize a string so it can serve as a regex capture group name.
///
/// Every character that is not alphanumeric (in the Unicode sense of
/// `char::is_alphanumeric`) is replaced by a single underscore; all other
/// characters are kept unchanged.
fn regex_escape(s: &str) -> String {
    s.replace(|c: char| !c.is_alphanumeric(), "_")
}
/// Build a [`CompiledTrigger`] from a declarative [`Trigger`].
///
/// Each entry of `trigger.patterns` is compiled with [`compile_pattern`], in
/// order. Keywords, description and examples are copied over unchanged;
/// `pipeline_id`, `display_name` and `param_defs` are moved into the result.
///
/// # Errors
///
/// Returns the [`PatternError`] of the first pattern that fails to compile.
pub fn compile_trigger(
    pipeline_id: String,
    display_name: Option<String>,
    trigger: &Trigger,
    param_defs: Vec<TriggerParam>,
) -> Result<CompiledTrigger, PatternError> {
    // Collecting into `Result` stops at the first pattern that fails.
    let compiled = trigger
        .patterns
        .iter()
        .map(|source| compile_pattern(source))
        .collect::<Result<Vec<_>, _>>()?;

    Ok(CompiledTrigger {
        pipeline_id,
        display_name,
        keywords: trigger.keywords.clone(),
        patterns: compiled,
        description: trigger.description.clone(),
        examples: trigger.examples.clone(),
        param_defs,
    })
}
/// Pattern compilation error, returned by `compile_pattern` and `compile_trigger`.
#[derive(Debug, thiserror::Error)]
pub enum PatternError {
/// The regex generated from a glob-style pattern was rejected by the regex engine.
/// `pattern` is the original glob-style pattern string and `error` is the
/// engine's error rendered as text.
#[error("Invalid regex in pattern '{pattern}': {error}")]
InvalidRegex { pattern: String, error: String },
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `*` compiles to an unnamed greedy capture and the pattern is anchored.
    #[test]
    fn test_compile_pattern_wildcard() {
        let pattern = compile_pattern("帮我做*课程").unwrap();
        assert!(pattern.regex.is_match("帮我做一个Python课程"));
        assert!(pattern.regex.is_match("帮我做机器学习课程"));
        assert!(!pattern.regex.is_match("生成一个课程"));

        // The wildcard is exposed as the first (unnamed) capture group.
        let captures = pattern.regex.captures("帮我做一个Python课程").unwrap();
        assert_eq!(captures.get(1).unwrap().as_str(), "一个Python");
    }

    /// `{name}` compiles to a named capture group.
    #[test]
    fn test_compile_pattern_named_capture() {
        let pattern = compile_pattern("我想学习{topic}").unwrap();
        assert!(pattern.capture_names.contains(&"topic".to_string()));

        let captures = pattern.regex.captures("我想学习Python编程").unwrap();
        assert_eq!(captures.name("topic").unwrap().as_str(), "Python编程");
    }

    /// Several named captures separated by literal text are extracted independently.
    #[test]
    fn test_compile_pattern_mixed() {
        let pattern = compile_pattern("生成{level}级别的{topic}教程").unwrap();
        assert!(pattern.capture_names.contains(&"level".to_string()));
        assert!(pattern.capture_names.contains(&"topic".to_string()));

        let captures = pattern
            .regex
            .captures("生成入门级别的机器学习教程")
            .unwrap();
        assert_eq!(captures.name("level").unwrap().as_str(), "入门");
        assert_eq!(captures.name("topic").unwrap().as_str(), "机器学习");
    }

    /// Keywords are checked before patterns, and unrelated input does not match.
    #[test]
    fn test_trigger_parser_quick_match() {
        let mut parser = TriggerParser::new();
        let trigger = CompiledTrigger {
            pipeline_id: "course-generator".to_string(),
            display_name: Some("课程生成器".to_string()),
            keywords: vec!["课程".to_string(), "教程".to_string()],
            patterns: vec![compile_pattern("帮我做*课程").unwrap()],
            description: Some("生成课程".to_string()),
            examples: vec![],
            param_defs: vec![],
        };
        parser.register(trigger);

        // Keyword match: the input contains the keyword "课程".
        let result = parser.quick_match("我想学习一个课程");
        assert!(result.is_some());
        let match_result = result.unwrap();
        assert_eq!(match_result.pipeline_id, "course-generator");
        assert_eq!(match_result.match_type, MatchType::Keyword);

        // This input satisfies both the keyword "课程" and the pattern
        // "帮我做*课程"; keywords are checked first, so the keyword match wins.
        let result = parser.quick_match("帮我做一个Python课程");
        assert!(result.is_some());
        let match_result = result.unwrap();
        assert_eq!(match_result.match_type, MatchType::Keyword);

        // No keyword and no pattern applies.
        let result = parser.quick_match("今天天气真好");
        assert!(result.is_none());
    }

    /// Named captures from a matching pattern end up in `TriggerMatch::params`.
    #[test]
    fn test_trigger_param_extraction() {
        // The literal "难度的" between the two captures occurs only once in
        // the input, so the split between `level` and `topic` is unambiguous.
        let pattern = compile_pattern("生成{level}难度的{topic}教程").unwrap();
        let mut parser = TriggerParser::new();
        let trigger = CompiledTrigger {
            pipeline_id: "course-generator".to_string(),
            display_name: Some("课程生成器".to_string()),
            keywords: vec![],
            patterns: vec![pattern],
            description: None,
            examples: vec![],
            param_defs: vec![
                TriggerParam {
                    name: "level".to_string(),
                    param_type: "string".to_string(),
                    required: false,
                    label: Some("难度级别".to_string()),
                    default: Some(serde_json::Value::String("入门".to_string())),
                },
                TriggerParam {
                    name: "topic".to_string(),
                    param_type: "string".to_string(),
                    required: true,
                    label: Some("课程主题".to_string()),
                    default: None,
                },
            ],
        };
        parser.register(trigger);

        let result = parser.quick_match("生成高难度的机器学习教程").unwrap();
        // With no keywords registered, the match must come from the pattern.
        assert_eq!(result.match_type, MatchType::Pattern);
        // `{level}` compiles to `.+`, so it captures "高" and can never be empty.
        assert_eq!(result.params.get("level").unwrap(), "高");
        assert_eq!(result.params.get("topic").unwrap(), "机器学习");
    }
}