Files
zclaw_openfang/crates/zclaw-runtime/src/nl_schedule.rs
iven 043824c722
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
perf(runtime): nl_schedule 正则预编译 — 9个 LazyLock 静态替代每次调用编译
将 parse_nl_schedule 中 9 个 Regex::new() 从函数内每次调用编译
提升为 std::sync::LazyLock<Regex> 静态变量,首次调用时编译一次,
后续调用直接复用。16 个单元测试全部通过。
2026-04-15 13:34:27 +08:00

608 lines
22 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Natural Language Schedule Parser — transforms Chinese time expressions into cron.
//!
//! Three-layer fallback strategy:
//! 1. Regex pattern matching (covers ~80% of common expressions)
//! 2. LLM-assisted parsing (for ambiguous/complex expressions) — TODO: wire when Haiku driver available
//! 3. Interactive clarification (return `Unclear`)
//!
//! Lives in `zclaw-runtime` because it's a pure text→cron utility with no kernel dependency.
use std::sync::LazyLock;
use chrono::Timelike;
use regex::Regex;
use serde::{Deserialize, Serialize};
use zclaw_types::AgentId;
// ---------------------------------------------------------------------------
// Data structures
// ---------------------------------------------------------------------------
/// Result of parsing a natural language schedule expression.
///
/// One value describes a single interpretation of the user's text: when it
/// should fire, how confident the parser is, and what should be triggered.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParsedSchedule {
/// Cron expression, e.g. "0 9 * * *".
///
/// Note: the one-shot matcher in this file (`try_one_shot`) stores an
/// RFC 3339 timestamp in this field instead of a cron expression.
pub cron_expression: String,
/// Human-readable description of the schedule
pub natural_description: String,
/// Confidence of the parse (0.0–1.0); the regex matchers in this file
/// emit fixed values between 0.85 and 0.95.
pub confidence: f32,
/// What the task does (extracted from user input)
pub task_description: String,
/// What to trigger when the schedule fires
pub task_target: TaskTarget,
}
/// Target to trigger on schedule.
///
/// Serialized with serde's adjacently tagged representation: the variant
/// name goes under the `"type"` key and the variant's payload (if any)
/// under the `"id"` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", content = "id")]
pub enum TaskTarget {
/// Trigger a specific agent (payload is the agent id rendered as a string)
Agent(String),
/// Trigger a specific hand (payload presumably a hand identifier — confirm with callers)
Hand(String),
/// Trigger a specific workflow (payload presumably a workflow identifier — confirm with callers)
Workflow(String),
/// Generic reminder (no specific target, carries no payload)
Reminder,
}
/// Outcome of NL schedule parsing.
///
/// The regex matchers visible in this file only ever produce `Exact` or
/// `Unclear`; `Ambiguous` is available for parsers that can return several
/// candidate interpretations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ScheduleParseResult {
/// High-confidence single parse
Exact(ParsedSchedule),
/// Multiple possible interpretations
Ambiguous(Vec<ParsedSchedule>),
/// Unable to parse — needs user clarification
Unclear,
}
// ---------------------------------------------------------------------------
// Pre-compiled regex patterns (LazyLock — compiled once, reused forever)
// ---------------------------------------------------------------------------
/// Time-of-day period fragment used across multiple patterns.
///
/// It is an *optional capturing group*, so every pattern that splices it in
/// via `format!` gains exactly one capture group at that position. In those
/// `format!` templates `{}` is the splice point and `{{` / `}}` are escaped
/// literal braces for regex repetition counts.
const PERIOD: &str = "(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?";
// Used by extract_task_description.
// Anchored at the start of the text: optional period word, a 1–2 digit hour,
// one separator (点 / 时 / colon), up to two minute digits and an optional 分.
// No capture groups — the whole match is what gets removed.
static RE_TIME_STRIP: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"^(?:凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?\d{1,2}[点时:]\d{0,2}分?"
).unwrap()
});
// Used by try_every_day.
// "every day at <time>". Groups: 1 = period (optional), 2 = hour,
// 3 = minute (optional).
static RE_EVERY_DAY_EXACT: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(
r"(?:每天|每日)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?",
PERIOD
)).unwrap()
});
// "every day in the <period>" with no explicit clock time.
// Group 1 = period (required here, unlike PERIOD).
static RE_EVERY_DAY_PERIOD: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?:每天|每日)(?:的)?(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)"
).unwrap()
});
// Used by try_every_week.
// "every week on <weekday> at <time>". Groups: 1 = weekday word,
// 2 = period (optional), 3 = hour, 4 = minute (optional).
static RE_EVERY_WEEK: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(
r"(?:每周|每个?星期|每个?礼拜)(一|二|三|四|五|六|日|天|周一|周二|周三|周四|周五|周六|周日|周天|星期一|星期二|星期三|星期四|星期五|星期六|星期日|星期天|礼拜一|礼拜二|礼拜三|礼拜四|礼拜五|礼拜六|礼拜日|礼拜天)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?",
PERIOD
)).unwrap()
});
// Used by try_workday.
// "on workdays at <time>". Groups: 1 = period (optional), 2 = hour,
// 3 = minute (optional).
static RE_WORKDAY_EXACT: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(
r"(?:工作日|每个?工作日|工作日(?:的)?){}(\d{{1,2}})[点时:](\d{{1,2}})?",
PERIOD
)).unwrap()
});
// "on workdays in the <period>" with no explicit clock time.
// Group 1 = period (required).
static RE_WORKDAY_PERIOD: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?:工作日|每个?工作日)(?:的)?(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)"
).unwrap()
});
// Used by try_interval.
// "every N hours/minutes", also accepting the "每隔N…" spelling that the
// schedule-intent keyword list advertises.
// Groups: 1 = interval count (1–2 digits), 2 = unit word.
static RE_INTERVAL: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"每隔?(\d{1,2})(小时|分钟|分|钟|个小时)")
        .expect("RE_INTERVAL is a valid static pattern")
});
// Used by try_monthly.
// "every month on day <N> [at <time>]". Groups: 1 = day of month,
// 2 = period (optional), 3 = hour (optional), 4 = minute (optional).
// The separator between hour and minute is optional too, so a bare
// "每月1号" matches with only group 1 set.
static RE_MONTHLY: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(
r"(?:每月|每个月)(?:的)?(\d{{1,2}})[号日](?:的)?{}(\d{{1,2}})?[点时:]?(\d{{1,2}})?",
PERIOD
)).unwrap()
});
// Used by try_one_shot.
// "tomorrow / the day after / in three days at <time>".
// Groups: 1 = relative-day word, 2 = period (optional), 3 = hour,
// 4 = minute (optional).
static RE_ONE_SHOT: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(
r"(明天|后天|大后天)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?",
PERIOD
)).unwrap()
});
// ---------------------------------------------------------------------------
// Helper lookups (pure functions, no allocation)
// ---------------------------------------------------------------------------
/// Maps a Chinese time-of-day word to the default hour (24-hour clock) used
/// when the user names a period without giving an explicit clock time.
///
/// Returns `None` for any word that is not a recognised period.
fn period_to_hour(period: &str) -> Option<u32> {
    let default_hour = match period {
        // Pre-dawn and midnight words all default to 00:00.
        "凌晨" | "半夜" | "午夜" => 0,
        "早上" | "早晨" | "上午" => 9,
        "中午" => 12,
        "下午" | "午后" => 15,
        "傍晚" | "黄昏" => 18,
        "晚上" | "晚间" | "夜里" | "夜晚" => 21,
        _ => return None,
    };
    Some(default_hour)
}
/// Maps a Chinese weekday name to its cron day-of-week field.
///
/// Accepts the bare numeral (`一` … `六`, `日`, `天`) as well as the `周`,
/// `星期` and `礼拜` prefixed forms — i.e. every alternative the weekly regex
/// can capture. Sunday maps to `"0"`, Monday–Saturday to `"1"`–`"6"`.
///
/// Returns `None` for anything else, including the empty string.
fn weekday_to_cron(day: &str) -> Option<&'static str> {
    match day {
        "一" | "周一" | "星期一" | "礼拜一" => Some("1"),
        "二" | "周二" | "星期二" | "礼拜二" => Some("2"),
        "三" | "周三" | "星期三" | "礼拜三" => Some("3"),
        "四" | "周四" | "星期四" | "礼拜四" => Some("4"),
        "五" | "周五" | "星期五" | "礼拜五" => Some("5"),
        "六" | "周六" | "星期六" | "礼拜六" => Some("6"),
        // Both 日 and 天 mean Sunday.
        "日" | "周日" | "星期日" | "礼拜日" | "天" | "周天" | "星期天" | "礼拜天" => Some("0"),
        _ => None,
    }
}
/// Converts a spoken 12-hour clock value into a 24-hour hour using the
/// accompanying Chinese time-of-day word.
///
/// * Afternoon, dusk and evening words (`下午`, `午后`, `傍晚`, `黄昏`, `晚上`,
///   `晚间`, `夜里`, `夜晚`) add 12 to hours below 12: `下午3点` → 15,
///   `晚上8点` → 20.
/// * "12 o'clock" together with a night or pre-dawn word (`晚上12点`,
///   `凌晨12点`, `半夜12点`, …) means midnight and becomes hour 0.
/// * `中午` keeps 11 and 12 as they are (`中午11点` is 11:00) and adds 12 to
///   smaller hours (`中午1点` → 13).
/// * Morning words, unknown words and `None` leave the hour untouched.
///
/// The result is not range-checked; callers validate `hour <= 23` themselves.
///
/// NOTE(review): midnight is returned as hour 0 of the *same* calendar day;
/// a caller building an absolute timestamp for e.g. `明天晚上12点` may want to
/// roll the date forward — confirm the intended semantics.
fn adjust_hour_for_period(hour: u32, period: Option<&str>) -> u32 {
    let Some(period) = period else {
        return hour;
    };

    match (period, hour) {
        // PM-style periods: shift 0–11 into 12–23.
        ("下午" | "午后" | "傍晚" | "黄昏" | "晚上" | "晚间" | "夜里" | "夜晚", h) if h < 12 => {
            h + 12
        }
        // "12 o'clock at night / before dawn" is midnight, not noon.
        ("晚上" | "晚间" | "夜里" | "夜晚" | "凌晨" | "半夜" | "午夜", 12) => 0,
        // Around noon: 11 and 12 are literal, 1–2 o'clock etc. are PM.
        ("中午", h) if h < 11 => h + 12,
        _ => hour,
    }
}
// ---------------------------------------------------------------------------
// Parser implementation
// ---------------------------------------------------------------------------
/// Parse a natural language schedule expression into a cron expression.
///
/// Uses a series of regex-based pattern matchers covering common Chinese
/// time expressions. Returns `Unclear` if no pattern matches.
pub fn parse_nl_schedule(input: &str, default_agent_id: &AgentId) -> ScheduleParseResult {
let input = input.trim();
if input.is_empty() {
return ScheduleParseResult::Unclear;
}
let task_description = extract_task_description(input);
if let Some(result) = try_every_day(input, &task_description, default_agent_id) {
return result;
}
if let Some(result) = try_every_week(input, &task_description, default_agent_id) {
return result;
}
if let Some(result) = try_workday(input, &task_description, default_agent_id) {
return result;
}
if let Some(result) = try_interval(input, &task_description, default_agent_id) {
return result;
}
if let Some(result) = try_monthly(input, &task_description, default_agent_id) {
return result;
}
if let Some(result) = try_one_shot(input, &task_description, default_agent_id) {
return result;
}
ScheduleParseResult::Unclear
}
/// Extracts the "what to do" part of a schedule request by peeling
/// schedule-related words and a leading clock time off the front of `input`.
///
/// Up to three rounds are run. Each round first removes leading keywords
/// (repeating until none applies) and then removes one leading time
/// expression such as `早上9点` or `3点30分`; the rounds stop early once no
/// time expression is found. The remainder is returned trimmed.
///
/// Example: `每天早上9点提醒我查房` → `查房`.
fn extract_task_description(input: &str) -> String {
    // Leading keywords that carry scheduling meaning rather than task content.
    // Entries must be non-empty: every string starts with "", so an empty
    // prefix would make the stripping loop below spin forever.
    const STRIP_PREFIXES: &[&str] = &[
        "每天", "每日", "每周", "工作日", "每个工作日",
        "每月", "每", "定时", "定期",
        "提醒我", "提醒", "帮我",
        "明天", "后天", "大后天",
    ];

    // Work on sub-slices of `input`; allocate only once for the return value.
    let mut desc = input;
    for _ in 0..3 {
        loop {
            let mut stripped = false;
            for prefix in STRIP_PREFIXES.iter().filter(|p| !p.is_empty()) {
                if let Some(rest) = desc.strip_prefix(*prefix) {
                    desc = rest;
                    stripped = true;
                }
            }
            if !stripped {
                break;
            }
        }

        // The pattern is anchored at the start and always consumes at least
        // an hour digit plus a separator, so a match is never empty.
        match RE_TIME_STRIP.find(desc) {
            Some(time) => desc = &desc[time.end()..],
            None => break,
        }
    }
    desc.trim().to_string()
}
// -- Pattern matchers (all use pre-compiled statics) --
/// Matches "every day" requests.
///
/// Two forms are recognised, in this order:
/// 1. an explicit clock time (`每天下午3点30分`) → cron `minute hour * * *`,
///    confidence 0.95;
/// 2. a bare period word (`每天早上`) → the period's default hour at minute 0,
///    confidence 0.85.
///
/// Returns `None` when neither form matches or when the explicit time is out
/// of range (hour > 23 or minute > 59); an out-of-range explicit time does
/// not fall back to the period-only form.
fn try_every_day(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
    // Shared constructor for both forms.
    let exact = |cron_expression: String, natural_description: String, confidence: f32| {
        ScheduleParseResult::Exact(ParsedSchedule {
            cron_expression,
            natural_description,
            confidence,
            task_description: task_desc.to_string(),
            task_target: TaskTarget::Agent(agent_id.to_string()),
        })
    };

    if let Some(groups) = RE_EVERY_DAY_EXACT.captures(input) {
        let period = groups.get(1).map(|g| g.as_str());
        let spoken_hour: u32 = groups.get(2)?.as_str().parse().ok()?;
        let minute: u32 = groups
            .get(3)
            .and_then(|g| g.as_str().parse().ok())
            .unwrap_or(0);
        let hour = adjust_hour_for_period(spoken_hour, period);
        if hour > 23 || minute > 59 {
            return None;
        }
        return Some(exact(
            format!("{} {} * * *", minute, hour),
            format!("每天{:02}:{:02}", hour, minute),
            0.95,
        ));
    }

    let groups = RE_EVERY_DAY_PERIOD.captures(input)?;
    let period = groups.get(1)?.as_str();
    let hour = period_to_hour(period)?;
    Some(exact(
        format!("0 {} * * *", hour),
        format!("每天{}", period),
        0.85,
    ))
}
/// Matches "every week on <weekday> at <time>" requests such as
/// `每周一上午10点`.
///
/// Produces cron `minute hour * * dow` with confidence 0.92. Returns `None`
/// when the pattern does not match, the weekday word is not recognised, or
/// the resulting time is out of range (hour > 23 or minute > 59).
fn try_every_week(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
    let groups = RE_EVERY_WEEK.captures(input)?;

    let weekday_word = groups.get(1)?.as_str();
    let cron_dow = weekday_to_cron(weekday_word)?;

    let period = groups.get(2).map(|g| g.as_str());
    let spoken_hour: u32 = groups.get(3)?.as_str().parse().ok()?;
    let minute: u32 = groups
        .get(4)
        .and_then(|g| g.as_str().parse().ok())
        .unwrap_or(0);

    let hour = adjust_hour_for_period(spoken_hour, period);
    let in_range = hour <= 23 && minute <= 59;
    if !in_range {
        return None;
    }

    let schedule = ParsedSchedule {
        cron_expression: format!("{} {} * * {}", minute, hour, cron_dow),
        natural_description: format!("每周{} {:02}:{:02}", weekday_word, hour, minute),
        confidence: 0.92,
        task_description: task_desc.to_string(),
        task_target: TaskTarget::Agent(agent_id.to_string()),
    };
    Some(ScheduleParseResult::Exact(schedule))
}
/// Matches "on workdays" requests (Monday–Friday, cron day-of-week `1-5`).
///
/// Two forms are recognised, in this order:
/// 1. an explicit clock time (`工作日下午3点`) → cron `minute hour * * 1-5`,
///    confidence 0.90;
/// 2. a bare period word (`工作日早上`) → the period's default hour at
///    minute 0, confidence 0.85.
///
/// Returns `None` when neither form matches or when the explicit time is out
/// of range (hour > 23 or minute > 59); an out-of-range explicit time does
/// not fall back to the period-only form.
fn try_workday(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
    // Shared constructor for both forms.
    let exact = |cron_expression: String, natural_description: String, confidence: f32| {
        ScheduleParseResult::Exact(ParsedSchedule {
            cron_expression,
            natural_description,
            confidence,
            task_description: task_desc.to_string(),
            task_target: TaskTarget::Agent(agent_id.to_string()),
        })
    };

    if let Some(groups) = RE_WORKDAY_EXACT.captures(input) {
        let period = groups.get(1).map(|g| g.as_str());
        let spoken_hour: u32 = groups.get(2)?.as_str().parse().ok()?;
        let minute: u32 = groups
            .get(3)
            .and_then(|g| g.as_str().parse().ok())
            .unwrap_or(0);
        let hour = adjust_hour_for_period(spoken_hour, period);
        if hour > 23 || minute > 59 {
            return None;
        }
        return Some(exact(
            format!("{} {} * * 1-5", minute, hour),
            format!("工作日{:02}:{:02}", hour, minute),
            0.90,
        ));
    }

    let groups = RE_WORKDAY_PERIOD.captures(input)?;
    let period = groups.get(1)?.as_str();
    let hour = period_to_hour(period)?;
    Some(exact(
        format!("0 {} * * 1-5", hour),
        format!("工作日{}", period),
        0.85,
    ))
}
/// Matches fixed-interval requests such as `每2小时` or `每30分钟`.
///
/// Hour units (`小时`, `个小时`) produce cron `0 */N * * *`; every other unit
/// the pattern accepts is a minute unit and produces `*/N * * * *`.
/// Confidence is 0.90.
///
/// Returns `None` when the pattern does not match or the interval is 0.
fn try_interval(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
    let caps = RE_INTERVAL.captures(input)?;

    let n: u32 = caps.get(1)?.as_str().parse().ok()?;
    if n == 0 {
        // "*/0" is not a valid cron step.
        return None;
    }

    // Only the hour units contain 时; 分钟 / 分 / 钟 do not.
    let is_hourly = caps.get(2)?.as_str().contains('时');
    let (cron, desc) = if is_hourly {
        (format!("0 */{} * * *", n), format!("{}小时", n))
    } else {
        (format!("*/{} * * * *", n), format!("{}分钟", n))
    };

    Some(ScheduleParseResult::Exact(ParsedSchedule {
        cron_expression: cron,
        natural_description: desc,
        confidence: 0.90,
        task_description: task_desc.to_string(),
        task_target: TaskTarget::Agent(agent_id.to_string()),
    }))
}
fn try_monthly(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
if let Some(caps) = RE_MONTHLY.captures(input) {
let day: u32 = caps.get(1)?.as_str().parse().ok()?;
let period = caps.get(2).map(|m| m.as_str());
let raw_hour: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(9)).unwrap_or(9);
let minute: u32 = caps.get(4).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0);
let hour = adjust_hour_for_period(raw_hour, period);
if day > 31 || hour > 23 || minute > 59 {
return None;
}
return Some(ScheduleParseResult::Exact(ParsedSchedule {
cron_expression: format!("{} {} {} * *", minute, hour, day),
natural_description: format!("每月{}号 {:02}:{:02}", day, hour, minute),
confidence: 0.90,
task_description: task_desc.to_string(),
task_target: TaskTarget::Agent(agent_id.to_string()),
}));
}
None
}
/// Matches one-off requests relative to today: `明天` (+1 day), `后天`
/// (+2 days) or `大后天` (+3 days) followed by a clock time.
///
/// Unlike the recurring matchers, the `cron_expression` field of the result
/// holds an RFC 3339 timestamp of the target instant (seconds and sub-second
/// part zeroed), not a cron expression. Confidence is 0.88.
///
/// Returns `None` when the pattern does not match, the time is out of range
/// (hour > 23 or minute > 59), or the target instant cannot be represented.
///
/// NOTE(review): the target is computed from `chrono::Utc::now()`, so both
/// "tomorrow" and the clock time are interpreted in UTC — confirm whether the
/// user's local timezone is intended.
fn try_one_shot(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
    let caps = RE_ONE_SHOT.captures(input)?;

    let day_word = caps.get(1)?.as_str();
    let day_offset: i64 = match day_word {
        "明天" => 1,
        "后天" => 2,
        "大后天" => 3,
        _ => return None,
    };

    let period = caps.get(2).map(|m| m.as_str());
    let raw_hour: u32 = caps.get(3)?.as_str().parse().ok()?;
    let minute: u32 = caps
        .get(4)
        .and_then(|m| m.as_str().parse().ok())
        .unwrap_or(0);
    let hour = adjust_hour_for_period(raw_hour, period);
    if hour > 23 || minute > 59 {
        return None;
    }

    // Propagate failure instead of silently substituting "now": a schedule
    // that fires at an arbitrary instant is worse than no schedule.
    let target = chrono::Utc::now()
        .checked_add_signed(chrono::Duration::days(day_offset))?
        .with_hour(hour)?
        .with_minute(minute)?
        .with_second(0)?
        .with_nanosecond(0)?;

    Some(ScheduleParseResult::Exact(ParsedSchedule {
        cron_expression: target.to_rfc3339(),
        natural_description: format!("{} {:02}:{:02}", day_word, hour, minute),
        confidence: 0.88,
        task_description: task_desc.to_string(),
        task_target: TaskTarget::Agent(agent_id.to_string()),
    }))
}
// ---------------------------------------------------------------------------
// Schedule intent detection
// ---------------------------------------------------------------------------
/// Keywords indicating the user wants to set a scheduled task.
///
/// Entries must be non-empty: every string contains "", so an empty keyword
/// would make every input look like a scheduling request. The bare `每` is
/// what catches interval phrasing such as `每30分钟…`, which contains none of
/// the longer keywords.
const SCHEDULE_INTENT_KEYWORDS: &[&str] = &[
    "提醒我", "提醒", "定时", "每天", "每日", "每周", "每月",
    "工作日", "每隔", "每", "定期", "到时候", "准时",
    "闹钟", "闹铃", "日程", "日历",
];

/// Returns `true` when `input` contains at least one schedule-intent keyword.
///
/// This is a cheap substring check, not a parse: a `true` result does not
/// guarantee that the text can actually be turned into a schedule.
pub fn has_schedule_intent(input: &str) -> bool {
    // Lower-casing has no effect on the current all-CJK keywords; it keeps
    // matching case-insensitive should Latin keywords be added.
    let lower = input.to_lowercase();
    SCHEDULE_INTENT_KEYWORDS
        .iter()
        .any(|kw| !kw.is_empty() && lower.contains(*kw))
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;

    fn default_agent() -> AgentId {
        AgentId::new()
    }

    /// Parses `input` and returns the schedule, panicking unless the parser
    /// produced `ScheduleParseResult::Exact`.
    fn parse_exact(input: &str) -> ParsedSchedule {
        match parse_nl_schedule(input, &default_agent()) {
            ScheduleParseResult::Exact(schedule) => schedule,
            other => panic!("Expected Exact for {:?}, got {:?}", input, other),
        }
    }

    #[test]
    fn test_every_day_explicit_time() {
        let s = parse_exact("每天早上9点提醒我查房");
        assert_eq!(s.cron_expression, "0 9 * * *");
        assert!(s.confidence >= 0.9);
    }

    #[test]
    fn test_every_day_with_minute() {
        assert_eq!(parse_exact("每天下午3点30分提醒我").cron_expression, "30 15 * * *");
    }

    #[test]
    fn test_every_day_period_only() {
        assert_eq!(parse_exact("每天早上提醒我看看报告").cron_expression, "0 9 * * *");
    }

    #[test]
    fn test_every_week_monday() {
        assert_eq!(parse_exact("每周一上午10点提醒我开会").cron_expression, "0 10 * * 1");
    }

    #[test]
    fn test_every_week_friday() {
        assert_eq!(parse_exact("每个星期五下午2点").cron_expression, "0 14 * * 5");
    }

    #[test]
    fn test_workday() {
        assert_eq!(parse_exact("工作日下午3点提醒我写周报").cron_expression, "0 15 * * 1-5");
    }

    #[test]
    fn test_interval_hours() {
        assert_eq!(parse_exact("每2小时提醒我喝水").cron_expression, "0 */2 * * *");
    }

    #[test]
    fn test_interval_minutes() {
        assert_eq!(parse_exact("每30分钟检查一次").cron_expression, "*/30 * * * *");
    }

    #[test]
    fn test_monthly() {
        assert_eq!(parse_exact("每月1号早上9点提醒我").cron_expression, "0 9 1 * *");
    }

    #[test]
    fn test_one_shot_tomorrow() {
        let s = parse_exact("明天下午3点提醒我开会");
        // One-shot schedules carry an RFC 3339 timestamp, not a cron string.
        assert!(s.cron_expression.contains('T'));
        assert!(s.natural_description.contains("明天"));
    }

    #[test]
    fn test_unclear_input() {
        let result = parse_nl_schedule("今天天气怎么样", &default_agent());
        assert!(matches!(result, ScheduleParseResult::Unclear));
    }

    #[test]
    fn test_empty_input() {
        let result = parse_nl_schedule("", &default_agent());
        assert!(matches!(result, ScheduleParseResult::Unclear));
    }

    #[test]
    fn test_schedule_intent_detection() {
        assert!(has_schedule_intent("每天早上9点提醒我查房"));
        assert!(has_schedule_intent("帮我设个定时任务"));
        assert!(has_schedule_intent("工作日提醒我打卡"));
        assert!(!has_schedule_intent("今天天气怎么样"));
        assert!(!has_schedule_intent("帮我写个报告"));
    }

    #[test]
    fn test_period_to_hour_mapping() {
        assert_eq!(period_to_hour("凌晨"), Some(0));
        assert_eq!(period_to_hour("早上"), Some(9));
        assert_eq!(period_to_hour("中午"), Some(12));
        assert_eq!(period_to_hour("下午"), Some(15));
        assert_eq!(period_to_hour("晚上"), Some(21));
        assert_eq!(period_to_hour("不知道"), None);
    }

    #[test]
    fn test_weekday_to_cron_mapping() {
        // Bare numerals, as captured after 每周 / 每个星期 / 每个礼拜.
        assert_eq!(weekday_to_cron("一"), Some("1"));
        assert_eq!(weekday_to_cron("五"), Some("5"));
        assert_eq!(weekday_to_cron("日"), Some("0"));
        // Prefixed forms.
        assert_eq!(weekday_to_cron("星期三"), Some("3"));
        assert_eq!(weekday_to_cron("礼拜天"), Some("0"));
        assert_eq!(weekday_to_cron("未知"), None);
    }

    #[test]
    fn test_task_description_extraction() {
        assert_eq!(extract_task_description("每天早上9点提醒我查房"), "查房");
    }
}