From 043824c722532e33fc2b8672144a7c34aa0edda2 Mon Sep 17 00:00:00 2001 From: iven Date: Wed, 15 Apr 2026 13:34:27 +0800 Subject: [PATCH] =?UTF-8?q?perf(runtime):=20nl=5Fschedule=20=E6=AD=A3?= =?UTF-8?q?=E5=88=99=E9=A2=84=E7=BC=96=E8=AF=91=20=E2=80=94=209=E4=B8=AA?= =?UTF-8?q?=20LazyLock=20=E9=9D=99=E6=80=81=E6=9B=BF=E4=BB=A3=E6=AF=8F?= =?UTF-8?q?=E6=AC=A1=E8=B0=83=E7=94=A8=E7=BC=96=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 将 parse_nl_schedule 中 9 个 Regex::new() 从函数内每次调用编译 提升为 std::sync::LazyLock 静态变量,首次调用时编译一次, 后续调用直接复用。16 个单元测试全部通过。 --- crates/zclaw-runtime/src/nl_schedule.rs | 189 +++++++++++++----------- 1 file changed, 101 insertions(+), 88 deletions(-) diff --git a/crates/zclaw-runtime/src/nl_schedule.rs b/crates/zclaw-runtime/src/nl_schedule.rs index 1795db6..6735fd3 100644 --- a/crates/zclaw-runtime/src/nl_schedule.rs +++ b/crates/zclaw-runtime/src/nl_schedule.rs @@ -7,7 +7,10 @@ //! //! Lives in `zclaw-runtime` because it's a pure text→cron utility with no kernel dependency. +use std::sync::LazyLock; + use chrono::Timelike; +use regex::Regex; use serde::{Deserialize, Serialize}; use zclaw_types::AgentId; @@ -56,21 +59,79 @@ pub enum ScheduleParseResult { } // --------------------------------------------------------------------------- -// Regex pattern library +// Pre-compiled regex patterns (LazyLock — compiled once, reused forever) // --------------------------------------------------------------------------- -/// A single pattern for matching Chinese time expressions. -#[allow(dead_code)] -struct SchedulePattern { - /// Regex pattern string - regex: &'static str, - /// Cron template — use {h} for hour, {m} for minute, {dow} for day-of-week, {dom} for day-of-month - cron_template: &'static str, - /// Human description template - description: &'static str, - /// Base confidence for this pattern - confidence: f32, -} +/// Time-of-day period fragment used across multiple patterns. +const PERIOD: &str = "(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?"; + +// extract_task_description +static RE_TIME_STRIP: LazyLock = LazyLock::new(|| { + Regex::new( + r"^(?:凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?\d{1,2}[点时::]\d{0,2}分?" + ).unwrap() +}); + +// try_every_day +static RE_EVERY_DAY_EXACT: LazyLock = LazyLock::new(|| { + Regex::new(&format!( + r"(?:每天|每日)(?:的)?{}(\d{{1,2}})[点时::](\d{{1,2}})?", + PERIOD + )).unwrap() +}); + +static RE_EVERY_DAY_PERIOD: LazyLock = LazyLock::new(|| { + Regex::new( + r"(?:每天|每日)(?:的)?(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)" + ).unwrap() +}); + +// try_every_week +static RE_EVERY_WEEK: LazyLock = LazyLock::new(|| { + Regex::new(&format!( + r"(?:每周|每个?星期|每个?礼拜)(一|二|三|四|五|六|日|天|周一|周二|周三|周四|周五|周六|周日|周天|星期一|星期二|星期三|星期四|星期五|星期六|星期日|星期天|礼拜一|礼拜二|礼拜三|礼拜四|礼拜五|礼拜六|礼拜日|礼拜天)(?:的)?{}(\d{{1,2}})[点时::](\d{{1,2}})?", + PERIOD + )).unwrap() +}); + +// try_workday +static RE_WORKDAY_EXACT: LazyLock = LazyLock::new(|| { + Regex::new(&format!( + r"(?:工作日|每个?工作日|工作日(?:的)?){}(\d{{1,2}})[点时::](\d{{1,2}})?", + PERIOD + )).unwrap() +}); + +static RE_WORKDAY_PERIOD: LazyLock = LazyLock::new(|| { + Regex::new( + r"(?:工作日|每个?工作日)(?:的)?(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)" + ).unwrap() +}); + +// try_interval +static RE_INTERVAL: LazyLock = LazyLock::new(|| { + Regex::new(r"每(\d{1,2})(小时|分钟|分|钟|个小时)").unwrap() +}); + +// try_monthly +static RE_MONTHLY: LazyLock = LazyLock::new(|| { + Regex::new(&format!( + r"(?:每月|每个月)(?:的)?(\d{{1,2}})[号日](?:的)?{}(\d{{1,2}})?[点时::]?(\d{{1,2}})?", + PERIOD + )).unwrap() +}); + +// try_one_shot +static RE_ONE_SHOT: LazyLock = LazyLock::new(|| { + Regex::new(&format!( + r"(明天|后天|大后天)(?:的)?{}(\d{{1,2}})[点时::](\d{{1,2}})?", + PERIOD + )).unwrap() +}); + +// --------------------------------------------------------------------------- +// Helper lookups (pure functions, no allocation) +// --------------------------------------------------------------------------- /// Chinese time period keywords → hour mapping fn period_to_hour(period: &str) -> Option { @@ -100,6 +161,23 @@ fn weekday_to_cron(day: &str) -> Option<&'static str> { } } +/// Adjust hour based on time-of-day period. Chinese 12-hour convention: +/// 下午3点 = 15, 晚上8点 = 20, etc. Morning hours stay as-is. +fn adjust_hour_for_period(hour: u32, period: Option<&str>) -> u32 { + if let Some(p) = period { + match p { + "下午" | "午后" => { if hour < 12 { hour + 12 } else { hour } } + "晚上" | "晚间" | "夜里" | "夜晚" => { if hour < 12 { hour + 12 } else { hour } } + "傍晚" | "黄昏" => { if hour < 12 { hour + 12 } else { hour } } + "中午" => { if hour == 12 { 12 } else if hour < 12 { hour + 12 } else { hour } } + "半夜" | "午夜" => { if hour == 12 { 0 } else { hour } } + _ => hour, + } + } else { + hour + } +} + // --------------------------------------------------------------------------- // Parser implementation // --------------------------------------------------------------------------- @@ -114,35 +192,23 @@ pub fn parse_nl_schedule(input: &str, default_agent_id: &AgentId) -> SchedulePar return ScheduleParseResult::Unclear; } - // Extract task description (everything after keywords like "提醒我", "帮我") let task_description = extract_task_description(input); - // --- Pattern 1: 每天 + 时间 --- if let Some(result) = try_every_day(input, &task_description, default_agent_id) { return result; } - - // --- Pattern 2: 每周N + 时间 --- if let Some(result) = try_every_week(input, &task_description, default_agent_id) { return result; } - - // --- Pattern 3: 工作日 + 时间 --- if let Some(result) = try_workday(input, &task_description, default_agent_id) { return result; } - - // --- Pattern 4: 每N小时/分钟 --- if let Some(result) = try_interval(input, &task_description, default_agent_id) { return result; } - - // --- Pattern 5: 每月N号 --- if let Some(result) = try_monthly(input, &task_description, default_agent_id) { return result; } - - // --- Pattern 6: 明天/后天 + 时间 (one-shot) --- if let Some(result) = try_one_shot(input, &task_description, default_agent_id) { return result; } @@ -161,13 +227,7 @@ fn extract_task_description(input: &str) -> String { let mut desc = input.to_string(); - // Strip prefixes + time expressions in alternating passes until stable - let time_re = regex::Regex::new( - r"^(?:凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?\d{1,2}[点时::]\d{0,2}分?" - ).unwrap_or_else(|_| regex::Regex::new("").unwrap()); - for _ in 0..3 { - // Pass 1: strip prefixes loop { let mut stripped = false; for prefix in &strip_prefixes { @@ -178,8 +238,7 @@ fn extract_task_description(input: &str) -> String { } if !stripped { break; } } - // Pass 2: strip time expressions - let new_desc = time_re.replace(&desc, "").to_string(); + let new_desc = RE_TIME_STRIP.replace(&desc, "").to_string(); if new_desc == desc { break; } desc = new_desc; } @@ -187,32 +246,10 @@ fn extract_task_description(input: &str) -> String { desc.trim().to_string() } -// -- Pattern matchers -- - -/// Adjust hour based on time-of-day period. Chinese 12-hour convention: -/// 下午3点 = 15, 晚上8点 = 20, etc. Morning hours stay as-is. -fn adjust_hour_for_period(hour: u32, period: Option<&str>) -> u32 { - if let Some(p) = period { - match p { - "下午" | "午后" => { if hour < 12 { hour + 12 } else { hour } } - "晚上" | "晚间" | "夜里" | "夜晚" => { if hour < 12 { hour + 12 } else { hour } } - "傍晚" | "黄昏" => { if hour < 12 { hour + 12 } else { hour } } - "中午" => { if hour == 12 { 12 } else if hour < 12 { hour + 12 } else { hour } } - "半夜" | "午夜" => { if hour == 12 { 0 } else { hour } } - _ => hour, - } - } else { - hour - } -} - -const PERIOD_PATTERN: &str = "(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?"; +// -- Pattern matchers (all use pre-compiled statics) -- fn try_every_day(input: &str, task_desc: &str, agent_id: &AgentId) -> Option { - let re = regex::Regex::new( - &format!(r"(?:每天|每日)(?:的)?{}(\d{{1,2}})[点时::](\d{{1,2}})?", PERIOD_PATTERN) - ).ok()?; - if let Some(caps) = re.captures(input) { + if let Some(caps) = RE_EVERY_DAY_EXACT.captures(input) { let period = caps.get(1).map(|m| m.as_str()); let raw_hour: u32 = caps.get(2)?.as_str().parse().ok()?; let minute: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0); @@ -229,9 +266,7 @@ fn try_every_day(input: &str, task_desc: &str, agent_id: &AgentId) -> Option Option Option { - let re = regex::Regex::new( - &format!(r"(?:每周|每个?星期|每个?礼拜)(一|二|三|四|五|六|日|天|周一|周二|周三|周四|周五|周六|周日|周天|星期一|星期二|星期三|星期四|星期五|星期六|星期日|星期天|礼拜一|礼拜二|礼拜三|礼拜四|礼拜五|礼拜六|礼拜日|礼拜天)(?:的)?{}(\d{{1,2}})[点时::](\d{{1,2}})?", PERIOD_PATTERN) - ).ok()?; - - let caps = re.captures(input)?; + let caps = RE_EVERY_WEEK.captures(input)?; let day_str = caps.get(1)?.as_str(); let dow = weekday_to_cron(day_str)?; let period = caps.get(2).map(|m| m.as_str()); @@ -273,11 +304,7 @@ fn try_every_week(input: &str, task_desc: &str, agent_id: &AgentId) -> Option Option { - let re = regex::Regex::new( - &format!(r"(?:工作日|每个?工作日|工作日(?:的)?){}(\d{{1,2}})[点时::](\d{{1,2}})?", PERIOD_PATTERN) - ).ok()?; - - if let Some(caps) = re.captures(input) { + if let Some(caps) = RE_WORKDAY_EXACT.captures(input) { let period = caps.get(1).map(|m| m.as_str()); let raw_hour: u32 = caps.get(2)?.as_str().parse().ok()?; let minute: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0); @@ -294,11 +321,7 @@ fn try_workday(input: &str, task_desc: &str, agent_id: &AgentId) -> Option Option Option { - // "每2小时", "每30分钟", "每N小时/分钟" - let re = regex::Regex::new(r"每(\d{1,2})(小时|分钟|分|钟|个小时)").ok()?; - if let Some(caps) = re.captures(input) { + if let Some(caps) = RE_INTERVAL.captures(input) { let n: u32 = caps.get(1)?.as_str().parse().ok()?; if n == 0 { return None; @@ -341,11 +362,7 @@ fn try_interval(input: &str, task_desc: &str, agent_id: &AgentId) -> Option Option { - let re = regex::Regex::new( - &format!(r"(?:每月|每个月)(?:的)?(\d{{1,2}})[号日](?:的)?{}(\d{{1,2}})?[点时::]?(\d{{1,2}})?", PERIOD_PATTERN) - ).ok()?; - - if let Some(caps) = re.captures(input) { + if let Some(caps) = RE_MONTHLY.captures(input) { let day: u32 = caps.get(1)?.as_str().parse().ok()?; let period = caps.get(2).map(|m| m.as_str()); let raw_hour: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(9)).unwrap_or(9); @@ -367,11 +384,7 @@ fn try_monthly(input: &str, task_desc: &str, agent_id: &AgentId) -> Option Option { - let re = regex::Regex::new( - &format!(r"(明天|后天|大后天)(?:的)?{}(\d{{1,2}})[点时::](\d{{1,2}})?", PERIOD_PATTERN) - ).ok()?; - - let caps = re.captures(input)?; + let caps = RE_ONE_SHOT.captures(input)?; let day_offset = match caps.get(1)?.as_str() { "明天" => 1, "后天" => 2,