perf(runtime): nl_schedule 正则预编译 — 9个 LazyLock 静态替代每次调用编译
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

将 parse_nl_schedule 中 9 个 Regex::new() 从函数内每次调用编译
提升为 std::sync::LazyLock<Regex> 静态变量,首次调用时编译一次,
后续调用直接复用。16 个单元测试全部通过。
This commit is contained in:
iven
2026-04-15 13:34:27 +08:00
parent bd12bdb62b
commit 043824c722

View File

@@ -7,7 +7,10 @@
//! //!
//! Lives in `zclaw-runtime` because it's a pure text→cron utility with no kernel dependency. //! Lives in `zclaw-runtime` because it's a pure text→cron utility with no kernel dependency.
use std::sync::LazyLock;
use chrono::Timelike; use chrono::Timelike;
use regex::Regex;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use zclaw_types::AgentId; use zclaw_types::AgentId;
@@ -56,21 +59,79 @@ pub enum ScheduleParseResult {
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Regex pattern library // Pre-compiled regex patterns (LazyLock — compiled once, reused forever)
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
/// A single pattern for matching Chinese time expressions. /// Time-of-day period fragment used across multiple patterns.
#[allow(dead_code)] const PERIOD: &str = "(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?";
struct SchedulePattern {
/// Regex pattern string // extract_task_description
regex: &'static str, static RE_TIME_STRIP: LazyLock<Regex> = LazyLock::new(|| {
/// Cron template — use {h} for hour, {m} for minute, {dow} for day-of-week, {dom} for day-of-month Regex::new(
cron_template: &'static str, r"^(?:凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?\d{1,2}[点时:]\d{0,2}分?"
/// Human description template ).unwrap()
description: &'static str, });
/// Base confidence for this pattern
confidence: f32, // try_every_day
} static RE_EVERY_DAY_EXACT: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(
r"(?:每天|每日)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?",
PERIOD
)).unwrap()
});
/// Matches a daily marker (每天 / 每日), an optional 的, and a time-of-day
/// period word; it does not require an explicit clock time.
///
/// Capture group 1 is the period word. `try_every_day` consults this pattern
/// only after `RE_EVERY_DAY_EXACT` has failed to match.
///
/// The regex is built on first access; the `unwrap` can only panic if the
/// literal pattern itself is invalid.
static RE_EVERY_DAY_PERIOD: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?:每天|每日)(?:的)?(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)"
).unwrap()
});
// try_every_week
/// Matches a weekly marker (每周 / 每(个)星期 / 每(个)礼拜) followed by a weekday
/// token, an optional 的, an optional period word, a 1–2 digit hour, one of the
/// separators 点 / 时 / `:`, and an optional 1–2 digit minute.
///
/// Capture groups: 1 = weekday token, 2 = period word (from `PERIOD`, may be
/// absent), 3 = hour, 4 = minute (may be absent).
///
/// Braces in the quantifiers are doubled (`{{1,2}}`) because the pattern goes
/// through `format!`, which splices `PERIOD` in at `{}`.
/// The regex is built on first access; the `unwrap` can only panic if the
/// assembled pattern is invalid.
static RE_EVERY_WEEK: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(
r"(?:每周|每个?星期|每个?礼拜)(一|二|三|四|五|六|日|天|周一|周二|周三|周四|周五|周六|周日|周天|星期一|星期二|星期三|星期四|星期五|星期六|星期日|星期天|礼拜一|礼拜二|礼拜三|礼拜四|礼拜五|礼拜六|礼拜日|礼拜天)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?",
PERIOD
)).unwrap()
});
// try_workday
/// Matches a workday marker (工作日, optionally preceded by 每 / 每个 or followed
/// by 的), an optional period word, a 1–2 digit hour, one of the separators
/// 点 / 时 / `:`, and an optional 1–2 digit minute.
///
/// Capture groups: 1 = period word (from `PERIOD`, may be absent), 2 = hour,
/// 3 = minute (may be absent).
///
/// Braces in the quantifiers are doubled (`{{1,2}}`) because the pattern goes
/// through `format!`, which splices `PERIOD` in at `{}`.
/// The regex is built on first access; the `unwrap` can only panic if the
/// assembled pattern is invalid.
static RE_WORKDAY_EXACT: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(
r"(?:工作日|每个?工作日|工作日(?:的)?){}(\d{{1,2}})[点时:](\d{{1,2}})?",
PERIOD
)).unwrap()
});
/// Matches a workday marker (工作日 / 每(个)工作日), an optional 的, and a
/// time-of-day period word; it does not require an explicit clock time.
///
/// Capture group 1 is the period word. `try_workday` consults this pattern
/// only after `RE_WORKDAY_EXACT` has failed to match.
///
/// The regex is built on first access; the `unwrap` can only panic if the
/// literal pattern itself is invalid.
static RE_WORKDAY_PERIOD: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(
r"(?:工作日|每个?工作日)(?:的)?(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)"
).unwrap()
});
// try_interval
/// Matches a fixed-interval expression: 每, a 1–2 digit count, then a unit word
/// (小时 / 分钟 / 分 / 钟 / 个小时).
///
/// Capture groups: 1 = the count, 2 = the unit word. `try_interval` parses
/// group 1 and rejects a count of zero.
///
/// The regex is built on first access; the `unwrap` can only panic if the
/// literal pattern itself is invalid.
static RE_INTERVAL: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"每(\d{1,2})(小时|分钟|分|钟|个小时)").unwrap()
});
// try_monthly
/// Matches a monthly marker (每月 / 每个月), an optional 的, a 1–2 digit day
/// followed by 号 or 日, an optional 的, an optional period word, and then an
/// entirely optional time: 1–2 digit hour, separator 点 / 时 / `:`, 1–2 digits.
///
/// Capture groups: 1 = day of month, 2 = period word (from `PERIOD`, may be
/// absent), 3 = hour (may be absent; `try_monthly` falls back to 9),
/// 4 = trailing digits after the separator (presumably the minute — its use is
/// outside the visible code, confirm in `try_monthly`).
///
/// Braces in the quantifiers are doubled (`{{1,2}}`) because the pattern goes
/// through `format!`, which splices `PERIOD` in at `{}`.
/// The regex is built on first access; the `unwrap` can only panic if the
/// assembled pattern is invalid.
static RE_MONTHLY: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(
r"(?:每月|每个月)(?:的)?(\d{{1,2}})[号日](?:的)?{}(\d{{1,2}})?[点时:]?(\d{{1,2}})?",
PERIOD
)).unwrap()
});
// try_one_shot
/// Matches a relative-day expression: 明天 / 后天 / 大后天, an optional 的, an
/// optional period word, a 1–2 digit hour, one of the separators 点 / 时 / `:`,
/// and an optional 1–2 digit minute.
///
/// Capture groups: 1 = relative-day word, 2 = period word (from `PERIOD`, may
/// be absent), 3 = hour, 4 = minute (may be absent).
///
/// Braces in the quantifiers are doubled (`{{1,2}}`) because the pattern goes
/// through `format!`, which splices `PERIOD` in at `{}`.
/// The regex is built on first access; the `unwrap` can only panic if the
/// assembled pattern is invalid.
static RE_ONE_SHOT: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(&format!(
r"(明天|后天|大后天)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?",
PERIOD
)).unwrap()
});
// ---------------------------------------------------------------------------
// Helper lookups (pure functions, no allocation)
// ---------------------------------------------------------------------------
/// Chinese time period keywords → hour mapping /// Chinese time period keywords → hour mapping
fn period_to_hour(period: &str) -> Option<u32> { fn period_to_hour(period: &str) -> Option<u32> {
@@ -100,6 +161,23 @@ fn weekday_to_cron(day: &str) -> Option<&'static str> {
} }
} }
/// Converts an hour spoken with a Chinese time-of-day word into a 24-hour value.
///
/// `hour` is the number as written in the text; `period` is the optional
/// period keyword that preceded it. Only the hour is adjusted — the caller
/// decides which calendar day it belongs to.
///
/// Rules:
/// - 下午 / 午后 / 傍晚 / 黄昏: hours below 12 shift by +12 (下午3点 → 15).
/// - 晚上 / 晚间 / 夜里 / 夜晚: 1–11 shift by +12 (晚上8点 → 20); 12 means
///   midnight and becomes 0; 0 stays 0.
/// - 中午: 11 and 12 stay as written (中午11点 → 11); smaller hours shift by
///   +12 (中午1点 → 13).
/// - 凌晨 / 半夜 / 午夜: 12 means midnight and becomes 0; other hours stay.
/// - Any other period (早上, 上午, …) or `None`: the hour is returned unchanged.
///
/// No range validation happens here: values of 13 and above pass through as-is.
fn adjust_hour_for_period(hour: u32, period: Option<&str>) -> u32 {
    let Some(p) = period else {
        return hour;
    };

    match p {
        // Afternoon and dusk share the plain 12-hour → 24-hour shift.
        "下午" | "午后" | "傍晚" | "黄昏" if hour < 12 => hour + 12,
        // Night-time: "12 o'clock at night" is midnight, never noon, and
        // "0 o'clock at night" is already a 24-hour value.
        "晚上" | "晚间" | "夜里" | "夜晚" => match hour {
            1..=11 => hour + 12,
            12 => 0,
            _ => hour,
        },
        // Noon spans roughly 11:00–13:59, so 11 and 12 are already correct.
        "中午" if hour < 11 => hour + 12,
        // Small-hours words: 12 is the midnight spelling of 0.
        "凌晨" | "半夜" | "午夜" if hour == 12 => 0,
        _ => hour,
    }
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Parser implementation // Parser implementation
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@@ -114,35 +192,23 @@ pub fn parse_nl_schedule(input: &str, default_agent_id: &AgentId) -> SchedulePar
return ScheduleParseResult::Unclear; return ScheduleParseResult::Unclear;
} }
// Extract task description (everything after keywords like "提醒我", "帮我")
let task_description = extract_task_description(input); let task_description = extract_task_description(input);
// --- Pattern 1: 每天 + 时间 ---
if let Some(result) = try_every_day(input, &task_description, default_agent_id) { if let Some(result) = try_every_day(input, &task_description, default_agent_id) {
return result; return result;
} }
// --- Pattern 2: 每周N + 时间 ---
if let Some(result) = try_every_week(input, &task_description, default_agent_id) { if let Some(result) = try_every_week(input, &task_description, default_agent_id) {
return result; return result;
} }
// --- Pattern 3: 工作日 + 时间 ---
if let Some(result) = try_workday(input, &task_description, default_agent_id) { if let Some(result) = try_workday(input, &task_description, default_agent_id) {
return result; return result;
} }
// --- Pattern 4: 每N小时/分钟 ---
if let Some(result) = try_interval(input, &task_description, default_agent_id) { if let Some(result) = try_interval(input, &task_description, default_agent_id) {
return result; return result;
} }
// --- Pattern 5: 每月N号 ---
if let Some(result) = try_monthly(input, &task_description, default_agent_id) { if let Some(result) = try_monthly(input, &task_description, default_agent_id) {
return result; return result;
} }
// --- Pattern 6: 明天/后天 + 时间 (one-shot) ---
if let Some(result) = try_one_shot(input, &task_description, default_agent_id) { if let Some(result) = try_one_shot(input, &task_description, default_agent_id) {
return result; return result;
} }
@@ -161,13 +227,7 @@ fn extract_task_description(input: &str) -> String {
let mut desc = input.to_string(); let mut desc = input.to_string();
// Strip prefixes + time expressions in alternating passes until stable
let time_re = regex::Regex::new(
r"^(?:凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?\d{1,2}[点时:]\d{0,2}分?"
).unwrap_or_else(|_| regex::Regex::new("").unwrap());
for _ in 0..3 { for _ in 0..3 {
// Pass 1: strip prefixes
loop { loop {
let mut stripped = false; let mut stripped = false;
for prefix in &strip_prefixes { for prefix in &strip_prefixes {
@@ -178,8 +238,7 @@ fn extract_task_description(input: &str) -> String {
} }
if !stripped { break; } if !stripped { break; }
} }
// Pass 2: strip time expressions let new_desc = RE_TIME_STRIP.replace(&desc, "").to_string();
let new_desc = time_re.replace(&desc, "").to_string();
if new_desc == desc { break; } if new_desc == desc { break; }
desc = new_desc; desc = new_desc;
} }
@@ -187,32 +246,10 @@ fn extract_task_description(input: &str) -> String {
desc.trim().to_string() desc.trim().to_string()
} }
// -- Pattern matchers -- // -- Pattern matchers (all use pre-compiled statics) --
/// Converts an hour spoken with a Chinese time-of-day word into a 24-hour value.
///
/// `hour` is the number as written in the text; `period` is the optional
/// period keyword that preceded it. Only the hour is adjusted — the caller
/// decides which calendar day it belongs to.
///
/// Rules:
/// - 下午 / 午后 / 傍晚 / 黄昏: hours below 12 shift by +12 (下午3点 → 15).
/// - 晚上 / 晚间 / 夜里 / 夜晚: 1–11 shift by +12 (晚上8点 → 20); 12 means
///   midnight and becomes 0; 0 stays 0.
/// - 中午: 11 and 12 stay as written (中午11点 → 11); smaller hours shift by
///   +12 (中午1点 → 13).
/// - 凌晨 / 半夜 / 午夜: 12 means midnight and becomes 0; other hours stay.
/// - Any other period (早上, 上午, …) or `None`: the hour is returned unchanged.
///
/// No range validation happens here: values of 13 and above pass through as-is.
fn adjust_hour_for_period(hour: u32, period: Option<&str>) -> u32 {
    let Some(p) = period else {
        return hour;
    };

    match p {
        // Afternoon and dusk share the plain 12-hour → 24-hour shift.
        "下午" | "午后" | "傍晚" | "黄昏" if hour < 12 => hour + 12,
        // Night-time: "12 o'clock at night" is midnight, never noon, and
        // "0 o'clock at night" is already a 24-hour value.
        "晚上" | "晚间" | "夜里" | "夜晚" => match hour {
            1..=11 => hour + 12,
            12 => 0,
            _ => hour,
        },
        // Noon spans roughly 11:00–13:59, so 11 and 12 are already correct.
        "中午" if hour < 11 => hour + 12,
        // Small-hours words: 12 is the midnight spelling of 0.
        "凌晨" | "半夜" | "午夜" if hour == 12 => 0,
        _ => hour,
    }
}
const PERIOD_PATTERN: &str = "(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?";
fn try_every_day(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> { fn try_every_day(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
let re = regex::Regex::new( if let Some(caps) = RE_EVERY_DAY_EXACT.captures(input) {
&format!(r"(?:每天|每日)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?", PERIOD_PATTERN)
).ok()?;
if let Some(caps) = re.captures(input) {
let period = caps.get(1).map(|m| m.as_str()); let period = caps.get(1).map(|m| m.as_str());
let raw_hour: u32 = caps.get(2)?.as_str().parse().ok()?; let raw_hour: u32 = caps.get(2)?.as_str().parse().ok()?;
let minute: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0); let minute: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0);
@@ -229,9 +266,7 @@ fn try_every_day(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<Sch
})); }));
} }
// "每天早上/下午..." without explicit hour if let Some(caps) = RE_EVERY_DAY_PERIOD.captures(input) {
let re2 = regex::Regex::new(r"(?:每天|每日)(?:的)?(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)").ok()?;
if let Some(caps) = re2.captures(input) {
let period = caps.get(1)?.as_str(); let period = caps.get(1)?.as_str();
if let Some(hour) = period_to_hour(period) { if let Some(hour) = period_to_hour(period) {
return Some(ScheduleParseResult::Exact(ParsedSchedule { return Some(ScheduleParseResult::Exact(ParsedSchedule {
@@ -248,11 +283,7 @@ fn try_every_day(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<Sch
} }
fn try_every_week(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> { fn try_every_week(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
let re = regex::Regex::new( let caps = RE_EVERY_WEEK.captures(input)?;
&format!(r"(?:每周|每个?星期|每个?礼拜)(一|二|三|四|五|六|日|天|周一|周二|周三|周四|周五|周六|周日|周天|星期一|星期二|星期三|星期四|星期五|星期六|星期日|星期天|礼拜一|礼拜二|礼拜三|礼拜四|礼拜五|礼拜六|礼拜日|礼拜天)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?", PERIOD_PATTERN)
).ok()?;
let caps = re.captures(input)?;
let day_str = caps.get(1)?.as_str(); let day_str = caps.get(1)?.as_str();
let dow = weekday_to_cron(day_str)?; let dow = weekday_to_cron(day_str)?;
let period = caps.get(2).map(|m| m.as_str()); let period = caps.get(2).map(|m| m.as_str());
@@ -273,11 +304,7 @@ fn try_every_week(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<Sc
} }
fn try_workday(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> { fn try_workday(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
let re = regex::Regex::new( if let Some(caps) = RE_WORKDAY_EXACT.captures(input) {
&format!(r"(?:工作日|每个?工作日|工作日(?:的)?){}(\d{{1,2}})[点时:](\d{{1,2}})?", PERIOD_PATTERN)
).ok()?;
if let Some(caps) = re.captures(input) {
let period = caps.get(1).map(|m| m.as_str()); let period = caps.get(1).map(|m| m.as_str());
let raw_hour: u32 = caps.get(2)?.as_str().parse().ok()?; let raw_hour: u32 = caps.get(2)?.as_str().parse().ok()?;
let minute: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0); let minute: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0);
@@ -294,11 +321,7 @@ fn try_workday(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<Sched
})); }));
} }
// "工作日下午3点" style if let Some(caps) = RE_WORKDAY_PERIOD.captures(input) {
let re2 = regex::Regex::new(
r"(?:工作日|每个?工作日)(?:的)?(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)"
).ok()?;
if let Some(caps) = re2.captures(input) {
let period = caps.get(1)?.as_str(); let period = caps.get(1)?.as_str();
if let Some(hour) = period_to_hour(period) { if let Some(hour) = period_to_hour(period) {
return Some(ScheduleParseResult::Exact(ParsedSchedule { return Some(ScheduleParseResult::Exact(ParsedSchedule {
@@ -315,9 +338,7 @@ fn try_workday(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<Sched
} }
fn try_interval(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> { fn try_interval(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
// "每2小时", "每30分钟", "每N小时/分钟" if let Some(caps) = RE_INTERVAL.captures(input) {
let re = regex::Regex::new(r"每(\d{1,2})(小时|分钟|分|钟|个小时)").ok()?;
if let Some(caps) = re.captures(input) {
let n: u32 = caps.get(1)?.as_str().parse().ok()?; let n: u32 = caps.get(1)?.as_str().parse().ok()?;
if n == 0 { if n == 0 {
return None; return None;
@@ -341,11 +362,7 @@ fn try_interval(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<Sche
} }
fn try_monthly(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> { fn try_monthly(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
let re = regex::Regex::new( if let Some(caps) = RE_MONTHLY.captures(input) {
&format!(r"(?:每月|每个月)(?:的)?(\d{{1,2}})[号日](?:的)?{}(\d{{1,2}})?[点时:]?(\d{{1,2}})?", PERIOD_PATTERN)
).ok()?;
if let Some(caps) = re.captures(input) {
let day: u32 = caps.get(1)?.as_str().parse().ok()?; let day: u32 = caps.get(1)?.as_str().parse().ok()?;
let period = caps.get(2).map(|m| m.as_str()); let period = caps.get(2).map(|m| m.as_str());
let raw_hour: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(9)).unwrap_or(9); let raw_hour: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(9)).unwrap_or(9);
@@ -367,11 +384,7 @@ fn try_monthly(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<Sched
} }
fn try_one_shot(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> { fn try_one_shot(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
let re = regex::Regex::new( let caps = RE_ONE_SHOT.captures(input)?;
&format!(r"(明天|后天|大后天)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?", PERIOD_PATTERN)
).ok()?;
let caps = re.captures(input)?;
let day_offset = match caps.get(1)?.as_str() { let day_offset = match caps.get(1)?.as_str() {
"明天" => 1, "明天" => 1,
"后天" => 2, "后天" => 2,