Files
zclaw_openfang/crates/zclaw-runtime/src/nl_schedule.rs
iven 4b15ead8e7 feat(hermes): implement intelligence pipeline — 4 chunks, 684 tests passing
Hermes Intelligence Pipeline closes breakpoints in ZCLAW's existing
intelligence components with 4 self-contained modules:

Chunk 1 — Self-improvement Loop:
- ExperienceStore (zclaw-growth): FTS5+TF-IDF wrapper with scope prefix
- ExperienceExtractor (desktop/intelligence): template-based extraction
  from successful proposals with implicit keyword detection

Chunk 2 — User Modeling:
- UserProfileStore (zclaw-memory): SQLite-backed structured profiles
  with industry/role/expertise/comm_style/recent_topics/pain_points
- UserProfiler (desktop/intelligence): fact classification by category
  (Preference/Knowledge/Behavior) with profile summary formatting

Chunk 3 — NL Cron Chinese Time Parser:
- NlScheduleParser (zclaw-runtime): 6 pattern matchers for Chinese time
  expressions (每天/每周/工作日/间隔/每月/一次性) producing cron expressions
- Period-aware hour adjustment (下午3点→15, 晚上8点→20)
- Schedule intent detection + task description extraction

Chunk 4 — Trajectory Compression:
- TrajectoryStore (zclaw-memory): trajectory_events + compressed_trajectories
- TrajectoryRecorderMiddleware (zclaw-runtime/middleware): priority 650,
  async non-blocking event recording via tokio::spawn
- TrajectoryCompressor (desktop/intelligence): dedup, request classification,
  satisfaction detection, execution chain JSON

Schema migrations: v2→v3 (user_profiles), v3→v4 (trajectory tables)
2026-04-09 17:47:43 +08:00

594 lines
22 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Natural Language Schedule Parser — transforms Chinese time expressions into cron.
//!
//! Three-layer fallback strategy:
//! 1. Regex pattern matching (covers ~80% of common expressions)
//! 2. LLM-assisted parsing (for ambiguous/complex expressions) — TODO: wire when Haiku driver available
//! 3. Interactive clarification (return `Unclear`)
//!
//! Lives in `zclaw-runtime` because it's a pure text→cron utility with no kernel dependency.
use chrono::{Datelike, Timelike};
use serde::{Deserialize, Serialize};
use zclaw_types::AgentId;
// ---------------------------------------------------------------------------
// Data structures
// ---------------------------------------------------------------------------
/// Result of parsing a natural language schedule expression.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParsedSchedule {
/// Cron expression, e.g. "0 9 * * *"
pub cron_expression: String,
/// Human-readable description of the schedule
pub natural_description: String,
/// Confidence of the parse (0.01.0)
pub confidence: f32,
/// What the task does (extracted from user input)
pub task_description: String,
/// What to trigger when the schedule fires
pub task_target: TaskTarget,
}
/// Target to trigger on schedule.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", content = "id")]
pub enum TaskTarget {
/// Trigger a specific agent
Agent(String),
/// Trigger a specific hand
Hand(String),
/// Trigger a specific workflow
Workflow(String),
/// Generic reminder (no specific target)
Reminder,
}
/// Outcome of NL schedule parsing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ScheduleParseResult {
/// High-confidence single parse
Exact(ParsedSchedule),
/// Multiple possible interpretations
Ambiguous(Vec<ParsedSchedule>),
/// Unable to parse — needs user clarification
Unclear,
}
// ---------------------------------------------------------------------------
// Regex pattern library
// ---------------------------------------------------------------------------
/// A single pattern for matching Chinese time expressions.
struct SchedulePattern {
/// Regex pattern string
regex: &'static str,
/// Cron template — use {h} for hour, {m} for minute, {dow} for day-of-week, {dom} for day-of-month
cron_template: &'static str,
/// Human description template
description: &'static str,
/// Base confidence for this pattern
confidence: f32,
}
/// Chinese time period keywords → hour mapping
fn period_to_hour(period: &str) -> Option<u32> {
match period {
"凌晨" => Some(0),
"早上" | "早晨" | "上午" => Some(9),
"中午" => Some(12),
"下午" | "午后" => Some(15),
"傍晚" | "黄昏" => Some(18),
"晚上" | "晚间" | "夜里" | "夜晚" => Some(21),
"半夜" | "午夜" => Some(0),
_ => None,
}
}
/// Chinese weekday names → cron day-of-week
fn weekday_to_cron(day: &str) -> Option<&'static str> {
match day {
"" | "周一" | "星期一" | "礼拜一" => Some("1"),
"" | "周二" | "星期二" | "礼拜二" => Some("2"),
"" | "周三" | "星期三" | "礼拜三" => Some("3"),
"" | "周四" | "星期四" | "礼拜四" => Some("4"),
"" | "周五" | "星期五" | "礼拜五" => Some("5"),
"" | "周六" | "星期六" | "礼拜六" => Some("6"),
"" | "周日" | "星期日" | "礼拜日" | "" | "周天" | "星期天" | "礼拜天" => Some("0"),
_ => None,
}
}
// ---------------------------------------------------------------------------
// Parser implementation
// ---------------------------------------------------------------------------
/// Parse a natural language schedule expression into a cron expression.
///
/// Uses a series of regex-based pattern matchers covering common Chinese
/// time expressions. Returns `Unclear` if no pattern matches.
pub fn parse_nl_schedule(input: &str, default_agent_id: &AgentId) -> ScheduleParseResult {
let input = input.trim();
if input.is_empty() {
return ScheduleParseResult::Unclear;
}
// Extract task description (everything after keywords like "提醒我", "帮我")
let task_description = extract_task_description(input);
// --- Pattern 1: 每天 + 时间 ---
if let Some(result) = try_every_day(input, &task_description, default_agent_id) {
return result;
}
// --- Pattern 2: 每周N + 时间 ---
if let Some(result) = try_every_week(input, &task_description, default_agent_id) {
return result;
}
// --- Pattern 3: 工作日 + 时间 ---
if let Some(result) = try_workday(input, &task_description, default_agent_id) {
return result;
}
// --- Pattern 4: 每N小时/分钟 ---
if let Some(result) = try_interval(input, &task_description, default_agent_id) {
return result;
}
// --- Pattern 5: 每月N号 ---
if let Some(result) = try_monthly(input, &task_description, default_agent_id) {
return result;
}
// --- Pattern 6: 明天/后天 + 时间 (one-shot) ---
if let Some(result) = try_one_shot(input, &task_description, default_agent_id) {
return result;
}
ScheduleParseResult::Unclear
}
/// Extract task description from input, stripping schedule-related keywords.
fn extract_task_description(input: &str) -> String {
let strip_prefixes = [
"每天", "每日", "每周", "工作日", "每个工作日",
"每月", "", "定时", "定期",
"提醒我", "提醒", "帮我", "", "",
"明天", "后天", "大后天",
];
let mut desc = input.to_string();
// Strip prefixes + time expressions in alternating passes until stable
let time_re = regex::Regex::new(
r"^(?:凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?\d{1,2}[点时:]\d{0,2}分?"
).unwrap_or_else(|_| regex::Regex::new("").unwrap());
for _ in 0..3 {
// Pass 1: strip prefixes
loop {
let mut stripped = false;
for prefix in &strip_prefixes {
if desc.starts_with(prefix) {
desc = desc[prefix.len()..].to_string();
stripped = true;
}
}
if !stripped { break; }
}
// Pass 2: strip time expressions
let new_desc = time_re.replace(&desc, "").to_string();
if new_desc == desc { break; }
desc = new_desc;
}
desc.trim().to_string()
}
// -- Pattern matchers --
/// Adjust hour based on time-of-day period. Chinese 12-hour convention:
/// 下午3点 = 15, 晚上8点 = 20, etc. Morning hours stay as-is.
fn adjust_hour_for_period(hour: u32, period: Option<&str>) -> u32 {
if let Some(p) = period {
match p {
"下午" | "午后" => { if hour < 12 { hour + 12 } else { hour } }
"晚上" | "晚间" | "夜里" | "夜晚" => { if hour < 12 { hour + 12 } else { hour } }
"傍晚" | "黄昏" => { if hour < 12 { hour + 12 } else { hour } }
"中午" => { if hour == 12 { 12 } else if hour < 12 { hour + 12 } else { hour } }
"半夜" | "午夜" => { if hour == 12 { 0 } else { hour } }
_ => hour,
}
} else {
hour
}
}
const PERIOD_PATTERN: &str = "(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)?";
fn try_every_day(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
let re = regex::Regex::new(
&format!(r"(?:每天|每日)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?", PERIOD_PATTERN)
).ok()?;
if let Some(caps) = re.captures(input) {
let period = caps.get(1).map(|m| m.as_str());
let raw_hour: u32 = caps.get(2)?.as_str().parse().ok()?;
let minute: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0);
let hour = adjust_hour_for_period(raw_hour, period);
if hour > 23 || minute > 59 {
return None;
}
return Some(ScheduleParseResult::Exact(ParsedSchedule {
cron_expression: format!("{} {} * * *", minute, hour),
natural_description: format!("每天{:02}:{:02}", hour, minute),
confidence: 0.95,
task_description: task_desc.to_string(),
task_target: TaskTarget::Agent(agent_id.to_string()),
}));
}
// "每天早上/下午..." without explicit hour
let re2 = regex::Regex::new(r"(?:每天|每日)(?:的)?(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)").ok()?;
if let Some(caps) = re2.captures(input) {
let period = caps.get(1)?.as_str();
if let Some(hour) = period_to_hour(period) {
return Some(ScheduleParseResult::Exact(ParsedSchedule {
cron_expression: format!("0 {} * * *", hour),
natural_description: format!("每天{}", period),
confidence: 0.85,
task_description: task_desc.to_string(),
task_target: TaskTarget::Agent(agent_id.to_string()),
}));
}
}
None
}
fn try_every_week(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
let re = regex::Regex::new(
&format!(r"(?:每周|每个?星期|每个?礼拜)(一|二|三|四|五|六|日|天|周一|周二|周三|周四|周五|周六|周日|周天|星期一|星期二|星期三|星期四|星期五|星期六|星期日|星期天|礼拜一|礼拜二|礼拜三|礼拜四|礼拜五|礼拜六|礼拜日|礼拜天)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?", PERIOD_PATTERN)
).ok()?;
let caps = re.captures(input)?;
let day_str = caps.get(1)?.as_str();
let dow = weekday_to_cron(day_str)?;
let period = caps.get(2).map(|m| m.as_str());
let raw_hour: u32 = caps.get(3)?.as_str().parse().ok()?;
let minute: u32 = caps.get(4).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0);
let hour = adjust_hour_for_period(raw_hour, period);
if hour > 23 || minute > 59 {
return None;
}
Some(ScheduleParseResult::Exact(ParsedSchedule {
cron_expression: format!("{} {} * * {}", minute, hour, dow),
natural_description: format!("每周{} {:02}:{:02}", day_str, hour, minute),
confidence: 0.92,
task_description: task_desc.to_string(),
task_target: TaskTarget::Agent(agent_id.to_string()),
}))
}
fn try_workday(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
let re = regex::Regex::new(
&format!(r"(?:工作日|每个?工作日|工作日(?:的)?){}(\d{{1,2}})[点时:](\d{{1,2}})?", PERIOD_PATTERN)
).ok()?;
if let Some(caps) = re.captures(input) {
let period = caps.get(1).map(|m| m.as_str());
let raw_hour: u32 = caps.get(2)?.as_str().parse().ok()?;
let minute: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0);
let hour = adjust_hour_for_period(raw_hour, period);
if hour > 23 || minute > 59 {
return None;
}
return Some(ScheduleParseResult::Exact(ParsedSchedule {
cron_expression: format!("{} {} * * 1-5", minute, hour),
natural_description: format!("工作日{:02}:{:02}", hour, minute),
confidence: 0.90,
task_description: task_desc.to_string(),
task_target: TaskTarget::Agent(agent_id.to_string()),
}));
}
// "工作日下午3点" style
let re2 = regex::Regex::new(
r"(?:工作日|每个?工作日)(?:的)?(凌晨|早上|早晨|上午|中午|下午|午后|傍晚|黄昏|晚上|晚间|夜里|夜晚|半夜|午夜)"
).ok()?;
if let Some(caps) = re2.captures(input) {
let period = caps.get(1)?.as_str();
if let Some(hour) = period_to_hour(period) {
return Some(ScheduleParseResult::Exact(ParsedSchedule {
cron_expression: format!("0 {} * * 1-5", hour),
natural_description: format!("工作日{}", period),
confidence: 0.85,
task_description: task_desc.to_string(),
task_target: TaskTarget::Agent(agent_id.to_string()),
}));
}
}
None
}
fn try_interval(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
// "每2小时", "每30分钟", "每N小时/分钟"
let re = regex::Regex::new(r"每(\d{1,2})(小时|分钟|分|钟|个小时)").ok()?;
if let Some(caps) = re.captures(input) {
let n: u32 = caps.get(1)?.as_str().parse().ok()?;
if n == 0 {
return None;
}
let unit = caps.get(2)?.as_str();
let (cron, desc) = if unit.contains("") {
(format!("0 */{} * * *", n), format!("{}小时", n))
} else {
(format!("*/{} * * * *", n), format!("{}分钟", n))
};
return Some(ScheduleParseResult::Exact(ParsedSchedule {
cron_expression: cron,
natural_description: desc,
confidence: 0.90,
task_description: task_desc.to_string(),
task_target: TaskTarget::Agent(agent_id.to_string()),
}));
}
None
}
fn try_monthly(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
let re = regex::Regex::new(
&format!(r"(?:每月|每个月)(?:的)?(\d{{1,2}})[号日](?:的)?{}(\d{{1,2}})?[点时:]?(\d{{1,2}})?", PERIOD_PATTERN)
).ok()?;
if let Some(caps) = re.captures(input) {
let day: u32 = caps.get(1)?.as_str().parse().ok()?;
let period = caps.get(2).map(|m| m.as_str());
let raw_hour: u32 = caps.get(3).map(|m| m.as_str().parse().unwrap_or(9)).unwrap_or(9);
let minute: u32 = caps.get(4).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0);
let hour = adjust_hour_for_period(raw_hour, period);
if day > 31 || hour > 23 || minute > 59 {
return None;
}
return Some(ScheduleParseResult::Exact(ParsedSchedule {
cron_expression: format!("{} {} {} * *", minute, hour, day),
natural_description: format!("每月{}号 {:02}:{:02}", day, hour, minute),
confidence: 0.90,
task_description: task_desc.to_string(),
task_target: TaskTarget::Agent(agent_id.to_string()),
}));
}
None
}
fn try_one_shot(input: &str, task_desc: &str, agent_id: &AgentId) -> Option<ScheduleParseResult> {
let re = regex::Regex::new(
&format!(r"(明天|后天|大后天)(?:的)?{}(\d{{1,2}})[点时:](\d{{1,2}})?", PERIOD_PATTERN)
).ok()?;
let caps = re.captures(input)?;
let day_offset = match caps.get(1)?.as_str() {
"明天" => 1,
"后天" => 2,
"大后天" => 3,
_ => return None,
};
let period = caps.get(2).map(|m| m.as_str());
let raw_hour: u32 = caps.get(3)?.as_str().parse().ok()?;
let minute: u32 = caps.get(4).map(|m| m.as_str().parse().unwrap_or(0)).unwrap_or(0);
let hour = adjust_hour_for_period(raw_hour, period);
if hour > 23 || minute > 59 {
return None;
}
let target = chrono::Utc::now()
.checked_add_signed(chrono::Duration::days(day_offset))
.unwrap_or_else(chrono::Utc::now)
.with_hour(hour)
.unwrap_or_else(|| chrono::Utc::now())
.with_minute(minute)
.unwrap_or_else(|| chrono::Utc::now())
.with_second(0)
.unwrap_or_else(|| chrono::Utc::now());
Some(ScheduleParseResult::Exact(ParsedSchedule {
cron_expression: target.to_rfc3339(),
natural_description: format!("{} {:02}:{:02}", caps.get(1)?.as_str(), hour, minute),
confidence: 0.88,
task_description: task_desc.to_string(),
task_target: TaskTarget::Agent(agent_id.to_string()),
}))
}
// ---------------------------------------------------------------------------
// Schedule intent detection
// ---------------------------------------------------------------------------
/// Keywords indicating the user wants to set a scheduled task.
const SCHEDULE_INTENT_KEYWORDS: &[&str] = &[
"提醒我", "提醒", "定时", "每天", "每日", "每周", "每月",
"工作日", "每隔", "", "定期", "到时候", "准时",
"闹钟", "闹铃", "日程", "日历",
];
/// Check if user input contains schedule intent.
pub fn has_schedule_intent(input: &str) -> bool {
let lower = input.to_lowercase();
SCHEDULE_INTENT_KEYWORDS.iter().any(|kw| lower.contains(kw))
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
fn default_agent() -> AgentId {
AgentId::new()
}
#[test]
fn test_every_day_explicit_time() {
let result = parse_nl_schedule("每天早上9点提醒我查房", &default_agent());
match result {
ScheduleParseResult::Exact(s) => {
assert_eq!(s.cron_expression, "0 9 * * *");
assert!(s.confidence >= 0.9);
}
_ => panic!("Expected Exact, got {:?}", result),
}
}
#[test]
fn test_every_day_with_minute() {
let result = parse_nl_schedule("每天下午3点30分提醒我", &default_agent());
match result {
ScheduleParseResult::Exact(s) => {
assert_eq!(s.cron_expression, "30 15 * * *");
}
_ => panic!("Expected Exact"),
}
}
#[test]
fn test_every_day_period_only() {
let result = parse_nl_schedule("每天早上提醒我看看报告", &default_agent());
match result {
ScheduleParseResult::Exact(s) => {
assert_eq!(s.cron_expression, "0 9 * * *");
}
_ => panic!("Expected Exact"),
}
}
#[test]
fn test_every_week_monday() {
let result = parse_nl_schedule("每周一上午10点提醒我开会", &default_agent());
match result {
ScheduleParseResult::Exact(s) => {
assert_eq!(s.cron_expression, "0 10 * * 1");
}
_ => panic!("Expected Exact"),
}
}
#[test]
fn test_every_week_friday() {
let result = parse_nl_schedule("每个星期五下午2点", &default_agent());
match result {
ScheduleParseResult::Exact(s) => {
assert_eq!(s.cron_expression, "0 14 * * 5");
}
_ => panic!("Expected Exact"),
}
}
#[test]
fn test_workday() {
let result = parse_nl_schedule("工作日下午3点提醒我写周报", &default_agent());
match result {
ScheduleParseResult::Exact(s) => {
assert_eq!(s.cron_expression, "0 15 * * 1-5");
}
_ => panic!("Expected Exact"),
}
}
#[test]
fn test_interval_hours() {
let result = parse_nl_schedule("每2小时提醒我喝水", &default_agent());
match result {
ScheduleParseResult::Exact(s) => {
assert_eq!(s.cron_expression, "0 */2 * * *");
}
_ => panic!("Expected Exact"),
}
}
#[test]
fn test_interval_minutes() {
let result = parse_nl_schedule("每30分钟检查一次", &default_agent());
match result {
ScheduleParseResult::Exact(s) => {
assert_eq!(s.cron_expression, "*/30 * * * *");
}
_ => panic!("Expected Exact"),
}
}
#[test]
fn test_monthly() {
let result = parse_nl_schedule("每月1号早上9点提醒我", &default_agent());
match result {
ScheduleParseResult::Exact(s) => {
assert_eq!(s.cron_expression, "0 9 1 * *");
}
_ => panic!("Expected Exact"),
}
}
#[test]
fn test_one_shot_tomorrow() {
let result = parse_nl_schedule("明天下午3点提醒我开会", &default_agent());
match result {
ScheduleParseResult::Exact(s) => {
assert!(s.cron_expression.contains('T'));
assert!(s.natural_description.contains("明天"));
}
_ => panic!("Expected Exact"),
}
}
#[test]
fn test_unclear_input() {
let result = parse_nl_schedule("今天天气怎么样", &default_agent());
assert!(matches!(result, ScheduleParseResult::Unclear));
}
#[test]
fn test_empty_input() {
let result = parse_nl_schedule("", &default_agent());
assert!(matches!(result, ScheduleParseResult::Unclear));
}
#[test]
fn test_schedule_intent_detection() {
assert!(has_schedule_intent("每天早上9点提醒我查房"));
assert!(has_schedule_intent("帮我设个定时任务"));
assert!(has_schedule_intent("工作日提醒我打卡"));
assert!(!has_schedule_intent("今天天气怎么样"));
assert!(!has_schedule_intent("帮我写个报告"));
}
#[test]
fn test_period_to_hour_mapping() {
assert_eq!(period_to_hour("凌晨"), Some(0));
assert_eq!(period_to_hour("早上"), Some(9));
assert_eq!(period_to_hour("中午"), Some(12));
assert_eq!(period_to_hour("下午"), Some(15));
assert_eq!(period_to_hour("晚上"), Some(21));
assert_eq!(period_to_hour("不知道"), None);
}
#[test]
fn test_weekday_to_cron_mapping() {
assert_eq!(weekday_to_cron(""), Some("1"));
assert_eq!(weekday_to_cron(""), Some("5"));
assert_eq!(weekday_to_cron(""), Some("0"));
assert_eq!(weekday_to_cron("星期三"), Some("3"));
assert_eq!(weekday_to_cron("礼拜天"), Some("0"));
assert_eq!(weekday_to_cron("未知"), None);
}
#[test]
fn test_task_description_extraction() {
assert_eq!(extract_task_description("每天早上9点提醒我查房"), "查房");
}
}