feat(hermes): implement intelligence pipeline — 4 chunks, 684 tests passing

Hermes Intelligence Pipeline closes breakpoints in ZCLAW's existing
intelligence components with 4 self-contained modules:

Chunk 1 — Self-improvement Loop:
- ExperienceStore (zclaw-growth): FTS5+TF-IDF wrapper with scope prefix
- ExperienceExtractor (desktop/intelligence): template-based extraction
  from successful proposals with implicit keyword detection

Chunk 2 — User Modeling:
- UserProfileStore (zclaw-memory): SQLite-backed structured profiles
  with industry/role/expertise/comm_style/recent_topics/pain_points
- UserProfiler (desktop/intelligence): fact classification by category
  (Preference/Knowledge/Behavior) with profile summary formatting

Chunk 3 — NL Cron Chinese Time Parser:
- NlScheduleParser (zclaw-runtime): 6 pattern matchers for Chinese time
  expressions (每天/每周/工作日/间隔/每月/一次性) producing cron expressions
- Period-aware hour adjustment (下午3点→15, 晚上8点→20)
- Schedule intent detection + task description extraction

Chunk 4 — Trajectory Compression:
- TrajectoryStore (zclaw-memory): trajectory_events + compressed_trajectories
- TrajectoryRecorderMiddleware (zclaw-runtime/middleware): priority 650,
  async non-blocking event recording via tokio::spawn
- TrajectoryCompressor (desktop/intelligence): dedup, request classification,
  satisfaction detection, execution chain JSON

Schema migrations: v2→v3 (user_profiles), v3→v4 (trajectory tables)
This commit is contained in:
iven
2026-04-09 17:47:43 +08:00
parent 0883bb28ff
commit 4b15ead8e7
15 changed files with 4225 additions and 0 deletions

View File

@@ -0,0 +1,231 @@
//! Trajectory Recorder Middleware — records tool-call chains for analysis.
//!
//! Priority 650 (telemetry range: after business middleware at 400-599,
//! before token_calibration at 700). Records events asynchronously via
//! `tokio::spawn` so the main conversation flow is never blocked.
use async_trait::async_trait;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use tokio::sync::RwLock;
use zclaw_memory::trajectory_store::{
TrajectoryEvent, TrajectoryStepType, TrajectoryStore,
};
use zclaw_types::{Result, SessionId};
use crate::driver::ContentBlock;
use crate::middleware::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};
// ---------------------------------------------------------------------------
// Step counter per session
// ---------------------------------------------------------------------------
/// Tracks step indices per session so events are ordered correctly.
struct StepCounter {
counters: RwLock<Vec<(String, Arc<AtomicU64>)>>,
}
impl StepCounter {
fn new() -> Self {
Self {
counters: RwLock::new(Vec::new()),
}
}
async fn next(&self, session_id: &str) -> usize {
let map = self.counters.read().await;
for (sid, counter) in map.iter() {
if sid == session_id {
return counter.fetch_add(1, Ordering::Relaxed) as usize;
}
}
drop(map);
let mut map = self.counters.write().await;
// Double-check after acquiring write lock
for (sid, counter) in map.iter() {
if sid == session_id {
return counter.fetch_add(1, Ordering::Relaxed) as usize;
}
}
let counter = Arc::new(AtomicU64::new(1));
map.push((session_id.to_string(), counter.clone()));
0
}
}
// ---------------------------------------------------------------------------
// TrajectoryRecorderMiddleware
// ---------------------------------------------------------------------------
/// Middleware that records agent loop events into `TrajectoryStore`.
///
/// Hooks:
/// - `before_completion` → records UserRequest step
/// - `after_tool_call` → records ToolExecution step
/// - `after_completion` → records LlmGeneration step
pub struct TrajectoryRecorderMiddleware {
store: Arc<TrajectoryStore>,
step_counter: StepCounter,
}
impl TrajectoryRecorderMiddleware {
pub fn new(store: Arc<TrajectoryStore>) -> Self {
Self {
store,
step_counter: StepCounter::new(),
}
}
/// Spawn an async write — fire-and-forget, non-blocking.
fn spawn_write(&self, event: TrajectoryEvent) {
let store = self.store.clone();
tokio::spawn(async move {
if let Err(e) = store.insert_event(&event).await {
tracing::warn!(
"[TrajectoryRecorder] Async write failed (non-fatal): {}",
e
);
}
});
}
fn truncate(s: &str, max: usize) -> String {
if s.len() <= max {
s.to_string()
} else {
s.chars().take(max).collect::<String>() + ""
}
}
}
#[async_trait]
impl AgentMiddleware for TrajectoryRecorderMiddleware {
fn name(&self) -> &str {
"trajectory_recorder"
}
fn priority(&self) -> i32 {
650
}
async fn before_completion(
&self,
ctx: &mut MiddlewareContext,
) -> Result<MiddlewareDecision> {
if ctx.user_input.is_empty() {
return Ok(MiddlewareDecision::Continue);
}
let step = self.step_counter.next(&ctx.session_id.to_string()).await;
let event = TrajectoryEvent {
id: uuid::Uuid::new_v4().to_string(),
session_id: ctx.session_id.to_string(),
agent_id: ctx.agent_id.to_string(),
step_index: step,
step_type: TrajectoryStepType::UserRequest,
input_summary: Self::truncate(&ctx.user_input, 200),
output_summary: String::new(),
duration_ms: 0,
timestamp: chrono::Utc::now(),
};
self.spawn_write(event);
Ok(MiddlewareDecision::Continue)
}
async fn after_tool_call(
&self,
ctx: &mut MiddlewareContext,
tool_name: &str,
result: &serde_json::Value,
) -> Result<()> {
let step = self.step_counter.next(&ctx.session_id.to_string()).await;
let result_summary = match result {
serde_json::Value::String(s) => Self::truncate(s, 200),
serde_json::Value::Object(_) => {
let s = serde_json::to_string(result).unwrap_or_default();
Self::truncate(&s, 200)
}
other => Self::truncate(&other.to_string(), 200),
};
let event = TrajectoryEvent {
id: uuid::Uuid::new_v4().to_string(),
session_id: ctx.session_id.to_string(),
agent_id: ctx.agent_id.to_string(),
step_index: step,
step_type: TrajectoryStepType::ToolExecution,
input_summary: Self::truncate(tool_name, 200),
output_summary: result_summary,
duration_ms: 0,
timestamp: chrono::Utc::now(),
};
self.spawn_write(event);
Ok(())
}
async fn after_completion(&self, ctx: &MiddlewareContext) -> Result<()> {
let step = self.step_counter.next(&ctx.session_id.to_string()).await;
let output_summary = ctx.response_content.iter()
.filter_map(|b| match b {
ContentBlock::Text { text } => Some(text.as_str()),
_ => None,
})
.collect::<Vec<_>>()
.join(" ");
let event = TrajectoryEvent {
id: uuid::Uuid::new_v4().to_string(),
session_id: ctx.session_id.to_string(),
agent_id: ctx.agent_id.to_string(),
step_index: step,
step_type: TrajectoryStepType::LlmGeneration,
input_summary: String::new(),
output_summary: Self::truncate(&output_summary, 200),
duration_ms: 0,
timestamp: chrono::Utc::now(),
};
self.spawn_write(event);
Ok(())
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_step_counter_sequential() {
let counter = StepCounter::new();
assert_eq!(counter.next("sess-1").await, 0);
assert_eq!(counter.next("sess-1").await, 1);
assert_eq!(counter.next("sess-1").await, 2);
}
#[tokio::test]
async fn test_step_counter_different_sessions() {
let counter = StepCounter::new();
assert_eq!(counter.next("sess-1").await, 0);
assert_eq!(counter.next("sess-2").await, 0);
assert_eq!(counter.next("sess-1").await, 1);
assert_eq!(counter.next("sess-2").await, 1);
}
#[test]
fn test_truncate_short() {
assert_eq!(TrajectoryRecorderMiddleware::truncate("hello", 10), "hello");
}
#[test]
fn test_truncate_long() {
let long: String = "".repeat(300);
let truncated = TrajectoryRecorderMiddleware::truncate(&long, 200);
assert!(truncated.chars().count() <= 201); // 200 + …
}
}