diff --git a/crates/zclaw-kernel/src/skill_router.rs b/crates/zclaw-kernel/src/skill_router.rs
index a414b2e..dfff2ff 100644
--- a/crates/zclaw-kernel/src/skill_router.rs
+++ b/crates/zclaw-kernel/src/skill_router.rs
@@ -1,7 +1,11 @@
 //! Skill router integration for the Kernel
 //!
-//! Bridges zclaw-growth's `EmbeddingClient` to zclaw-skills' `Embedder` trait,
+//! Bridges zclaw-runtime's `EmbeddingClient` to zclaw-skills' `Embedder` trait,
 //! enabling the `SemanticSkillRouter` to use real embedding APIs.
+//!
+//! Also provides `LlmSkillFallback` — a concrete `RuntimeLlmIntent` that
+//! delegates ambiguous skill routing to an LLM when TF-IDF/embedding confidence
+//! is below the configured threshold.
 
 use std::sync::Arc;
 use async_trait::async_trait;
@@ -23,3 +27,191 @@ impl zclaw_skills::semantic_router::Embedder for EmbeddingAdapter {
         self.client.embed(text).await.ok()
     }
 }
+
+// ---------------------------------------------------------------------------
+// LLM Skill Fallback
+// ---------------------------------------------------------------------------
+
+/// LLM-based skill fallback for ambiguous routing decisions.
+///
+/// When TF-IDF + embedding similarity cannot reach the confidence threshold,
+/// this implementation sends the top candidates to an LLM and asks it to
+/// pick the best match.
+pub struct LlmSkillFallback {
+    driver: Arc,
+}
+
+impl LlmSkillFallback {
+    /// Create a new LLM fallback wrapping an existing LLM driver.
+    pub fn new(driver: Arc) -> Self {
+        Self { driver }
+    }
+}
+
+#[async_trait]
+impl zclaw_skills::semantic_router::RuntimeLlmIntent for LlmSkillFallback {
+    /// Ask the LLM to choose one of `candidates` for `query`.
+    ///
+    /// Returns `None` when `candidates` is empty, when the LLM call fails, or
+    /// when the reply cannot be parsed into the id of one of the candidates.
+    async fn resolve_skill(
+        &self,
+        query: &str,
+        candidates: &[zclaw_skills::semantic_router::ScoredCandidate],
+    ) -> Option {
+        if candidates.is_empty() {
+            return None;
+        }
+
+        let candidate_lines: Vec = candidates
+            .iter()
+            .enumerate()
+            .map(|(i, c)| {
+                format!(
+                    "{}. [{}] {} — {} (score: {:.0}%)",
+                    i + 1,
+                    c.manifest.id,
+                    c.manifest.name,
+                    c.manifest.description,
+                    c.score * 100.0
+                )
+            })
+            .collect();
+
+        let system_prompt = concat!(
+            "你是技能路由助手。用户会提出一个问题,你需要从候选技能中选出最合适的一个。\n",
+            "只返回 JSON,格式: {\"skill_id\": \"...\", \"reasoning\": \"...\"}\n",
+            "如果没有合适的技能,返回: {\"skill_id\": null}"
+        );
+
+        let user_msg = format!(
+            "用户查询: {}\n\n候选技能:\n{}",
+            query,
+            candidate_lines.join("\n")
+        );
+
+        let request = zclaw_runtime::driver::CompletionRequest {
+            // NOTE(review): this passes the provider name as the model id —
+            // confirm the driver maps it to a real model.
+            model: self.driver.provider().to_string(),
+            system: Some(system_prompt.to_string()),
+            // The query and candidate list are the user's turn, not a model reply.
+            messages: vec![zclaw_types::Message::user(user_msg)],
+            max_tokens: Some(256),
+            temperature: Some(0.1),
+            stream: false,
+            ..Default::default()
+        };
+
+        let response = match self.driver.complete(request).await {
+            Ok(r) => r,
+            Err(e) => {
+                tracing::warn!("[LlmSkillFallback] LLM call failed: {}", e);
+                return None;
+            }
+        };
+
+        let text = response.content.iter()
+            .filter_map(|block| match block {
+                zclaw_runtime::driver::ContentBlock::Text { text } => Some(text.as_str()),
+                _ => None,
+            })
+            .collect::>()
+            .join("");
+
+        parse_llm_routing_response(&text, candidates)
+    }
+}
+
+/// Parse the LLM's JSON reply into a routing result.
+///
+/// Returns `None` if the reply is not valid JSON, if `skill_id` is missing,
+/// not a string, empty or `"null"`, or if it names no entry in `candidates`.
+fn parse_llm_routing_response(
+    text: &str,
+    candidates: &[zclaw_skills::semantic_router::ScoredCandidate],
+) -> Option {
+    let json_str = extract_json(text);
+    let parsed: serde_json::Value = serde_json::from_str(&json_str).ok()?;
+
+    let skill_id = parsed.get("skill_id")?.as_str()?.to_string();
+
+    // LLM returned null → no match
+    if skill_id.is_empty() || skill_id == "null" {
+        return None;
+    }
+
+    // Verify the skill_id matches one of the candidates
+    let matched = candidates.iter().find(|c| c.manifest.id.as_str() == skill_id)?;
+
+    let reasoning = parsed.get("reasoning")
+        .and_then(|v| v.as_str())
+        .unwrap_or("LLM selected match")
+        .to_string();
+
+    Some(zclaw_skills::semantic_router::RoutingResult {
+        skill_id,
+        confidence: matched.score.max(0.5), // LLM-confirmed matches get at least 0.5
+        parameters: serde_json::json!({}),
+        reasoning,
+    })
+}
+
+/// Extract the JSON object embedded in an LLM response.
+///
+/// Tries, in order: the body of the first fenced `json` code block, the span
+/// from the first `{` to the last `}`, and finally the trimmed text unchanged.
+fn extract_json(text: &str) -> String {
+    let trimmed = text.trim();
+
+    // Try markdown code block. Each step narrows a sub-slice, so no offset is
+    // ever applied to a different string than the one it was found in; mixing
+    // fence-relative and absolute offsets returns the wrong span (or panics on
+    // a multi-byte character) whenever text precedes the fence.
+    if let Some((_, after_fence)) = trimmed.split_once("```json") {
+        if let Some((_, body)) = after_fence.split_once('\n') {
+            if let Some((json, _)) = body.split_once("```") {
+                return json.trim().to_string();
+            }
+        }
+    }
+
+    // Try bare JSON; `start < end` skips a `}` that comes before the first `{`.
+    if let (Some(start), Some(end)) = (trimmed.find('{'), trimmed.rfind('}')) {
+        if start < end {
+            return trimmed[start..=end].to_string();
+        }
+    }
+
+    trimmed.to_string()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_extract_json_bare() {
+        let text = r#"{"skill_id": "test", "reasoning": "match"}"#;
+        assert_eq!(extract_json(text), text);
+    }
+
+    #[test]
+    fn test_extract_json_code_block() {
+        let text = "```json\n{\"skill_id\": \"test\"}\n```";
+        assert_eq!(extract_json(text), "{\"skill_id\": \"test\"}");
+    }
+
+    #[test]
+    fn test_extract_json_code_block_with_preamble() {
+        // Multi-byte text before the fence must not shift the extracted span.
+        let text = "结果如下:\n```json\n{\"skill_id\": \"test\"}\n```\n完成";
+        assert_eq!(extract_json(text), "{\"skill_id\": \"test\"}");
+    }
+
+    #[test]
+    fn test_extract_json_with_surrounding_text() {
+        let text = "Here is the result:\n{\"skill_id\": \"test\"}\nDone.";
+        assert_eq!(extract_json(text), "{\"skill_id\": \"test\"}");
+    }
+}
diff --git a/crates/zclaw-skills/src/semantic_router.rs b/crates/zclaw-skills/src/semantic_router.rs
index 0cc1a55..85ee3f2 100644
--- a/crates/zclaw-skills/src/semantic_router.rs
+++ b/crates/zclaw-skills/src/semantic_router.rs
@@ -24,6 +24,23 @@ pub trait Embedder: Send + Sync {
     async fn embed(&self, text: &str) -> Option>;
 }
 
+/// Runtime LLM intent resolution trait.
+///
+/// When TF-IDF + embedding confidence is below the threshold, the router
+/// delegates to an LLM to pick the best skill from top candidates.
+#[async_trait]
+pub trait RuntimeLlmIntent: Send + Sync {
+    /// Ask the LLM to select the best skill for a query.
+    ///
+    /// Returns `None` if the LLM cannot determine a match (e.g. query is
+    /// genuinely unrelated to all candidates).
+    async fn resolve_skill(
+        &self,
+        query: &str,
+        candidates: &[ScoredCandidate],
+    ) -> Option;
+}
+
 /// No-op embedder that always returns None (forces TF-IDF fallback).
 pub struct NoOpEmbedder;
 
@@ -71,6 +88,8 @@ pub struct SemanticSkillRouter {
     skill_embeddings: HashMap>,
     /// Confidence threshold for direct selection (skip LLM)
     confidence_threshold: f32,
+    /// LLM fallback for ambiguous queries (confidence below threshold)
+    llm_fallback: Option>,
 }
 
 impl SemanticSkillRouter {
@@ -82,6 +101,7 @@ impl SemanticSkillRouter {
             tfidf_index: SkillTfidfIndex::new(),
             skill_embeddings: HashMap::new(),
             confidence_threshold: 0.85,
+            llm_fallback: None,
         };
         router.rebuild_index_sync();
         router
@@ -98,6 +118,12 @@ impl SemanticSkillRouter {
         self
     }
 
+    /// Set LLM fallback for ambiguous queries (confidence below threshold)
+    pub fn with_llm_fallback(mut self, fallback: Arc) -> Self {
+        self.llm_fallback = Some(fallback);
+        self
+    }
+
     /// Rebuild the TF-IDF index from current registry manifests
     fn rebuild_index_sync(&mut self) {
         let manifests = self.registry.manifests_snapshot();
@@ -194,7 +220,7 @@ impl SemanticSkillRouter {
     ///
     /// Returns `None` if no skill matches well enough.
     /// If top candidate exceeds `confidence_threshold`, returns directly.
-    /// Otherwise returns top candidate with lower confidence (caller can invoke LLM fallback).
+    /// Otherwise, if an LLM fallback is configured, delegates to it for final selection.
     pub async fn route(&self, query: &str) -> Option {
         let candidates = self.retrieve_candidates(query, 3).await;
 
@@ -204,23 +230,43 @@ impl SemanticSkillRouter {
 
         let best = &candidates[0];
 
-        // If score is very low, don't route
+        // If score is very low, don't route even with LLM
         if best.score < 0.1 {
             return None;
         }
 
-        let confidence = best.score;
-        let reasoning = if confidence >= self.confidence_threshold {
-            format!("High semantic match ({:.0}%)", confidence * 100.0)
-        } else {
-            format!("Best match ({:.0}%) — may need LLM refinement", confidence * 100.0)
-        };
+        // High confidence → return directly
+        if best.score >= self.confidence_threshold {
+            return Some(RoutingResult {
+                skill_id: best.manifest.id.to_string(),
+                confidence: best.score,
+                parameters: serde_json::json!({}),
+                reasoning: format!("High semantic match ({:.0}%)", best.score * 100.0),
+            });
+        }
 
+        // Medium confidence → try LLM fallback if available
+        if let Some(ref llm) = self.llm_fallback {
+            if let Some(result) = llm.resolve_skill(query, &candidates).await {
+                tracing::debug!(
+                    "[SemanticSkillRouter] LLM fallback selected '{}' (original top: '{}' at {:.0}%)",
+                    result.skill_id,
+                    best.manifest.id,
+                    best.score * 100.0
+                );
+                return Some(result);
+            }
+        }
+
+        // No LLM fallback or LLM couldn't decide → return best TF-IDF/embedding match
         Some(RoutingResult {
             skill_id: best.manifest.id.to_string(),
-            confidence,
+            confidence: best.score,
             parameters: serde_json::json!({}),
-            reasoning,
+            reasoning: format!(
+                "Best match ({:.0}%) — below threshold, no LLM refinement",
+                best.score * 100.0
+            ),
         })
     }
 
@@ -367,11 +413,58 @@ impl SkillTfidfIndex {
     }
 
     fn tokenize(&self, text: &str) -> Vec {
-        text.to_lowercase()
-            .split(|c: char| !c.is_alphanumeric())
-            .filter(|s| !s.is_empty() && s.len() > 1 && !self.stop_words.contains(*s))
-            .map(|s| s.to_string())
-            .collect()
+        let lower = text.to_lowercase();
+        let segments = lower.split(|c: char| !c.is_alphanumeric())
+            .filter(|s| !s.is_empty())
+            .collect::>();
+
+        let mut tokens = Vec::new();
+        for segment in &segments {
+            let chars: Vec = segment.chars().collect();
+
+            // Check if segment contains CJK characters
+            let has_cjk = chars.iter().any(|&c| Self::is_cjk(c));
+
+            if has_cjk {
+                // CJK: generate character bigrams (e.g. "财报解读" → ["财报", "报解", "解读"])
+                for window in chars.windows(2) {
+                    let bigram = format!("{}{}", window[0], window[1]);
+                    if !self.stop_words.contains(&bigram) {
+                        tokens.push(bigram);
+                    }
+                }
+                // Short segments also get unigrams; a lone CJK char has no bigram, so this keeps it
+                if chars.len() <= 4 {
+                    for &c in &chars {
+                        if Self::is_cjk(c) {
+                            let s = c.to_string();
+                            if !self.stop_words.contains(&s) {
+                                tokens.push(s);
+                            }
+                        }
+                    }
+                }
+            } else if !has_cjk && segment.len() > 1 {
+                // Non-CJK: use as-is (existing behavior)
+                if !self.stop_words.contains(*segment) {
+                    tokens.push(segment.to_string());
+                }
+            }
+        }
+
+        tokens
+    }
+
+    /// Check if a character is CJK (Chinese, Japanese, Korean)
+    fn is_cjk(c: char) -> bool {
+        matches!(c,
+            '\u{4E00}'..='\u{9FFF}' | // CJK Unified Ideographs
+            '\u{3400}'..='\u{4DBF}' | // CJK Extension A
+            '\u{F900}'..='\u{FAFF}' | // CJK Compatibility Ideographs
+            '\u{3040}'..='\u{309F}' | // Hiragana
+            '\u{30A0}'..='\u{30FF}' | // Katakana
+            '\u{AC00}'..='\u{D7AF}'   // Hangul Syllables
+        )
     }
 
     fn cosine_sim_maps(v1: &HashMap, v2: &HashMap) -> f32 {
@@ -516,4 +609,117 @@ mod tests {
         let c = vec![0.0, 1.0, 0.0];
         assert!((cosine_similarity(&a, &c) - 0.0).abs() < 0.001);
     }
+
+    /// Mock LLM fallback that always picks the candidate matching target_skill_id
+    struct MockLlmFallback {
+        target_skill_id: String,
+    }
+
+    #[async_trait]
+    impl RuntimeLlmIntent for MockLlmFallback {
+        async fn resolve_skill(
+            &self,
+            _query: &str,
+            candidates: &[ScoredCandidate],
+        ) -> Option {
+            let candidate = candidates.iter().find(|c| c.manifest.id.as_str() == self.target_skill_id)?;
+            Some(RoutingResult {
+                skill_id: candidate.manifest.id.to_string(),
+                confidence: 0.75,
+                parameters: serde_json::json!({}),
+                reasoning: "LLM selected this skill".to_string(),
+            })
+        }
+    }
+
+    #[tokio::test]
+    async fn test_llm_fallback_invoked_when_below_threshold() {
+        let registry = Arc::new(SkillRegistry::new());
+
+        // Register skills with very similar descriptions to force low confidence
+        let s1 = make_manifest("skill-a", "数据分析师", "数据分析和可视化报告", vec!["数据"]);
+        let s2 = make_manifest("skill-b", "数据工程师", "数据管道和 ETL 处理", vec!["数据"]);
+
+        registry.register(
+            Arc::new(crate::runner::PromptOnlySkill::new(s1.clone(), String::new())),
+            s1,
+        ).await;
+        registry.register(
+            Arc::new(crate::runner::PromptOnlySkill::new(s2.clone(), String::new())),
+            s2,
+        ).await;
+
+        // Router with impossibly high threshold to force LLM fallback
+        let router = SemanticSkillRouter::new_tf_idf_only(registry)
+            .with_confidence_threshold(2.0) // No TF-IDF score can reach this
+            .with_llm_fallback(Arc::new(MockLlmFallback {
+                target_skill_id: "skill-b".to_string(),
+            }));
+
+        let result = router.route("数据处理").await;
+        assert!(result.is_some());
+        let r = result.unwrap();
+        // LLM fallback picks skill-b regardless of TF-IDF ranking
+        assert_eq!(r.skill_id, "skill-b");
+        assert_eq!(r.reasoning, "LLM selected this skill");
+    }
+
+    #[tokio::test]
+    async fn test_no_llm_fallback_when_high_confidence() {
+        let registry = Arc::new(SkillRegistry::new());
+
+        let finance = make_manifest(
+            "finance-tracker",
+            "财务追踪专家",
+            "财务追踪专家 专注于企业财务数据分析、财报解读、盈利能力评估",
+            vec!["财报", "财务分析"],
+        );
+
+        registry.register(
+            Arc::new(crate::runner::PromptOnlySkill::new(finance.clone(), String::new())),
+            finance,
+        ).await;
+
+        // Router with LLM fallback that would pick wrong answer — but high TF-IDF should skip LLM
+        let router = SemanticSkillRouter::new_tf_idf_only(registry)
+            .with_confidence_threshold(0.3) // Low threshold → TF-IDF should exceed it
+            .with_llm_fallback(Arc::new(MockLlmFallback {
+                target_skill_id: "nonexistent".to_string(),
+            }));
+
+        let result = router.route("分析腾讯财报数据").await;
+        assert!(result.is_some());
+        let r = result.unwrap();
+        assert_eq!(r.skill_id, "finance-tracker");
+        // Should NOT be LLM reasoning
+        assert!(r.reasoning.contains("High semantic match"));
+    }
+
+    #[tokio::test]
+    async fn test_no_llm_fallback_returns_best_match() {
+        let registry = Arc::new(SkillRegistry::new());
+
+        let s1 = make_manifest("skill-x", "数据分析师", "数据分析和可视化报告", vec!["数据"]);
+
+        registry.register(
+            Arc::new(crate::runner::PromptOnlySkill::new(s1.clone(), String::new())),
+            s1,
+        ).await;
+
+        // No LLM fallback configured
+        let router = SemanticSkillRouter::new_tf_idf_only(registry)
+            .with_confidence_threshold(0.99);
+
+        let result = router.route("数据分析").await;
+        assert!(result.is_some());
+        // Should still return best TF-IDF match even below threshold
+        assert_eq!(result.unwrap().skill_id, "skill-x");
+    }
+
+    #[test]
+    fn test_tokenize_keeps_single_cjk_char_segment() {
+        let index = SkillTfidfIndex::new();
+        // A one-character CJK segment has no bigram, so it must survive as a unigram.
+        assert!(index.tokenize("猫 财报").contains(&"猫".to_string()));
+    }
 }