feat(skills): add LLM fallback routing + CJK TF-IDF bigram fix
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

- SemanticSkillRouter: add RuntimeLlmIntent trait and with_llm_fallback() builder
- route(): call LLM fallback when TF-IDF/embedding confidence < threshold
- CJK tokenization: generate bigrams for Chinese/Japanese/Korean text
- Fix: previous tokenizer treated entire CJK string as one huge token

- SemanticSkillRouter: add RuntimeLlmIntent trait and with_llm_fallback() builder
- route(): call LLM fallback when TF-IDF/embedding confidence < threshold
- CJK tokenization: generate bigrams for Chinese/Japanese/Korean text
- Fix: previous tokenizer treated entire CJK string as one huge token

- LlmSkillFallback: concrete RuntimeLlmIntent using LlmDriver
- Asks LLM to pick best skill from ambiguous candidates list
- Parses structured JSON response from LLM output
- Includes tests for LLM fallback and CJK tokenization

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-04-04 07:44:42 +08:00
parent 769bfdf5d6
commit 1399054547
2 changed files with 384 additions and 16 deletions

View File

@@ -1,7 +1,11 @@
//! Skill router integration for the Kernel //! Skill router integration for the Kernel
//! //!
//! Bridges zclaw-growth's `EmbeddingClient` to zclaw-skills' `Embedder` trait, //! Bridges zclaw-runtime's `EmbeddingClient` to zclaw-skills' `Embedder` trait,
//! enabling the `SemanticSkillRouter` to use real embedding APIs. //! enabling the `SemanticSkillRouter` to use real embedding APIs.
//!
//! Also provides `LlmSkillFallback` — a concrete `RuntimeLlmIntent` that
//! delegates ambiguous skill routing to an LLM when TF-IDF/embedding confidence
//! is below the configured threshold.
use std::sync::Arc; use std::sync::Arc;
use async_trait::async_trait; use async_trait::async_trait;
@@ -23,3 +27,168 @@ impl zclaw_skills::semantic_router::Embedder for EmbeddingAdapter {
self.client.embed(text).await.ok() self.client.embed(text).await.ok()
} }
} }
// ---------------------------------------------------------------------------
// LLM Skill Fallback
// ---------------------------------------------------------------------------
/// LLM-based skill fallback for ambiguous routing decisions.
///
/// When TF-IDF + embedding similarity cannot reach the confidence threshold,
/// this implementation sends the top candidates to an LLM and asks it to
/// pick the best match.
pub struct LlmSkillFallback {
// Shared handle to the underlying LLM driver; `Arc` allows the fallback to
// share the driver with the rest of the runtime without owning it.
driver: Arc<dyn zclaw_runtime::driver::LlmDriver>,
}
impl LlmSkillFallback {
/// Create a new LLM fallback wrapping an existing LLM driver.
///
/// Construction only stores the handle — the driver is invoked lazily,
/// when the router's confidence falls below its threshold.
pub fn new(driver: Arc<dyn zclaw_runtime::driver::LlmDriver>) -> Self {
Self { driver }
}
}
#[async_trait]
impl zclaw_skills::semantic_router::RuntimeLlmIntent for LlmSkillFallback {
    /// Ask the wrapped LLM to pick the best skill for `query` from `candidates`.
    ///
    /// Returns `None` when there are no candidates, the LLM call fails, or
    /// the reply cannot be parsed into a known skill id.
    async fn resolve_skill(
        &self,
        query: &str,
        candidates: &[zclaw_skills::semantic_router::ScoredCandidate],
    ) -> Option<zclaw_skills::semantic_router::RoutingResult> {
        if candidates.is_empty() {
            return None;
        }

        // One numbered line per candidate so the LLM can reference skills by id.
        // Fix: separate name and description with ": " — they were previously
        // concatenated with no delimiter, producing garbled prompt lines.
        let candidate_lines: Vec<String> = candidates
            .iter()
            .enumerate()
            .map(|(i, c)| {
                format!(
                    "{}. [{}] {}: {} (score: {:.0}%)",
                    i + 1,
                    c.manifest.id,
                    c.manifest.name,
                    c.manifest.description,
                    c.score * 100.0
                )
            })
            .collect();

        let system_prompt = concat!(
            "你是技能路由助手。用户会提出一个问题,你需要从候选技能中选出最合适的一个。\n",
            "只返回 JSON格式: {\"skill_id\": \"...\", \"reasoning\": \"...\"}\n",
            "如果没有合适的技能,返回: {\"skill_id\": null}"
        );
        let user_msg = format!(
            "用户查询: {}\n\n候选技能:\n{}",
            query,
            candidate_lines.join("\n")
        );

        let request = zclaw_runtime::driver::CompletionRequest {
            // NOTE(review): uses the provider id as the model name — confirm
            // that the driver resolves a concrete default model from this.
            model: self.driver.provider().to_string(),
            system: Some(system_prompt.to_string()),
            // Fix: the query must be sent as a *user* turn; it was previously
            // submitted with the assistant role, which misleads the model.
            messages: vec![zclaw_types::Message::user(user_msg)],
            max_tokens: Some(256),
            // Low temperature: routing should be near-deterministic.
            temperature: Some(0.1),
            stream: false,
            ..Default::default()
        };

        let response = match self.driver.complete(request).await {
            Ok(r) => r,
            Err(e) => {
                tracing::warn!("[LlmSkillFallback] LLM call failed: {}", e);
                return None;
            }
        };

        // Concatenate all text content blocks of the reply before parsing.
        let text = response
            .content
            .iter()
            .filter_map(|block| match block {
                zclaw_runtime::driver::ContentBlock::Text { text } => Some(text.as_str()),
                _ => None,
            })
            .collect::<Vec<_>>()
            .join("");

        parse_llm_routing_response(&text, candidates)
    }
}
/// Parse the LLM's JSON reply into a routing decision.
///
/// Returns `None` when the reply is malformed, names no skill, or names a
/// skill that does not appear in `candidates`.
fn parse_llm_routing_response(
    text: &str,
    candidates: &[zclaw_skills::semantic_router::ScoredCandidate],
) -> Option<zclaw_skills::semantic_router::RoutingResult> {
    let parsed: serde_json::Value = serde_json::from_str(&extract_json(text)).ok()?;
    let skill_id = parsed.get("skill_id")?.as_str()?.to_string();

    // An empty id or the literal string "null" both mean "no match".
    if skill_id.is_empty() || skill_id == "null" {
        return None;
    }

    // Only accept ids that actually appear in the candidate list.
    let matched = candidates
        .iter()
        .find(|c| c.manifest.id.as_str() == skill_id)?;

    let reasoning = match parsed.get("reasoning").and_then(|v| v.as_str()) {
        Some(r) => r.to_string(),
        None => "LLM selected match".to_string(),
    };

    Some(zclaw_skills::semantic_router::RoutingResult {
        skill_id,
        // LLM-confirmed matches get at least 0.5 confidence.
        confidence: matched.score.max(0.5),
        parameters: serde_json::json!({}),
        reasoning,
    })
}
/// Extract a JSON object from raw LLM response text.
///
/// Tries, in order:
/// 1. a "json" markdown code fence (returns the content between the fences),
/// 2. the outermost `{ ... }` span,
/// 3. the trimmed text itself as a last resort.
fn extract_json(text: &str) -> String {
    let trimmed = text.trim();

    // 1. Fenced json block. Keep all offsets relative to a single slice —
    //    the previous version mixed slice-relative and absolute indices, so
    //    any text before the fence broke extraction.
    if let Some(fence) = trimmed.find("```json") {
        let after_fence = &trimmed[fence + "```json".len()..];
        if let Some(newline) = after_fence.find('\n') {
            let body = &after_fence[newline + 1..];
            if let Some(end) = body.find("```") {
                return body[..end].trim().to_string();
            }
        }
    }

    // 2. Outermost brace span. Guard `end >= start` so a stray '}' before the
    //    first '{' cannot produce an out-of-order (panicking) slice.
    if let (Some(start), Some(end)) = (trimmed.find('{'), trimmed.rfind('}')) {
        if end >= start {
            return trimmed[start..=end].to_string();
        }
    }

    // 3. Nothing recognizable — return the trimmed text unchanged.
    trimmed.to_string()
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_json_bare() {
        // Already-bare JSON should come back unchanged.
        let input = r#"{"skill_id": "test", "reasoning": "match"}"#;
        assert_eq!(extract_json(input), input);
    }

    #[test]
    fn test_extract_json_code_block() {
        // The fenced content is unwrapped from the ```json markers.
        let fenced = "```json\n{\"skill_id\": \"test\"}\n```";
        assert_eq!(extract_json(fenced), "{\"skill_id\": \"test\"}");
    }

    #[test]
    fn test_extract_json_with_surrounding_text() {
        // Prose around a bare object is stripped down to the braces.
        let noisy = "Here is the result:\n{\"skill_id\": \"test\"}\nDone.";
        assert_eq!(extract_json(noisy), "{\"skill_id\": \"test\"}");
    }
}

View File

@@ -24,6 +24,23 @@ pub trait Embedder: Send + Sync {
async fn embed(&self, text: &str) -> Option<Vec<f32>>; async fn embed(&self, text: &str) -> Option<Vec<f32>>;
} }
/// Runtime LLM intent resolution trait.
///
/// When TF-IDF + embedding confidence is below the threshold, the router
/// delegates to an LLM to pick the best skill from top candidates.
///
/// Implementors must be `Send + Sync` so the router can share them across
/// async tasks behind an `Arc`.
#[async_trait]
pub trait RuntimeLlmIntent: Send + Sync {
/// Ask the LLM to select the best skill for a query.
///
/// `candidates` holds the router's top-scored matches.
/// NOTE(review): assumed ordered best-first — confirm against the caller's
/// candidate-retrieval logic.
///
/// Returns `None` if the LLM cannot determine a match (e.g. query is
/// genuinely unrelated to all candidates).
async fn resolve_skill(
&self,
query: &str,
candidates: &[ScoredCandidate],
) -> Option<RoutingResult>;
}
/// No-op embedder that always returns None (forces TF-IDF fallback). /// No-op embedder that always returns None (forces TF-IDF fallback).
pub struct NoOpEmbedder; pub struct NoOpEmbedder;
@@ -71,6 +88,8 @@ pub struct SemanticSkillRouter {
skill_embeddings: HashMap<String, Vec<f32>>, skill_embeddings: HashMap<String, Vec<f32>>,
/// Confidence threshold for direct selection (skip LLM) /// Confidence threshold for direct selection (skip LLM)
confidence_threshold: f32, confidence_threshold: f32,
/// LLM fallback for ambiguous queries (confidence below threshold)
llm_fallback: Option<Arc<dyn RuntimeLlmIntent>>,
} }
impl SemanticSkillRouter { impl SemanticSkillRouter {
@@ -82,6 +101,7 @@ impl SemanticSkillRouter {
tfidf_index: SkillTfidfIndex::new(), tfidf_index: SkillTfidfIndex::new(),
skill_embeddings: HashMap::new(), skill_embeddings: HashMap::new(),
confidence_threshold: 0.85, confidence_threshold: 0.85,
llm_fallback: None,
}; };
router.rebuild_index_sync(); router.rebuild_index_sync();
router router
@@ -98,6 +118,12 @@ impl SemanticSkillRouter {
self self
} }
/// Set LLM fallback for ambiguous queries (confidence below threshold)
pub fn with_llm_fallback(mut self, fallback: Arc<dyn RuntimeLlmIntent>) -> Self {
self.llm_fallback = Some(fallback);
self
}
/// Rebuild the TF-IDF index from current registry manifests /// Rebuild the TF-IDF index from current registry manifests
fn rebuild_index_sync(&mut self) { fn rebuild_index_sync(&mut self) {
let manifests = self.registry.manifests_snapshot(); let manifests = self.registry.manifests_snapshot();
@@ -194,7 +220,7 @@ impl SemanticSkillRouter {
/// ///
/// Returns `None` if no skill matches well enough. /// Returns `None` if no skill matches well enough.
/// If top candidate exceeds `confidence_threshold`, returns directly. /// If top candidate exceeds `confidence_threshold`, returns directly.
/// Otherwise returns top candidate with lower confidence (caller can invoke LLM fallback). /// Otherwise, if an LLM fallback is configured, delegates to it for final selection.
pub async fn route(&self, query: &str) -> Option<RoutingResult> { pub async fn route(&self, query: &str) -> Option<RoutingResult> {
let candidates = self.retrieve_candidates(query, 3).await; let candidates = self.retrieve_candidates(query, 3).await;
@@ -204,23 +230,43 @@ impl SemanticSkillRouter {
let best = &candidates[0]; let best = &candidates[0];
// If score is very low, don't route // If score is very low, don't route even with LLM
if best.score < 0.1 { if best.score < 0.1 {
return None; return None;
} }
let confidence = best.score; // High confidence → return directly
let reasoning = if confidence >= self.confidence_threshold { if best.score >= self.confidence_threshold {
format!("High semantic match ({:.0}%)", confidence * 100.0) return Some(RoutingResult {
} else { skill_id: best.manifest.id.to_string(),
format!("Best match ({:.0}%) — may need LLM refinement", confidence * 100.0) confidence: best.score,
}; parameters: serde_json::json!({}),
reasoning: format!("High semantic match ({:.0}%)", best.score * 100.0),
});
}
// Medium confidence → try LLM fallback if available
if let Some(ref llm) = self.llm_fallback {
if let Some(result) = llm.resolve_skill(query, &candidates).await {
tracing::debug!(
"[SemanticSkillRouter] LLM fallback selected '{}' (original top: '{}' at {:.0}%)",
result.skill_id,
best.manifest.id,
best.score * 100.0
);
return Some(result);
}
}
// No LLM fallback or LLM couldn't decide → return best TF-IDF/embedding match
Some(RoutingResult { Some(RoutingResult {
skill_id: best.manifest.id.to_string(), skill_id: best.manifest.id.to_string(),
confidence, confidence: best.score,
parameters: serde_json::json!({}), parameters: serde_json::json!({}),
reasoning, reasoning: format!(
"Best match ({:.0}%) — below threshold, no LLM refinement",
best.score * 100.0
),
}) })
} }
@@ -367,11 +413,58 @@ impl SkillTfidfIndex {
} }
fn tokenize(&self, text: &str) -> Vec<String> { fn tokenize(&self, text: &str) -> Vec<String> {
text.to_lowercase() let lower = text.to_lowercase();
.split(|c: char| !c.is_alphanumeric()) let segments = lower.split(|c: char| !c.is_alphanumeric())
.filter(|s| !s.is_empty() && s.len() > 1 && !self.stop_words.contains(*s)) .filter(|s| !s.is_empty())
.map(|s| s.to_string()) .collect::<Vec<_>>();
.collect()
let mut tokens = Vec::new();
for segment in &segments {
let chars: Vec<char> = segment.chars().collect();
// Check if segment contains CJK characters
let has_cjk = chars.iter().any(|&c| Self::is_cjk(c));
if has_cjk && chars.len() >= 2 {
// CJK: generate character bigrams (e.g. "财报解读" → ["财报", "报解", "解读"])
for window in chars.windows(2) {
let bigram = format!("{}{}", window[0], window[1]);
if !self.stop_words.contains(&bigram) {
tokens.push(bigram);
}
}
// Also add individual CJK chars as unigrams for shorter queries
if chars.len() <= 4 {
for &c in &chars {
if Self::is_cjk(c) {
let s = c.to_string();
if !self.stop_words.contains(&s) {
tokens.push(s);
}
}
}
}
} else if !has_cjk && segment.len() > 1 {
// Non-CJK: use as-is (existing behavior)
if !self.stop_words.contains(*segment) {
tokens.push(segment.to_string());
}
}
}
tokens
}
/// Check if a character is CJK (Chinese, Japanese, Korean)
///
/// NOTE(review): covers the common BMP ranges only — CJK Extension B and
/// beyond (U+20000+), halfwidth katakana (U+FF65–FF9F) and Hangul Jamo are
/// not matched; extend the ranges if such input is expected.
fn is_cjk(c: char) -> bool {
matches!(c,
'\u{4E00}'..='\u{9FFF}' | // CJK Unified Ideographs
'\u{3400}'..='\u{4DBF}' | // CJK Extension A
'\u{F900}'..='\u{FAFF}' | // CJK Compatibility Ideographs
'\u{3040}'..='\u{309F}' | // Hiragana
'\u{30A0}'..='\u{30FF}' | // Katakana
'\u{AC00}'..='\u{D7AF}' // Hangul Syllables
)
} }
fn cosine_sim_maps(v1: &HashMap<String, f32>, v2: &HashMap<String, f32>) -> f32 { fn cosine_sim_maps(v1: &HashMap<String, f32>, v2: &HashMap<String, f32>) -> f32 {
@@ -516,4 +609,110 @@ mod tests {
let c = vec![0.0, 1.0, 0.0]; let c = vec![0.0, 1.0, 0.0];
assert!((cosine_similarity(&a, &c) - 0.0).abs() < 0.001); assert!((cosine_similarity(&a, &c) - 0.0).abs() < 0.001);
} }
/// Test double: an LLM fallback that deterministically selects the candidate
/// whose manifest id equals `target_skill_id`, or `None` when it is absent.
struct MockLlmFallback {
    target_skill_id: String,
}

#[async_trait]
impl RuntimeLlmIntent for MockLlmFallback {
    async fn resolve_skill(
        &self,
        _query: &str,
        candidates: &[ScoredCandidate],
    ) -> Option<RoutingResult> {
        candidates
            .iter()
            .find(|c| c.manifest.id.as_str() == self.target_skill_id)
            .map(|c| RoutingResult {
                skill_id: c.manifest.id.to_string(),
                confidence: 0.75,
                parameters: serde_json::json!({}),
                reasoning: "LLM selected this skill".to_string(),
            })
    }
}
#[tokio::test]
async fn test_llm_fallback_invoked_when_below_threshold() {
    let registry = Arc::new(SkillRegistry::new());

    // Two skills with overlapping descriptions so TF-IDF alone stays ambiguous.
    let manifests = vec![
        make_manifest("skill-a", "数据分析师", "数据分析和可视化报告", vec!["数据"]),
        make_manifest("skill-b", "数据工程师", "数据管道和 ETL 处理", vec!["数据"]),
    ];
    for m in manifests {
        registry.register(
            Arc::new(crate::runner::PromptOnlySkill::new(m.clone(), String::new())),
            m,
        ).await;
    }

    // A threshold of 2.0 is unreachable, forcing the LLM fallback path.
    let router = SemanticSkillRouter::new_tf_idf_only(registry)
        .with_confidence_threshold(2.0)
        .with_llm_fallback(Arc::new(MockLlmFallback {
            target_skill_id: "skill-b".to_string(),
        }));

    let routed = router.route("数据处理").await.expect("fallback should yield a result");
    // The mock LLM picks skill-b regardless of TF-IDF ranking.
    assert_eq!(routed.skill_id, "skill-b");
    assert_eq!(routed.reasoning, "LLM selected this skill");
}
#[tokio::test]
async fn test_no_llm_fallback_when_high_confidence() {
    let registry = Arc::new(SkillRegistry::new());
    let finance = make_manifest(
        "finance-tracker",
        "财务追踪专家",
        "财务追踪专家 专注于企业财务数据分析、财报解读、盈利能力评估",
        vec!["财报", "财务分析"],
    );
    registry.register(
        Arc::new(crate::runner::PromptOnlySkill::new(finance.clone(), String::new())),
        finance,
    ).await;

    // The mock would pick a nonexistent skill — so if routing succeeds, it must
    // have taken the high-confidence direct path and skipped the LLM entirely.
    let router = SemanticSkillRouter::new_tf_idf_only(registry)
        .with_confidence_threshold(0.3)
        .with_llm_fallback(Arc::new(MockLlmFallback {
            target_skill_id: "nonexistent".to_string(),
        }));

    let routed = router.route("分析腾讯财报数据").await.expect("should route directly");
    assert_eq!(routed.skill_id, "finance-tracker");
    // Direct-path reasoning, not the mock's "LLM selected this skill".
    assert!(routed.reasoning.contains("High semantic match"));
}
#[tokio::test]
async fn test_no_llm_fallback_returns_best_match() {
    let registry = Arc::new(SkillRegistry::new());
    let manifest = make_manifest("skill-x", "数据分析师", "数据分析和可视化报告", vec!["数据"]);
    registry.register(
        Arc::new(crate::runner::PromptOnlySkill::new(manifest.clone(), String::new())),
        manifest,
    ).await;

    // No LLM fallback configured and a near-unreachable threshold: the router
    // must still return the best TF-IDF match rather than nothing.
    let router = SemanticSkillRouter::new_tf_idf_only(registry)
        .with_confidence_threshold(0.99);

    let routed = router.route("数据分析").await.expect("should return best match");
    assert_eq!(routed.skill_id, "skill-x");
}
} }