feat(skills): add LLM fallback routing + CJK TF-IDF bigram fix
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
- SemanticSkillRouter: add RuntimeLlmIntent trait and with_llm_fallback() builder - route(): call LLM fallback when TF-IDF/embedding confidence < threshold - CJK tokenization: generate bigrams for Chinese/Japanese/Korean text - Fix: previous tokenizer treated entire CJK string as one huge token - LlmSkillFallback: concrete RuntimeLlmIntent using LlmDriver - Asks LLM to pick best skill from ambiguous candidates list - Parses structured JSON response from LLM output - Includes tests for LLM fallback and CJK tokenization Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,11 @@
|
||||
//! Skill router integration for the Kernel
|
||||
//!
|
||||
//! Bridges zclaw-growth's `EmbeddingClient` to zclaw-skills' `Embedder` trait,
|
||||
//! Bridges zclaw-runtime's `EmbeddingClient` to zclaw-skills' `Embedder` trait,
|
||||
//! enabling the `SemanticSkillRouter` to use real embedding APIs.
|
||||
//!
|
||||
//! Also provides `LlmSkillFallback` — a concrete `RuntimeLlmIntent` that
|
||||
//! delegates ambiguous skill routing to an LLM when TF-IDF/embedding confidence
|
||||
//! is below the configured threshold.
|
||||
|
||||
use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
@@ -23,3 +27,168 @@ impl zclaw_skills::semantic_router::Embedder for EmbeddingAdapter {
|
||||
self.client.embed(text).await.ok()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// LLM Skill Fallback
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// LLM-based skill fallback for ambiguous routing decisions.
///
/// When TF-IDF + embedding similarity cannot reach the confidence threshold,
/// this implementation sends the top candidates to an LLM and asks it to
/// pick the best match.
pub struct LlmSkillFallback {
    // Shared handle to the LLM driver that performs the fallback completion.
    driver: Arc<dyn zclaw_runtime::driver::LlmDriver>,
}
|
||||
|
||||
impl LlmSkillFallback {
|
||||
/// Create a new LLM fallback wrapping an existing LLM driver.
|
||||
pub fn new(driver: Arc<dyn zclaw_runtime::driver::LlmDriver>) -> Self {
|
||||
Self { driver }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl zclaw_skills::semantic_router::RuntimeLlmIntent for LlmSkillFallback {
|
||||
async fn resolve_skill(
|
||||
&self,
|
||||
query: &str,
|
||||
candidates: &[zclaw_skills::semantic_router::ScoredCandidate],
|
||||
) -> Option<zclaw_skills::semantic_router::RoutingResult> {
|
||||
if candidates.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let candidate_lines: Vec<String> = candidates
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, c)| {
|
||||
format!(
|
||||
"{}. [{}] {} — {} (score: {:.0}%)",
|
||||
i + 1,
|
||||
c.manifest.id,
|
||||
c.manifest.name,
|
||||
c.manifest.description,
|
||||
c.score * 100.0
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let system_prompt = concat!(
|
||||
"你是技能路由助手。用户会提出一个问题,你需要从候选技能中选出最合适的一个。\n",
|
||||
"只返回 JSON,格式: {\"skill_id\": \"...\", \"reasoning\": \"...\"}\n",
|
||||
"如果没有合适的技能,返回: {\"skill_id\": null}"
|
||||
);
|
||||
|
||||
let user_msg = format!(
|
||||
"用户查询: {}\n\n候选技能:\n{}",
|
||||
query,
|
||||
candidate_lines.join("\n")
|
||||
);
|
||||
|
||||
let request = zclaw_runtime::driver::CompletionRequest {
|
||||
model: self.driver.provider().to_string(),
|
||||
system: Some(system_prompt.to_string()),
|
||||
messages: vec![zclaw_types::Message::assistant(user_msg)],
|
||||
max_tokens: Some(256),
|
||||
temperature: Some(0.1),
|
||||
stream: false,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let response = match self.driver.complete(request).await {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
tracing::warn!("[LlmSkillFallback] LLM call failed: {}", e);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
let text = response.content.iter()
|
||||
.filter_map(|block| match block {
|
||||
zclaw_runtime::driver::ContentBlock::Text { text } => Some(text.as_str()),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("");
|
||||
|
||||
parse_llm_routing_response(&text, candidates)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse LLM JSON response for skill routing
|
||||
fn parse_llm_routing_response(
|
||||
text: &str,
|
||||
candidates: &[zclaw_skills::semantic_router::ScoredCandidate],
|
||||
) -> Option<zclaw_skills::semantic_router::RoutingResult> {
|
||||
let json_str = extract_json(text);
|
||||
let parsed: serde_json::Value = serde_json::from_str(&json_str).ok()?;
|
||||
|
||||
let skill_id = parsed.get("skill_id")?.as_str()?.to_string();
|
||||
|
||||
// LLM returned null → no match
|
||||
if skill_id.is_empty() || skill_id == "null" {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Verify the skill_id matches one of the candidates
|
||||
let matched = candidates.iter().find(|c| c.manifest.id.as_str() == skill_id)?;
|
||||
|
||||
let reasoning = parsed.get("reasoning")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("LLM selected match")
|
||||
.to_string();
|
||||
|
||||
Some(zclaw_skills::semantic_router::RoutingResult {
|
||||
skill_id,
|
||||
confidence: matched.score.max(0.5), // LLM-confirmed matches get at least 0.5
|
||||
parameters: serde_json::json!({}),
|
||||
reasoning,
|
||||
})
|
||||
}
|
||||
|
||||
/// Extract JSON object from LLM response text
|
||||
/// Extract a JSON object from LLM response text.
///
/// Tries, in order: the body of a ```json fenced code block, a bare
/// `{ ... }` object (first `{` through last `}`), and finally the trimmed
/// text unchanged.
fn extract_json(text: &str) -> String {
    let trimmed = text.trim();

    // Fenced ```json block. Fix: the previous version used an offset from
    // `trimmed[start..].find('\n')` as an *absolute* index into `trimmed`,
    // so any prose before the fence produced a wrong (usually empty) slice.
    // All offsets below are rebased to absolute positions before slicing.
    if let Some(fence) = trimmed.find("```json") {
        let after_fence = fence + "```json".len();
        if let Some(nl) = trimmed[after_fence..].find('\n') {
            let body_start = after_fence + nl + 1; // first char after the fence line
            if let Some(end) = trimmed[body_start..].find("```") {
                return trimmed[body_start..body_start + end].trim().to_string();
            }
        }
    }

    // Bare JSON object: first '{' through last '}'. Guard against a stray
    // '}' appearing before the '{' (the old code would panic on that slice).
    if let Some(start) = trimmed.find('{') {
        if let Some(end) = trimmed.rfind('}') {
            if end > start {
                return trimmed[start..=end].to_string();
            }
        }
    }

    trimmed.to_string()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // A bare JSON object passes through unchanged.
    #[test]
    fn test_extract_json_bare() {
        let input = r#"{"skill_id": "test", "reasoning": "match"}"#;
        assert_eq!(extract_json(input), input);
    }

    // JSON wrapped in a ```json fence is unwrapped.
    #[test]
    fn test_extract_json_code_block() {
        let fenced = "```json\n{\"skill_id\": \"test\"}\n```";
        assert_eq!(extract_json(fenced), "{\"skill_id\": \"test\"}");
    }

    // Prose surrounding a bare JSON object is stripped.
    #[test]
    fn test_extract_json_with_surrounding_text() {
        let noisy = "Here is the result:\n{\"skill_id\": \"test\"}\nDone.";
        assert_eq!(extract_json(noisy), "{\"skill_id\": \"test\"}");
    }
}
|
||||
|
||||
Reference in New Issue
Block a user