feat(skills): add LLM fallback routing + CJK TF-IDF bigram fix
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

- SemanticSkillRouter: add RuntimeLlmIntent trait and with_llm_fallback() builder
- route(): call LLM fallback when TF-IDF/embedding confidence < threshold
- CJK tokenization: generate bigrams for Chinese/Japanese/Korean text
- Fix: previous tokenizer treated entire CJK string as one huge token

- SemanticSkillRouter: add RuntimeLlmIntent trait and with_llm_fallback() builder
- route(): call LLM fallback when TF-IDF/embedding confidence < threshold
- CJK tokenization: generate bigrams for Chinese/Japanese/Korean text
- Fix: previous tokenizer treated entire CJK string as one huge token

- LlmSkillFallback: concrete RuntimeLlmIntent using LlmDriver
- Asks LLM to pick best skill from ambiguous candidates list
- Parses structured JSON response from LLM output
- Includes tests for LLM fallback and CJK tokenization

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-04-04 07:44:42 +08:00
parent 769bfdf5d6
commit 1399054547
2 changed files with 384 additions and 16 deletions

View File

@@ -24,6 +24,23 @@ pub trait Embedder: Send + Sync {
async fn embed(&self, text: &str) -> Option<Vec<f32>>;
}
/// Runtime LLM intent resolution trait.
///
/// When TF-IDF + embedding confidence is below the threshold, the router
/// delegates to an LLM to pick the best skill from top candidates.
/// Implementors are expected to be cheap to share (`Send + Sync`) since the
/// router holds them behind an `Arc`.
#[async_trait]
pub trait RuntimeLlmIntent: Send + Sync {
/// Ask the LLM to select the best skill for a query.
///
/// `candidates` are the router's retrieved matches (best-scored first,
/// per the router's usage — confirm ordering against `retrieve_candidates`).
///
/// Returns `None` if the LLM cannot determine a match (e.g. query is
/// genuinely unrelated to all candidates).
async fn resolve_skill(
&self,
query: &str,
candidates: &[ScoredCandidate],
) -> Option<RoutingResult>;
}
/// No-op embedder that always returns None (forces TF-IDF fallback).
///
/// Use when no embedding backend is available; the router then scores
/// candidates with the TF-IDF index alone.
pub struct NoOpEmbedder;
@@ -71,6 +88,8 @@ pub struct SemanticSkillRouter {
skill_embeddings: HashMap<String, Vec<f32>>,
/// Confidence threshold for direct selection (skip LLM)
confidence_threshold: f32,
/// LLM fallback for ambiguous queries (confidence below threshold)
llm_fallback: Option<Arc<dyn RuntimeLlmIntent>>,
}
impl SemanticSkillRouter {
@@ -82,6 +101,7 @@ impl SemanticSkillRouter {
tfidf_index: SkillTfidfIndex::new(),
skill_embeddings: HashMap::new(),
confidence_threshold: 0.85,
llm_fallback: None,
};
router.rebuild_index_sync();
router
@@ -98,6 +118,12 @@ impl SemanticSkillRouter {
self
}
/// Set LLM fallback for ambiguous queries (confidence below threshold).
///
/// Builder-style: consumes and returns `self` so it can be chained after
/// construction. Without a fallback, `route` returns the best
/// TF-IDF/embedding match even when confidence is below the threshold.
pub fn with_llm_fallback(mut self, fallback: Arc<dyn RuntimeLlmIntent>) -> Self {
self.llm_fallback = Some(fallback);
self
}
/// Rebuild the TF-IDF index from current registry manifests
fn rebuild_index_sync(&mut self) {
let manifests = self.registry.manifests_snapshot();
@@ -194,7 +220,7 @@ impl SemanticSkillRouter {
///
/// Returns `None` if no skill matches well enough.
/// If top candidate exceeds `confidence_threshold`, returns directly.
/// Otherwise returns top candidate with lower confidence (caller can invoke LLM fallback).
/// Otherwise, if an LLM fallback is configured, delegates to it for final selection.
pub async fn route(&self, query: &str) -> Option<RoutingResult> {
let candidates = self.retrieve_candidates(query, 3).await;
@@ -204,23 +230,43 @@ impl SemanticSkillRouter {
let best = &candidates[0];
// If score is very low, don't route
// If score is very low, don't route even with LLM
if best.score < 0.1 {
return None;
}
let confidence = best.score;
let reasoning = if confidence >= self.confidence_threshold {
format!("High semantic match ({:.0}%)", confidence * 100.0)
} else {
format!("Best match ({:.0}%) — may need LLM refinement", confidence * 100.0)
};
// High confidence → return directly
if best.score >= self.confidence_threshold {
return Some(RoutingResult {
skill_id: best.manifest.id.to_string(),
confidence: best.score,
parameters: serde_json::json!({}),
reasoning: format!("High semantic match ({:.0}%)", best.score * 100.0),
});
}
// Medium confidence → try LLM fallback if available
if let Some(ref llm) = self.llm_fallback {
if let Some(result) = llm.resolve_skill(query, &candidates).await {
tracing::debug!(
"[SemanticSkillRouter] LLM fallback selected '{}' (original top: '{}' at {:.0}%)",
result.skill_id,
best.manifest.id,
best.score * 100.0
);
return Some(result);
}
}
// No LLM fallback or LLM couldn't decide → return best TF-IDF/embedding match
Some(RoutingResult {
skill_id: best.manifest.id.to_string(),
confidence,
confidence: best.score,
parameters: serde_json::json!({}),
reasoning,
reasoning: format!(
"Best match ({:.0}%) — below threshold, no LLM refinement",
best.score * 100.0
),
})
}
@@ -367,11 +413,58 @@ impl SkillTfidfIndex {
}
/// Tokenize text for TF-IDF scoring.
///
/// Lowercases, splits on non-alphanumeric boundaries, then:
/// - non-CJK segments become word tokens (single chars dropped as noise);
/// - CJK segments become character bigrams (e.g. "财报解读" →
///   ["财报", "报解", "解读"]), plus per-character unigrams for short
///   segments so brief queries still match.
///
/// Stop words are filtered in every path.
fn tokenize(&self, text: &str) -> Vec<String> {
    let lower = text.to_lowercase();
    let mut tokens = Vec::new();
    for segment in lower
        .split(|c: char| !c.is_alphanumeric())
        .filter(|s| !s.is_empty())
    {
        let chars: Vec<char> = segment.chars().collect();
        // Check if segment contains CJK characters
        let has_cjk = chars.iter().any(|&c| Self::is_cjk(c));
        if has_cjk {
            // CJK: generate character bigrams. For a single-character
            // segment, windows(2) yields nothing and the unigram loop
            // below supplies the token (previously such segments were
            // silently dropped because this branch required len >= 2).
            for window in chars.windows(2) {
                let bigram = format!("{}{}", window[0], window[1]);
                if !self.stop_words.contains(&bigram) {
                    tokens.push(bigram);
                }
            }
            // Also add individual CJK chars as unigrams for shorter queries
            if chars.len() <= 4 {
                for &c in &chars {
                    if Self::is_cjk(c) {
                        let s = c.to_string();
                        if !self.stop_words.contains(&s) {
                            tokens.push(s);
                        }
                    }
                }
            }
        } else if segment.len() > 1 {
            // Non-CJK: use the whole word as-is (existing behavior).
            if !self.stop_words.contains(segment) {
                tokens.push(segment.to_string());
            }
        }
    }
    tokens
}
/// Check if a character is CJK (Chinese, Japanese, Korean).
///
/// Covers the main ideograph blocks plus the kana and Hangul-syllable
/// ranges; used by `tokenize` to decide between word and bigram tokens.
fn is_cjk(c: char) -> bool {
    let cp = c as u32;
    (0x4E00..=0x9FFF).contains(&cp)      // CJK Unified Ideographs
        || (0x3400..=0x4DBF).contains(&cp) // CJK Extension A
        || (0xF900..=0xFAFF).contains(&cp) // CJK Compatibility Ideographs
        || (0x3040..=0x309F).contains(&cp) // Hiragana
        || (0x30A0..=0x30FF).contains(&cp) // Katakana
        || (0xAC00..=0xD7AF).contains(&cp) // Hangul Syllables
}
fn cosine_sim_maps(v1: &HashMap<String, f32>, v2: &HashMap<String, f32>) -> f32 {
@@ -516,4 +609,110 @@ mod tests {
let c = vec![0.0, 1.0, 0.0];
assert!((cosine_similarity(&a, &c) - 0.0).abs() < 0.001);
}
/// Mock LLM fallback that always picks the candidate matching target_skill_id
struct MockLlmFallback {
// Skill id the mock "LLM" will select if present among the candidates.
target_skill_id: String,
}
#[async_trait]
impl RuntimeLlmIntent for MockLlmFallback {
// Returns `None` when `target_skill_id` is not among the candidates
// (the `?` on `find` short-circuits), mirroring a real LLM declining to pick.
async fn resolve_skill(
&self,
_query: &str,
candidates: &[ScoredCandidate],
) -> Option<RoutingResult> {
let candidate = candidates.iter().find(|c| c.manifest.id.as_str() == self.target_skill_id)?;
// Fixed confidence/reasoning so tests can assert the LLM path was taken.
Some(RoutingResult {
skill_id: candidate.manifest.id.to_string(),
confidence: 0.75,
parameters: serde_json::json!({}),
reasoning: "LLM selected this skill".to_string(),
})
}
}
// Verifies the LLM fallback path: when confidence cannot reach the threshold,
// the router must delegate to the configured RuntimeLlmIntent and return its
// selection (skill-b) instead of the raw TF-IDF top candidate.
#[tokio::test]
async fn test_llm_fallback_invoked_when_below_threshold() {
let registry = Arc::new(SkillRegistry::new());
// Register skills with very similar descriptions to force low confidence
let s1 = make_manifest("skill-a", "数据分析师", "数据分析和可视化报告", vec!["数据"]);
let s2 = make_manifest("skill-b", "数据工程师", "数据管道和 ETL 处理", vec!["数据"]);
registry.register(
Arc::new(crate::runner::PromptOnlySkill::new(s1.clone(), String::new())),
s1,
).await;
registry.register(
Arc::new(crate::runner::PromptOnlySkill::new(s2.clone(), String::new())),
s2,
).await;
// Router with impossibly high threshold to force LLM fallback
let router = SemanticSkillRouter::new_tf_idf_only(registry)
.with_confidence_threshold(2.0) // No TF-IDF score can reach this
.with_llm_fallback(Arc::new(MockLlmFallback {
target_skill_id: "skill-b".to_string(),
}));
let result = router.route("数据处理").await;
assert!(result.is_some());
let r = result.unwrap();
// LLM fallback picks skill-b regardless of TF-IDF ranking
assert_eq!(r.skill_id, "skill-b");
// The mock's fixed reasoning string proves the LLM path produced the result.
assert_eq!(r.reasoning, "LLM selected this skill");
}
// Verifies the high-confidence short-circuit: when the TF-IDF score clears the
// threshold, the router must return directly and never consult the LLM — the
// mock here would pick a nonexistent skill, so reaching it would fail the test.
#[tokio::test]
async fn test_no_llm_fallback_when_high_confidence() {
let registry = Arc::new(SkillRegistry::new());
let finance = make_manifest(
"finance-tracker",
"财务追踪专家",
"财务追踪专家 专注于企业财务数据分析、财报解读、盈利能力评估",
vec!["财报", "财务分析"],
);
registry.register(
Arc::new(crate::runner::PromptOnlySkill::new(finance.clone(), String::new())),
finance,
).await;
// Router with LLM fallback that would pick wrong answer — but high TF-IDF should skip LLM
let router = SemanticSkillRouter::new_tf_idf_only(registry)
.with_confidence_threshold(0.3) // Low threshold → TF-IDF should exceed it
.with_llm_fallback(Arc::new(MockLlmFallback {
target_skill_id: "nonexistent".to_string(),
}));
let result = router.route("分析腾讯财报数据").await;
assert!(result.is_some());
let r = result.unwrap();
assert_eq!(r.skill_id, "finance-tracker");
// Should NOT be LLM reasoning
assert!(r.reasoning.contains("High semantic match"));
}
// Without any LLM fallback configured, a below-threshold score must still
// yield the best TF-IDF match rather than `None`.
#[tokio::test]
async fn test_no_llm_fallback_returns_best_match() {
    let registry = Arc::new(SkillRegistry::new());
    let manifest = make_manifest("skill-x", "数据分析师", "数据分析和可视化报告", vec!["数据"]);
    registry
        .register(
            Arc::new(crate::runner::PromptOnlySkill::new(manifest.clone(), String::new())),
            manifest,
        )
        .await;

    // Threshold high enough that the score stays below it; no fallback set.
    let router = SemanticSkillRouter::new_tf_idf_only(registry)
        .with_confidence_threshold(0.99);

    let routed = router.route("数据分析").await;
    assert!(routed.is_some());
    // Best TF-IDF match is returned even though confidence is sub-threshold.
    assert_eq!(routed.unwrap().skill_id, "skill-x");
}
}