//! Embedding router tests (EM-01 ~ EM-06) //! //! Validates SemanticSkillRouter with embedding, TF-IDF fallback, //! dimension mismatch handling, empty queries, CJK queries, and LLM fallback. use std::collections::HashMap; use std::sync::Arc; use async_trait::async_trait; use zclaw_skills::semantic_router::{ Embedder, NoOpEmbedder, SemanticSkillRouter, RuntimeLlmIntent, RoutingResult, ScoredCandidate, cosine_similarity, }; use zclaw_skills::{SkillRegistry, PromptOnlySkill, SkillManifest, SkillMode}; use zclaw_types::id::SkillId; fn make_manifest(id: &str, name: &str, triggers: Vec<&str>) -> SkillManifest { SkillManifest { id: SkillId::new(id), name: name.to_string(), description: format!("{} description", name), version: "1.0.0".to_string(), mode: SkillMode::PromptOnly, triggers: triggers.into_iter().map(String::from).collect(), enabled: true, author: None, capabilities: Vec::new(), input_schema: None, output_schema: None, tags: Vec::new(), category: None, tools: Vec::new(), body: None, } } /// Mock embedder that returns fixed 768-dim vectors with variation by text hash. struct MockEmbedder { dim: usize, should_fail: bool, } impl MockEmbedder { fn new(dim: usize) -> Self { Self { dim, should_fail: false } } fn failing() -> Self { Self { dim: 768, should_fail: true } } } #[async_trait] impl Embedder for MockEmbedder { async fn embed(&self, text: &str) -> Option> { if self.should_fail { return None; } // Deterministic vector based on text content let mut vec = vec![0.0f32; self.dim]; for (i, b) in text.as_bytes().iter().enumerate() { vec[i % self.dim] += (*b as f32) / 255.0; } // Normalize let norm: f32 = vec.iter().map(|v| v * v).sum::().sqrt().max(1e-8); for v in vec.iter_mut() { *v /= norm; } Some(vec) } } /// Helper: register skills and build router with embedding. 
async fn build_router_with_skills(
    embedder: Arc<dyn Embedder>,
    skills: Vec<(&str, &str, Vec<&str>)>,
) -> SemanticSkillRouter {
    // Each tuple is (skill id, display name, trigger phrases).
    let registry = Arc::new(SkillRegistry::new());
    for (id, name, triggers) in skills {
        let manifest = make_manifest(id, name, triggers);
        registry
            .register(
                Arc::new(zclaw_skills::PromptOnlySkill::new(
                    manifest.clone(),
                    format!("Execute {}", name),
                )),
                manifest,
            )
            .await;
    }

    // The index is rebuilt after all skills are registered, before the router
    // is handed to the test.
    let mut router = SemanticSkillRouter::new(registry, embedder);
    router.rebuild_index().await;
    router
}

/// EM-01: Embedding API normal routing with 70/30 hybrid scoring.
#[tokio::test]
async fn em01_embedding_normal_routing() {
    let router = build_router_with_skills(
        Arc::new(MockEmbedder::new(768)),
        vec![
            ("finance", "财务追踪", vec!["财务", "花销", "支出", "账单"]),
            ("scheduling", "排班管理", vec!["排班", "班表", "值班"]),
            ("news", "新闻搜索", vec!["新闻", "资讯", "头条"]),
        ],
    )
    .await;

    let r = router
        .route("帮我查一下上个月的花销")
        .await
        .expect("should match a skill");
    assert_eq!(r.skill_id, "finance", "should match finance skill");
    assert!(
        r.confidence > 0.1,
        "confidence should exceed 0.1: {}",
        r.confidence
    );
}

/// EM-02: Embedding API failure degrades to TF-IDF.
#[tokio::test]
async fn em02_embedding_failure_fallback_to_tfidf() {
    let router = build_router_with_skills(
        Arc::new(MockEmbedder::failing()),
        vec![
            ("finance", "财务追踪", vec!["财务", "花销"]),
            ("scheduling", "排班管理", vec!["排班", "班表"]),
        ],
    )
    .await;

    // Should still return results via TF-IDF fallback
    let result = router.route("帮我查花销").await;
    assert!(
        result.is_some(),
        "TF-IDF fallback should still produce results"
    );
}

/// EM-03: Embedding dimension mismatch — no panic.
#[tokio::test] async fn em03_embedding_dimension_mismatch() { // Use a mismatched embedder that returns different dimensions struct MismatchedEmbedder; #[async_trait] impl Embedder for MismatchedEmbedder { async fn embed(&self, _text: &str) -> Option> { // Return a small vector — won't match index embeddings Some(vec![0.5; 64]) } } let router = build_router_with_skills( Arc::new(MismatchedEmbedder), vec![("finance", "财务追踪", vec!["财务", "花销"])], ) .await; // Should not panic let result = router.route("查花销").await; // May return None or a result via TF-IDF — key assertion: no panic let _ = result; } /// EM-04: Empty query handling. #[tokio::test] async fn em04_empty_query_handling() { let router = build_router_with_skills( Arc::new(MockEmbedder::new(768)), vec![("finance", "财务追踪", vec!["财务"])], ) .await; let result = router.route("").await; // Empty query may return None or a low-confidence result // Key: no panic let _ = result; } /// EM-05: Pure Chinese CJK query with bigram matching. #[tokio::test] async fn em05_cjk_query_matching() { let router = build_router_with_skills( Arc::new(NoOpEmbedder), // TF-IDF only vec![ ("scheduling", "排班管理", vec!["排班", "班表", "值班"]), ("news", "新闻搜索", vec!["新闻"]), ], ) .await; let result = router.route("我这个月值班表怎么排").await; assert!(result.is_some(), "CJK query should match"); assert_eq!( result.unwrap().skill_id, "scheduling", "should match scheduling skill" ); } /// EM-06: LLM fallback triggered for ambiguous queries. 
#[tokio::test] async fn em06_llm_fallback_triggered() { struct MockLlmFallback { target: String, } #[async_trait] impl RuntimeLlmIntent for MockLlmFallback { async fn resolve_skill( &self, _query: &str, candidates: &[ScoredCandidate], ) -> Option { let c = candidates .iter() .find(|c| c.manifest.id.as_str() == self.target)?; Some(RoutingResult { skill_id: c.manifest.id.to_string(), confidence: 0.75, parameters: serde_json::json!({}), reasoning: "LLM selected".to_string(), }) } } let registry = Arc::new(SkillRegistry::new()); let manifest = make_manifest("helper", "通用助手", vec!["帮助", "处理"]); registry .register( Arc::new(zclaw_skills::PromptOnlySkill::new( manifest.clone(), "Help".to_string(), )), manifest, ) .await; let mut router = SemanticSkillRouter::new_tf_idf_only(registry) .with_confidence_threshold(100.0) // Force all to be below threshold .with_llm_fallback(Arc::new(MockLlmFallback { target: "helper".to_string(), })); router.rebuild_index().await; let result = router.route("帮我处理一下那个东西").await; assert!(result.is_some(), "LLM fallback should resolve"); assert_eq!(result.unwrap().skill_id, "helper"); } /// Bonus: cosine_similarity utility correctness. #[test] fn cosine_similarity_identical_vectors() { let v = vec![1.0, 0.0, 1.0, 0.0]; let sim = cosine_similarity(&v, &v); assert!((sim - 1.0).abs() < 1e-6, "identical vectors => cosine=1.0"); } #[test] fn cosine_similarity_orthogonal_vectors() { let a = vec![1.0, 0.0]; let b = vec![0.0, 1.0]; let sim = cosine_similarity(&a, &b); assert!(sim.abs() < 1e-6, "orthogonal => cosine≈0"); } #[test] fn cosine_similarity_mismatched_dimensions() { let a = vec![1.0, 0.0, 1.0]; let b = vec![1.0, 0.0]; let sim = cosine_similarity(&a, &b); assert_eq!(sim, 0.0, "mismatched dimensions => 0.0"); }