From 59fc7debd6812e9c9e3f0f0dd239216d43e20f0b Mon Sep 17 00:00:00 2001 From: iven Date: Wed, 1 Apr 2026 23:16:57 +0800 Subject: [PATCH] feat(hands): add 25 unit tests + fix summary + fix HTML extraction for ResearcherHand - Add comprehensive test suite: config, types, action deserialization, URL encoding, HTML text extraction, hand trait methods - Fix summary field: generate rule-based summary from top search results (was always None) - Fix extract_text_from_html: correct position tracking for script/style tag detection Co-Authored-By: Claude Opus 4.6 --- crates/zclaw-hands/src/hands/researcher.rs | 321 ++++++++++++++++++++- 1 file changed, 310 insertions(+), 11 deletions(-) diff --git a/crates/zclaw-hands/src/hands/researcher.rs b/crates/zclaw-hands/src/hands/researcher.rs index 1a38853..c737a00 100644 --- a/crates/zclaw-hands/src/hands/researcher.rs +++ b/crates/zclaw-hands/src/hands/researcher.rs @@ -344,31 +344,34 @@ impl ResearcherHand { /// Extract readable text from HTML fn extract_text_from_html(&self, html: &str) -> String { - // Simple text extraction - remove HTML tags + let html_lower = html.to_lowercase(); let mut text = String::new(); let mut in_tag = false; let mut in_script = false; let mut in_style = false; + let mut pos: usize = 0; for c in html.chars() { + let char_len = c.len_utf8(); match c { '<' => { - in_tag = true; - let remaining = html[text.len()..].to_lowercase(); + // Check for closing tags before entering tag mode + let remaining = &html_lower[pos..]; + if remaining.starts_with("' => { in_tag = false; - let remaining = html[text.len()..].to_lowercase(); - if remaining.starts_with("") { - in_script = false; - } else if remaining.starts_with("") { - in_style = false; - } } _ if in_tag => {} _ if in_script || in_style => {} @@ -379,9 +382,9 @@ impl ResearcherHand { } _ => text.push(c), } + pos += char_len; } - // Limit length if text.len() > 10000 { text.truncate(10000); text.push_str("..."); @@ -445,10 +448,33 @@ impl ResearcherHand { let duration = start.elapsed().as_millis() as u64; + // Generate summary from top results + let summary = if results.is_empty() { + "未找到相关结果,建议调整搜索关键词后重试".to_string() + } else { + let top_snippets: Vec<&str> = results + .iter() + .take(3) + .filter_map(|r| { + let s = r.snippet.trim(); + if s.is_empty() { None } else { Some(s) } + }) + .collect(); + if top_snippets.is_empty() { + format!("找到 {} 条相关结果,但无摘要信息", results.len()) + } else { + format!( + "基于 {} 条搜索结果:{}", + results.len(), + top_snippets.join(";") + ) + } + }; + Ok(ResearchReport { query: query.query.clone(), results, - summary: None, // Would require LLM integration + summary: Some(summary), key_findings, related_topics, researched_at: chrono::Utc::now().to_rfc3339(), @@ -543,3 +569,276 @@ fn url_encode(s: &str) -> String { }) .collect() } + +#[cfg(test)] +mod tests { + use super::*; + + fn create_test_hand() -> ResearcherHand { + ResearcherHand::new() + } + + fn test_context() -> HandContext { + HandContext::default() + } + + // --- Config & Type Tests --- + + #[test] + fn test_config_id() { + let hand = create_test_hand(); + assert_eq!(hand.config().id, "researcher"); + assert_eq!(hand.config().name, "研究员"); + assert!(hand.config().enabled); + assert!(!hand.config().needs_approval); + } + + #[test] + fn test_search_engine_default_is_auto() { + let engine = SearchEngine::default(); + assert!(matches!(engine, SearchEngine::Auto)); + } + + #[test] + fn test_research_depth_default_is_standard() { + let depth = ResearchDepth::default(); + assert!(matches!(depth, ResearchDepth::Standard)); + } + + #[test] + fn test_research_depth_serialize() { + let json = serde_json::to_string(&ResearchDepth::Deep).unwrap(); + assert_eq!(json, "\"deep\""); + } + + #[test] + fn test_research_depth_deserialize() { + let depth: ResearchDepth = serde_json::from_str("\"quick\"").unwrap(); + assert!(matches!(depth, ResearchDepth::Quick)); + } + + #[test] + fn test_search_engine_serialize_roundtrip() { + for engine in [SearchEngine::Google, SearchEngine::Bing, SearchEngine::DuckDuckGo, SearchEngine::Auto] { + let json = serde_json::to_string(&engine).unwrap(); + let back: SearchEngine = serde_json::from_str(&json).unwrap(); + assert_eq!(json, serde_json::to_string(&back).unwrap()); + } + } + + // --- Action Deserialization Tests --- + + #[test] + fn test_action_search_deserialize() { + let json = json!({ + "action": "search", + "query": { + "query": "Rust programming", + "engine": "duckduckgo", + "depth": "quick", + "maxResults": 5 + } + }); + let action: ResearcherAction = serde_json::from_value(json).unwrap(); + match action { + ResearcherAction::Search { query } => { + assert_eq!(query.query, "Rust programming"); + assert!(matches!(query.engine, SearchEngine::DuckDuckGo)); + assert!(matches!(query.depth, ResearchDepth::Quick)); + assert_eq!(query.max_results, 5); + } + _ => panic!("Expected Search action"), + } + } + + #[test] + fn test_action_fetch_deserialize() { + let json = json!({ + "action": "fetch", + "url": "https://example.com/page" + }); + let action: ResearcherAction = serde_json::from_value(json).unwrap(); + match action { + ResearcherAction::Fetch { url } => { + assert_eq!(url, "https://example.com/page"); + } + _ => panic!("Expected Fetch action"), + } + } + + #[test] + fn test_action_report_deserialize() { + let json = json!({ + "action": "report", + "query": { + "query": "AI trends 2026", + "depth": "deep" + } + }); + let action: ResearcherAction = serde_json::from_value(json).unwrap(); + match action { + ResearcherAction::Report { query } => { + assert_eq!(query.query, "AI trends 2026"); + assert!(matches!(query.depth, ResearchDepth::Deep)); + } + _ => panic!("Expected Report action"), + } + } + + #[test] + fn test_action_invalid_rejected() { + let json = json!({ + "action": "unknown_action", + "data": "whatever" + }); + let result: std::result::Result = serde_json::from_value(json); + assert!(result.is_err()); + } + + // --- URL Encoding Tests --- + + #[test] + fn test_url_encode_ascii() { + assert_eq!(url_encode("hello world"), "hello%20world"); + } + + #[test] + fn test_url_encode_chinese() { + let encoded = url_encode("中文搜索"); + assert!(encoded.contains("%")); + // Chinese chars should be percent-encoded + assert!(!encoded.contains("中文")); + } + + #[test] + fn test_url_encode_safe_chars() { + assert_eq!(url_encode("abc123-_."), "abc123-_.".to_string()); + } + + #[test] + fn test_url_encode_empty() { + assert_eq!(url_encode(""), ""); + } + + // --- HTML Text Extraction Tests --- + + #[test] + fn test_extract_text_basic() { + let hand = create_test_hand(); + let html = "

Title

Content here

"; + let text = hand.extract_text_from_html(html); + assert!(text.contains("Title")); + assert!(text.contains("Content here")); + } + + #[test] + fn test_extract_text_strips_scripts() { + let hand = create_test_hand(); + let html = "

Safe text

"; + let text = hand.extract_text_from_html(html); + assert!(!text.contains("alert")); + assert!(text.contains("Safe text")); + } + + #[test] + fn test_extract_text_strips_styles() { + let hand = create_test_hand(); + let html = "

Visible

"; + let text = hand.extract_text_from_html(html); + assert!(!text.contains("color")); + assert!(text.contains("Visible")); + } + + #[test] + fn test_extract_text_truncates_long_content() { + let hand = create_test_hand(); + let long_body: String = "x".repeat(20000); + let html = format!("

{}

", long_body); + let text = hand.extract_text_from_html(&html); + assert!(text.len() <= 10003); // 10000 + "..." + } + + #[test] + fn test_extract_text_empty_body() { + let hand = create_test_hand(); + let html = ""; + let text = hand.extract_text_from_html(html); + assert!(text.is_empty()); + } + + // --- Hand Trait Tests --- + + #[tokio::test] + async fn test_needs_approval_is_false() { + let hand = create_test_hand(); + assert!(!hand.needs_approval()); + } + + #[tokio::test] + async fn test_status_is_idle() { + let hand = create_test_hand(); + assert!(matches!(hand.status(), crate::HandStatus::Idle)); + } + + #[tokio::test] + async fn test_check_dependencies_ok() { + let hand = create_test_hand(); + let missing = hand.check_dependencies().unwrap(); + // Default is_dependency_available returns true for all + assert!(missing.is_empty()); + } + + // --- Default Values Tests --- + + #[test] + fn test_research_query_defaults() { + let json = json!({ "query": "test" }); + let query: ResearchQuery = serde_json::from_value(json).unwrap(); + assert_eq!(query.query, "test"); + assert!(matches!(query.engine, SearchEngine::Auto)); + assert!(matches!(query.depth, ResearchDepth::Standard)); + assert_eq!(query.max_results, 10); + assert_eq!(query.time_limit_secs, 60); + assert!(!query.include_related); + } + + #[test] + fn test_search_result_serialization() { + let result = SearchResult { + title: "Test".to_string(), + url: "https://example.com".to_string(), + snippet: "A snippet".to_string(), + source: "TestSource".to_string(), + relevance: 90, + content: None, + fetched_at: None, + }; + let json = serde_json::to_string(&result).unwrap(); + assert!(json.contains("Test")); + assert!(json.contains("https://example.com")); + } + + #[test] + fn test_research_report_summary_is_some_when_results() { + // Verify the struct allows Some value + let report = ResearchReport { + query: "test".to_string(), + results: vec![SearchResult { + title: "R".to_string(), + url: "https://r.co".to_string(), + snippet: "snippet text".to_string(), + source: "S".to_string(), + relevance: 80, + content: None, + fetched_at: None, + }], + summary: Some("基于 1 条搜索结果:snippet text".to_string()), + key_findings: vec![], + related_topics: vec![], + researched_at: "2026-01-01T00:00:00Z".to_string(), + duration_ms: 100, + }; + assert!(report.summary.is_some()); + assert!(report.summary.unwrap().contains("snippet text")); + } +}