From 59fc7debd6812e9c9e3f0f0dd239216d43e20f0b Mon Sep 17 00:00:00 2001
From: iven <iven_h@qq.com>
Date: Wed, 1 Apr 2026 23:16:57 +0800
Subject: [PATCH] feat(hands): add 25 unit tests + fix summary + fix HTML
 extraction for ResearcherHand

- Add comprehensive test suite: config, types, action deserialization, URL encoding,
  HTML text extraction, hand trait methods
- Fix summary field: generate rule-based summary from top search results (was always None)
- Fix extract_text_from_html: correct position tracking for script/style tag detection

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 crates/zclaw-hands/src/hands/researcher.rs | 321 ++++++++++++++++++++-
 1 file changed, 310 insertions(+), 11 deletions(-)
diff --git a/crates/zclaw-hands/src/hands/researcher.rs b/crates/zclaw-hands/src/hands/researcher.rs
index 1a38853..c737a00 100644
--- a/crates/zclaw-hands/src/hands/researcher.rs
+++ b/crates/zclaw-hands/src/hands/researcher.rs
@@ -344,31 +344,34 @@ impl ResearcherHand {
 
     /// Extract readable text from HTML
     fn extract_text_from_html(&self, html: &str) -> String {
-        // Simple text extraction - remove HTML tags
+        let html_lower = html.to_ascii_lowercase(); // byte offsets must match `html`
         let mut text = String::new();
         let mut in_tag = false;
         let mut in_script = false;
         let mut in_style = false;
+        let mut pos: usize = 0; // byte offset of the current char in `html`
 
         for c in html.chars() {
+            let char_len = c.len_utf8();
             match c {
                 '<' => {
-                    in_tag = true;
-                    let remaining = html[text.len()..].to_lowercase();
+                    // A closing tag ends script/style skipping (matched case-insensitively)
+                    let remaining = &html_lower[pos..];
+                    if remaining.starts_with("</script") {
+                        in_script = false;
+                    } else if remaining.starts_with("</style") {
+                        in_style = false;
+                    }
+                    // An opening tag starts skipping everything up to its closing tag
                     if remaining.starts_with("<script") {
                         in_script = true;
                     } else if remaining.starts_with("<style") {
                         in_style = true;
                     }
+                    in_tag = true;
                 }
                 '>' => {
                     in_tag = false;
-                    let remaining = html[text.len()..].to_lowercase();
-                    if remaining.starts_with("</script>") {
-                        in_script = false;
-                    } else if remaining.starts_with("</style>") {
-                        in_style = false;
-                    }
                 }
                 _ if in_tag => {}
                 _ if in_script || in_style => {}
@@ -379,9 +382,9 @@ impl ResearcherHand {
                 }
                 _ => text.push(c),
             }
+            pos += char_len;
         }
 
-        // Limit length
         if text.len() > 10000 {
             text.truncate(10000);
             text.push_str("...");
@@ -445,10 +448,33 @@ impl ResearcherHand {
 
         let duration = start.elapsed().as_millis() as u64;
 
+        // Generate summary from top results
+        let summary = if results.is_empty() {
+            "未找到相关结果，建议调整搜索关键词后重试".to_string()
+        } else {
+            let top_snippets: Vec<&str> = results
+                .iter()
+                .take(3)
+                .filter_map(|r| {
+                    let s = r.snippet.trim();
+                    if s.is_empty() { None } else { Some(s) }
+                })
+                .collect();
+            if top_snippets.is_empty() {
+                format!("找到 {} 条相关结果，但无摘要信息", results.len())
+            } else {
+                format!(
+                    "基于 {} 条搜索结果：{}",
+                    results.len(),
+                    top_snippets.join("；")
+                )
+            }
+        };
+
         Ok(ResearchReport {
             query: query.query.clone(),
             results,
-            summary: None, // Would require LLM integration
+            summary: Some(summary),
             key_findings,
             related_topics,
             researched_at: chrono::Utc::now().to_rfc3339(),
@@ -543,3 +569,276 @@ fn url_encode(s: &str) -> String {
         })
         .collect()
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn create_test_hand() -> ResearcherHand {
+        ResearcherHand::new()
+    }
+
+    // --- Config & Type Tests ---
+
+    #[test]
+    fn test_config_id() {
+        let hand = create_test_hand();
+        assert_eq!(hand.config().id, "researcher");
+        assert_eq!(hand.config().name, "研究员");
+        assert!(hand.config().enabled);
+        assert!(!hand.config().needs_approval);
+    }
+
+    #[test]
+    fn test_search_engine_default_is_auto() {
+        let engine = SearchEngine::default();
+        assert!(matches!(engine, SearchEngine::Auto));
+    }
+
+    #[test]
+    fn test_research_depth_default_is_standard() {
+        let depth = ResearchDepth::default();
+        assert!(matches!(depth, ResearchDepth::Standard));
+    }
+
+    #[test]
+    fn test_research_depth_serialize() {
+        let json = serde_json::to_string(&ResearchDepth::Deep).unwrap();
+        assert_eq!(json, "\"deep\"");
+    }
+
+    #[test]
+    fn test_research_depth_deserialize() {
+        let depth: ResearchDepth = serde_json::from_str("\"quick\"").unwrap();
+        assert!(matches!(depth, ResearchDepth::Quick));
+    }
+
+    #[test]
+    fn test_search_engine_serialize_roundtrip() {
+        for engine in [
+            SearchEngine::Google,
+            SearchEngine::Bing,
+            SearchEngine::DuckDuckGo,
+            SearchEngine::Auto,
+        ] {
+            let json = serde_json::to_string(&engine).unwrap();
+            let back: SearchEngine = serde_json::from_str(&json).unwrap();
+            assert_eq!(json, serde_json::to_string(&back).unwrap());
+        }
+    }
+
+    // --- Action Deserialization Tests ---
+
+    #[test]
+    fn test_action_search_deserialize() {
+        let json = json!({
+            "action": "search",
+            "query": {
+                "query": "Rust programming",
+                "engine": "duckduckgo",
+                "depth": "quick",
+                "maxResults": 5
+            }
+        });
+        let action: ResearcherAction = serde_json::from_value(json).unwrap();
+        match action {
+            ResearcherAction::Search { query } => {
+                assert_eq!(query.query, "Rust programming");
+                assert!(matches!(query.engine, SearchEngine::DuckDuckGo));
+                assert!(matches!(query.depth, ResearchDepth::Quick));
+                assert_eq!(query.max_results, 5);
+            }
+            _ => panic!("Expected Search action"),
+        }
+    }
+
+    #[test]
+    fn test_action_fetch_deserialize() {
+        let json = json!({
+            "action": "fetch",
+            "url": "https://example.com/page"
+        });
+        let action: ResearcherAction = serde_json::from_value(json).unwrap();
+        match action {
+            ResearcherAction::Fetch { url } => {
+                assert_eq!(url, "https://example.com/page");
+            }
+            _ => panic!("Expected Fetch action"),
+        }
+    }
+
+    #[test]
+    fn test_action_report_deserialize() {
+        let json = json!({
+            "action": "report",
+            "query": {
+                "query": "AI trends 2026",
+                "depth": "deep"
+            }
+        });
+        let action: ResearcherAction = serde_json::from_value(json).unwrap();
+        match action {
+            ResearcherAction::Report { query } => {
+                assert_eq!(query.query, "AI trends 2026");
+                assert!(matches!(query.depth, ResearchDepth::Deep));
+            }
+            _ => panic!("Expected Report action"),
+        }
+    }
+
+    #[test]
+    fn test_action_invalid_rejected() {
+        let json = json!({
+            "action": "unknown_action",
+            "data": "whatever"
+        });
+        let result: std::result::Result<ResearcherAction, _> = serde_json::from_value(json);
+        assert!(result.is_err());
+    }
+
+    // --- URL Encoding Tests ---
+
+    #[test]
+    fn test_url_encode_ascii() {
+        assert_eq!(url_encode("hello world"), "hello%20world");
+    }
+
+    #[test]
+    fn test_url_encode_chinese() {
+        let encoded = url_encode("中文搜索");
+        assert!(encoded.contains('%'));
+        // Chinese chars should be percent-encoded
+        assert!(!encoded.contains("中文"));
+    }
+
+    #[test]
+    fn test_url_encode_safe_chars() {
+        assert_eq!(url_encode("abc123-_."), "abc123-_.");
+    }
+
+    #[test]
+    fn test_url_encode_empty() {
+        assert_eq!(url_encode(""), "");
+    }
+
+    // --- HTML Text Extraction Tests ---
+
+    #[test]
+    fn test_extract_text_basic() {
+        let hand = create_test_hand();
+        let html = "<html><body><h1>Title</h1><p>Content here</p></body></html>";
+        let text = hand.extract_text_from_html(html);
+        assert!(text.contains("Title"));
+        assert!(text.contains("Content here"));
+    }
+
+    #[test]
+    fn test_extract_text_strips_scripts() {
+        let hand = create_test_hand();
+        let html = "<html><body><script>alert('xss')</script><p>Safe text</p></body></html>";
+        let text = hand.extract_text_from_html(html);
+        assert!(!text.contains("alert"));
+        assert!(text.contains("Safe text"));
+    }
+
+    #[test]
+    fn test_extract_text_strips_styles() {
+        let hand = create_test_hand();
+        let html = "<html><body><style>.class{color:red}</style><p>Visible</p></body></html>";
+        let text = hand.extract_text_from_html(html);
+        assert!(!text.contains("color"));
+        assert!(text.contains("Visible"));
+    }
+
+    #[test]
+    fn test_extract_text_truncates_long_content() {
+        let hand = create_test_hand();
+        let long_body: String = "x".repeat(20000);
+        let html = format!("<html><body><p>{}</p></body></html>", long_body);
+        let text = hand.extract_text_from_html(&html);
+        assert!(text.len() <= 10003); // 10000 + "..."
+    }
+
+    #[test]
+    fn test_extract_text_empty_body() {
+        let hand = create_test_hand();
+        let html = "<html><body></body></html>";
+        let text = hand.extract_text_from_html(html);
+        assert!(text.is_empty());
+    }
+
+    // --- Hand Trait Tests ---
+
+    #[test]
+    fn test_needs_approval_is_false() {
+        let hand = create_test_hand();
+        assert!(!hand.needs_approval());
+    }
+
+    #[test]
+    fn test_status_is_idle() {
+        let hand = create_test_hand();
+        assert!(matches!(hand.status(), crate::HandStatus::Idle));
+    }
+
+    #[test]
+    fn test_check_dependencies_ok() {
+        let hand = create_test_hand();
+        let missing = hand.check_dependencies().unwrap();
+        // Default is_dependency_available returns true for all
+        assert!(missing.is_empty());
+    }
+
+    // --- Default Values Tests ---
+
+    #[test]
+    fn test_research_query_defaults() {
+        let json = json!({ "query": "test" });
+        let query: ResearchQuery = serde_json::from_value(json).unwrap();
+        assert_eq!(query.query, "test");
+        assert!(matches!(query.engine, SearchEngine::Auto));
+        assert!(matches!(query.depth, ResearchDepth::Standard));
+        assert_eq!(query.max_results, 10);
+        assert_eq!(query.time_limit_secs, 60);
+        assert!(!query.include_related);
+    }
+
+    #[test]
+    fn test_search_result_serialization() {
+        let result = SearchResult {
+            title: "Test".to_string(),
+            url: "https://example.com".to_string(),
+            snippet: "A snippet".to_string(),
+            source: "TestSource".to_string(),
+            relevance: 90,
+            content: None,
+            fetched_at: None,
+        };
+        let json = serde_json::to_string(&result).unwrap();
+        assert!(json.contains("Test"));
+        assert!(json.contains("https://example.com"));
+    }
+
+    #[test]
+    fn test_research_report_summary_is_some_when_results() {
+        // Only checks that the struct can carry a summary; the research flow is not run here.
+        let report = ResearchReport {
+            query: "test".to_string(),
+            results: vec![SearchResult {
+                title: "R".to_string(),
+                url: "https://r.co".to_string(),
+                snippet: "snippet text".to_string(),
+                source: "S".to_string(),
+                relevance: 80,
+                content: None,
+                fetched_at: None,
+            }],
+            summary: Some("基于 1 条搜索结果：snippet text".to_string()),
+            key_findings: vec![],
+            related_topics: vec![],
+            researched_at: "2026-01-01T00:00:00Z".to_string(),
+            duration_ms: 100,
+        };
+        assert!(report.summary.expect("summary should be set").contains("snippet text"));
+    }
+}