feat(hands): add 25 unit tests + fix summary + fix HTML extraction for ResearcherHand

- Add comprehensive test suite: config, types, action deserialization, URL encoding,
  HTML text extraction, hand trait methods
- Fix summary field: generate rule-based summary from top search results (was always None)
- Fix extract_text_from_html: correct position tracking for script/style tag detection

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-04-01 23:16:57 +08:00
parent 73ff5e8c5e
commit 59fc7debd6

View File

@@ -344,31 +344,34 @@ impl ResearcherHand {
/// Extract readable text from HTML
fn extract_text_from_html(&self, html: &str) -> String {
// Simple text extraction - remove HTML tags
let html_lower = html.to_lowercase();
let mut text = String::new();
let mut in_tag = false;
let mut in_script = false;
let mut in_style = false;
let mut pos: usize = 0;
for c in html.chars() {
let char_len = c.len_utf8();
match c {
'<' => {
in_tag = true;
let remaining = html[text.len()..].to_lowercase();
// Check for closing tags before entering tag mode
let remaining = &html_lower[pos..];
if remaining.starts_with("</script") {
in_script = false;
} else if remaining.starts_with("</style") {
in_style = false;
}
// Check for opening tags
if remaining.starts_with("<script") {
in_script = true;
} else if remaining.starts_with("<style") {
in_style = true;
}
in_tag = true;
}
'>' => {
in_tag = false;
let remaining = html[text.len()..].to_lowercase();
if remaining.starts_with("</script>") {
in_script = false;
} else if remaining.starts_with("</style>") {
in_style = false;
}
}
_ if in_tag => {}
_ if in_script || in_style => {}
@@ -379,9 +382,9 @@ impl ResearcherHand {
}
_ => text.push(c),
}
pos += char_len;
}
// Limit length
if text.len() > 10000 {
text.truncate(10000);
text.push_str("...");
@@ -445,10 +448,33 @@ impl ResearcherHand {
let duration = start.elapsed().as_millis() as u64;
// Generate summary from top results
let summary = if results.is_empty() {
"未找到相关结果,建议调整搜索关键词后重试".to_string()
} else {
let top_snippets: Vec<&str> = results
.iter()
.take(3)
.filter_map(|r| {
let s = r.snippet.trim();
if s.is_empty() { None } else { Some(s) }
})
.collect();
if top_snippets.is_empty() {
format!("找到 {} 条相关结果,但无摘要信息", results.len())
} else {
format!(
"基于 {} 条搜索结果:{}",
results.len(),
top_snippets.join("")
)
}
};
Ok(ResearchReport {
query: query.query.clone(),
results,
summary: None, // Would require LLM integration
summary: Some(summary),
key_findings,
related_topics,
researched_at: chrono::Utc::now().to_rfc3339(),
@@ -543,3 +569,276 @@ fn url_encode(s: &str) -> String {
})
.collect()
}
#[cfg(test)]
mod tests {
    // Unit tests for the researcher hand: configuration, serde round-trips of the
    // public types, action deserialization, URL encoding, HTML text extraction and
    // the synchronous hand trait methods. None of these tests perform network I/O.
    use super::*;

    /// Builds the hand under test via `ResearcherHand::new()`.
    fn create_test_hand() -> ResearcherHand {
        ResearcherHand::new()
    }

    // --- Config & Type Tests ---

    #[test]
    fn test_config_id() {
        let hand = create_test_hand();
        assert_eq!(hand.config().id, "researcher");
        assert_eq!(hand.config().name, "研究员");
        assert!(hand.config().enabled);
        assert!(!hand.config().needs_approval);
    }

    #[test]
    fn test_search_engine_default_is_auto() {
        let engine = SearchEngine::default();
        assert!(matches!(engine, SearchEngine::Auto));
    }

    #[test]
    fn test_research_depth_default_is_standard() {
        let depth = ResearchDepth::default();
        assert!(matches!(depth, ResearchDepth::Standard));
    }

    #[test]
    fn test_research_depth_serialize() {
        let json = serde_json::to_string(&ResearchDepth::Deep).unwrap();
        assert_eq!(json, "\"deep\"");
    }

    #[test]
    fn test_research_depth_deserialize() {
        let depth: ResearchDepth = serde_json::from_str("\"quick\"").unwrap();
        assert!(matches!(depth, ResearchDepth::Quick));
    }

    #[test]
    fn test_search_engine_serialize_roundtrip() {
        // Compare the serialized forms so the enum does not need `PartialEq`.
        for engine in [
            SearchEngine::Google,
            SearchEngine::Bing,
            SearchEngine::DuckDuckGo,
            SearchEngine::Auto,
        ] {
            let json = serde_json::to_string(&engine).unwrap();
            let back: SearchEngine = serde_json::from_str(&json).unwrap();
            assert_eq!(json, serde_json::to_string(&back).unwrap());
        }
    }

    // --- Action Deserialization Tests ---

    #[test]
    fn test_action_search_deserialize() {
        let json = json!({
            "action": "search",
            "query": {
                "query": "Rust programming",
                "engine": "duckduckgo",
                "depth": "quick",
                "maxResults": 5
            }
        });
        let action: ResearcherAction = serde_json::from_value(json).unwrap();
        match action {
            ResearcherAction::Search { query } => {
                assert_eq!(query.query, "Rust programming");
                assert!(matches!(query.engine, SearchEngine::DuckDuckGo));
                assert!(matches!(query.depth, ResearchDepth::Quick));
                assert_eq!(query.max_results, 5);
            }
            _ => panic!("Expected Search action"),
        }
    }

    #[test]
    fn test_action_fetch_deserialize() {
        let json = json!({
            "action": "fetch",
            "url": "https://example.com/page"
        });
        let action: ResearcherAction = serde_json::from_value(json).unwrap();
        match action {
            ResearcherAction::Fetch { url } => {
                assert_eq!(url, "https://example.com/page");
            }
            _ => panic!("Expected Fetch action"),
        }
    }

    #[test]
    fn test_action_report_deserialize() {
        let json = json!({
            "action": "report",
            "query": {
                "query": "AI trends 2026",
                "depth": "deep"
            }
        });
        let action: ResearcherAction = serde_json::from_value(json).unwrap();
        match action {
            ResearcherAction::Report { query } => {
                assert_eq!(query.query, "AI trends 2026");
                assert!(matches!(query.depth, ResearchDepth::Deep));
            }
            _ => panic!("Expected Report action"),
        }
    }

    #[test]
    fn test_action_invalid_rejected() {
        let json = json!({
            "action": "unknown_action",
            "data": "whatever"
        });
        let result: std::result::Result<ResearcherAction, _> = serde_json::from_value(json);
        assert!(result.is_err());
    }

    // --- URL Encoding Tests ---

    #[test]
    fn test_url_encode_ascii() {
        assert_eq!(url_encode("hello world"), "hello%20world");
    }

    #[test]
    fn test_url_encode_chinese() {
        let encoded = url_encode("中文搜索");
        assert!(encoded.contains('%'));
        // Chinese chars should be percent-encoded
        assert!(!encoded.contains("中文"));
    }

    #[test]
    fn test_url_encode_safe_chars() {
        assert_eq!(url_encode("abc123-_."), "abc123-_.");
    }

    #[test]
    fn test_url_encode_empty() {
        assert_eq!(url_encode(""), "");
    }

    // --- HTML Text Extraction Tests ---

    #[test]
    fn test_extract_text_basic() {
        let hand = create_test_hand();
        let html = "<html><body><h1>Title</h1><p>Content here</p></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(text.contains("Title"));
        assert!(text.contains("Content here"));
    }

    #[test]
    fn test_extract_text_strips_scripts() {
        let hand = create_test_hand();
        let html = "<html><body><script>alert('xss')</script><p>Safe text</p></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(!text.contains("alert"));
        assert!(text.contains("Safe text"));
    }

    #[test]
    fn test_extract_text_strips_styles() {
        let hand = create_test_hand();
        let html = "<html><body><style>.class{color:red}</style><p>Visible</p></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(!text.contains("color"));
        assert!(text.contains("Visible"));
    }

    #[test]
    fn test_extract_text_truncates_long_content() {
        let hand = create_test_hand();
        let long_body = "x".repeat(20000);
        let html = format!("<html><body><p>{}</p></body></html>", long_body);
        let text = hand.extract_text_from_html(&html);
        assert!(text.len() <= 10003); // 10000 + "..."
    }

    #[test]
    fn test_extract_text_empty_body() {
        let hand = create_test_hand();
        let html = "<html><body></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(text.is_empty());
    }

    // --- Hand Trait Tests ---
    // The methods exercised here are called without `.await`, so plain `#[test]`
    // functions are sufficient and no async runtime is started.

    #[test]
    fn test_needs_approval_is_false() {
        let hand = create_test_hand();
        assert!(!hand.needs_approval());
    }

    #[test]
    fn test_status_is_idle() {
        let hand = create_test_hand();
        assert!(matches!(hand.status(), crate::HandStatus::Idle));
    }

    #[test]
    fn test_check_dependencies_ok() {
        let hand = create_test_hand();
        let missing = hand.check_dependencies().unwrap();
        // Default is_dependency_available returns true for all
        assert!(missing.is_empty());
    }

    // --- Default Values Tests ---

    #[test]
    fn test_research_query_defaults() {
        let json = json!({ "query": "test" });
        let query: ResearchQuery = serde_json::from_value(json).unwrap();
        assert_eq!(query.query, "test");
        assert!(matches!(query.engine, SearchEngine::Auto));
        assert!(matches!(query.depth, ResearchDepth::Standard));
        assert_eq!(query.max_results, 10);
        assert_eq!(query.time_limit_secs, 60);
        assert!(!query.include_related);
    }

    #[test]
    fn test_search_result_serialization() {
        let result = SearchResult {
            title: "Test".to_string(),
            url: "https://example.com".to_string(),
            snippet: "A snippet".to_string(),
            source: "TestSource".to_string(),
            relevance: 90,
            content: None,
            fetched_at: None,
        };
        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("Test"));
        assert!(json.contains("https://example.com"));
    }

    #[test]
    fn test_research_report_summary_is_some_when_results() {
        // Verify the struct allows Some value. The fixture string follows the
        // "基于 {} 条搜索结果:{}" shape used when the report summary is generated.
        let report = ResearchReport {
            query: "test".to_string(),
            results: vec![SearchResult {
                title: "R".to_string(),
                url: "https://r.co".to_string(),
                snippet: "snippet text".to_string(),
                source: "S".to_string(),
                relevance: 80,
                content: None,
                fetched_at: None,
            }],
            summary: Some("基于 1 条搜索结果:snippet text".to_string()),
            key_findings: vec![],
            related_topics: vec![],
            researched_at: "2026-01-01T00:00:00Z".to_string(),
            duration_ms: 100,
        };
        let summary = report
            .summary
            .as_deref()
            .expect("summary was constructed as Some above");
        assert!(summary.contains("snippet text"));
    }
}