feat(hands): add 25 unit tests + fix summary + fix HTML extraction for ResearcherHand
- Add comprehensive test suite: config, types, action deserialization, URL encoding, HTML text extraction, hand trait methods
- Fix summary field: generate rule-based summary from top search results (was always None)
- Fix extract_text_from_html: correct position tracking for script/style tag detection

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -344,31 +344,34 @@ impl ResearcherHand {
|
||||
|
||||
/// Extract readable text from HTML
|
||||
fn extract_text_from_html(&self, html: &str) -> String {
|
||||
// Simple text extraction - remove HTML tags
|
||||
let html_lower = html.to_lowercase();
|
||||
let mut text = String::new();
|
||||
let mut in_tag = false;
|
||||
let mut in_script = false;
|
||||
let mut in_style = false;
|
||||
let mut pos: usize = 0;
|
||||
|
||||
for c in html.chars() {
|
||||
let char_len = c.len_utf8();
|
||||
match c {
|
||||
'<' => {
|
||||
in_tag = true;
|
||||
let remaining = html[text.len()..].to_lowercase();
|
||||
// Check for closing tags before entering tag mode
|
||||
let remaining = &html_lower[pos..];
|
||||
if remaining.starts_with("</script") {
|
||||
in_script = false;
|
||||
} else if remaining.starts_with("</style") {
|
||||
in_style = false;
|
||||
}
|
||||
// Check for opening tags
|
||||
if remaining.starts_with("<script") {
|
||||
in_script = true;
|
||||
} else if remaining.starts_with("<style") {
|
||||
in_style = true;
|
||||
}
|
||||
in_tag = true;
|
||||
}
|
||||
'>' => {
|
||||
in_tag = false;
|
||||
let remaining = html[text.len()..].to_lowercase();
|
||||
if remaining.starts_with("</script>") {
|
||||
in_script = false;
|
||||
} else if remaining.starts_with("</style>") {
|
||||
in_style = false;
|
||||
}
|
||||
}
|
||||
_ if in_tag => {}
|
||||
_ if in_script || in_style => {}
|
||||
@@ -379,9 +382,9 @@ impl ResearcherHand {
|
||||
}
|
||||
_ => text.push(c),
|
||||
}
|
||||
pos += char_len;
|
||||
}
|
||||
|
||||
// Limit length
|
||||
if text.len() > 10000 {
|
||||
text.truncate(10000);
|
||||
text.push_str("...");
|
||||
@@ -445,10 +448,33 @@ impl ResearcherHand {
|
||||
|
||||
let duration = start.elapsed().as_millis() as u64;
|
||||
|
||||
// Generate summary from top results
|
||||
let summary = if results.is_empty() {
|
||||
"未找到相关结果,建议调整搜索关键词后重试".to_string()
|
||||
} else {
|
||||
let top_snippets: Vec<&str> = results
|
||||
.iter()
|
||||
.take(3)
|
||||
.filter_map(|r| {
|
||||
let s = r.snippet.trim();
|
||||
if s.is_empty() { None } else { Some(s) }
|
||||
})
|
||||
.collect();
|
||||
if top_snippets.is_empty() {
|
||||
format!("找到 {} 条相关结果,但无摘要信息", results.len())
|
||||
} else {
|
||||
format!(
|
||||
"基于 {} 条搜索结果:{}",
|
||||
results.len(),
|
||||
top_snippets.join(";")
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
Ok(ResearchReport {
|
||||
query: query.query.clone(),
|
||||
results,
|
||||
summary: None, // Would require LLM integration
|
||||
summary: Some(summary),
|
||||
key_findings,
|
||||
related_topics,
|
||||
researched_at: chrono::Utc::now().to_rfc3339(),
|
||||
@@ -543,3 +569,276 @@ fn url_encode(s: &str) -> String {
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // Unit tests for `ResearcherHand` and its supporting types: configuration,
    // serde behaviour of the enums and actions, URL encoding, HTML text
    // extraction, the synchronous hand accessors, and serde defaults.
    use super::*;

    /// Builds a hand through `ResearcherHand::new()` so every test starts
    /// from the same freshly constructed instance.
    fn create_test_hand() -> ResearcherHand {
        ResearcherHand::new()
    }

    // --- Config & Type Tests ---

    /// The hand's config exposes the expected id, display name and flags.
    #[test]
    fn test_config_id() {
        let hand = create_test_hand();
        assert_eq!(hand.config().id, "researcher");
        assert_eq!(hand.config().name, "研究员");
        assert!(hand.config().enabled);
        assert!(!hand.config().needs_approval);
    }

    /// `SearchEngine::default()` is the `Auto` variant.
    #[test]
    fn test_search_engine_default_is_auto() {
        let engine = SearchEngine::default();
        assert!(matches!(engine, SearchEngine::Auto));
    }

    /// `ResearchDepth::default()` is the `Standard` variant.
    #[test]
    fn test_research_depth_default_is_standard() {
        let depth = ResearchDepth::default();
        assert!(matches!(depth, ResearchDepth::Standard));
    }

    /// `ResearchDepth::Deep` serializes to the lowercase JSON string "deep".
    #[test]
    fn test_research_depth_serialize() {
        let json = serde_json::to_string(&ResearchDepth::Deep).unwrap();
        assert_eq!(json, "\"deep\"");
    }

    /// The JSON string "quick" deserializes to `ResearchDepth::Quick`.
    #[test]
    fn test_research_depth_deserialize() {
        let depth: ResearchDepth = serde_json::from_str("\"quick\"").unwrap();
        assert!(matches!(depth, ResearchDepth::Quick));
    }

    /// Every engine variant survives serialize -> deserialize -> serialize
    /// with an identical JSON representation.
    #[test]
    fn test_search_engine_serialize_roundtrip() {
        for engine in [
            SearchEngine::Google,
            SearchEngine::Bing,
            SearchEngine::DuckDuckGo,
            SearchEngine::Auto,
        ] {
            let json = serde_json::to_string(&engine).unwrap();
            let back: SearchEngine = serde_json::from_str(&json).unwrap();
            // Compare the JSON forms rather than the values themselves, so the
            // test does not need `SearchEngine` to implement `PartialEq`.
            assert_eq!(json, serde_json::to_string(&back).unwrap());
        }
    }

    // --- Action Deserialization Tests ---

    /// A "search" action carries a fully specified query.
    #[test]
    fn test_action_search_deserialize() {
        let json = json!({
            "action": "search",
            "query": {
                "query": "Rust programming",
                "engine": "duckduckgo",
                "depth": "quick",
                "maxResults": 5
            }
        });
        let action: ResearcherAction = serde_json::from_value(json).unwrap();
        match action {
            ResearcherAction::Search { query } => {
                assert_eq!(query.query, "Rust programming");
                assert!(matches!(query.engine, SearchEngine::DuckDuckGo));
                assert!(matches!(query.depth, ResearchDepth::Quick));
                assert_eq!(query.max_results, 5);
            }
            _ => panic!("Expected Search action"),
        }
    }

    /// A "fetch" action carries the target URL unchanged.
    #[test]
    fn test_action_fetch_deserialize() {
        let json = json!({
            "action": "fetch",
            "url": "https://example.com/page"
        });
        let action: ResearcherAction = serde_json::from_value(json).unwrap();
        match action {
            ResearcherAction::Fetch { url } => {
                assert_eq!(url, "https://example.com/page");
            }
            _ => panic!("Expected Fetch action"),
        }
    }

    /// A "report" action accepts a query that only sets some of the fields.
    #[test]
    fn test_action_report_deserialize() {
        let json = json!({
            "action": "report",
            "query": {
                "query": "AI trends 2026",
                "depth": "deep"
            }
        });
        let action: ResearcherAction = serde_json::from_value(json).unwrap();
        match action {
            ResearcherAction::Report { query } => {
                assert_eq!(query.query, "AI trends 2026");
                assert!(matches!(query.depth, ResearchDepth::Deep));
            }
            _ => panic!("Expected Report action"),
        }
    }

    /// An unrecognised "action" tag is rejected instead of being mapped to
    /// some variant.
    #[test]
    fn test_action_invalid_rejected() {
        let json = json!({
            "action": "unknown_action",
            "data": "whatever"
        });
        let result: std::result::Result<ResearcherAction, _> = serde_json::from_value(json);
        assert!(result.is_err());
    }

    // --- URL Encoding Tests ---

    /// A space in ASCII input is percent-encoded as `%20`.
    #[test]
    fn test_url_encode_ascii() {
        assert_eq!(url_encode("hello world"), "hello%20world");
    }

    /// Chinese characters are percent-encoded and do not appear verbatim.
    #[test]
    fn test_url_encode_chinese() {
        let encoded = url_encode("中文搜索");
        assert!(encoded.contains("%"));
        // Chinese chars should be percent-encoded
        assert!(!encoded.contains("中文"));
    }

    /// Alphanumerics and `-`, `_`, `.` pass through unchanged.
    #[test]
    fn test_url_encode_safe_chars() {
        assert_eq!(url_encode("abc123-_."), "abc123-_.");
    }

    /// Empty input encodes to an empty string.
    #[test]
    fn test_url_encode_empty() {
        assert_eq!(url_encode(""), "");
    }

    // --- HTML Text Extraction Tests ---

    /// Text between ordinary tags is kept.
    #[test]
    fn test_extract_text_basic() {
        let hand = create_test_hand();
        let html = "<html><body><h1>Title</h1><p>Content here</p></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(text.contains("Title"));
        assert!(text.contains("Content here"));
    }

    /// The body of a `<script>` element is dropped; following text is kept.
    #[test]
    fn test_extract_text_strips_scripts() {
        let hand = create_test_hand();
        let html = "<html><body><script>alert('xss')</script><p>Safe text</p></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(!text.contains("alert"));
        assert!(text.contains("Safe text"));
    }

    /// The body of a `<style>` element is dropped; following text is kept.
    #[test]
    fn test_extract_text_strips_styles() {
        let hand = create_test_hand();
        let html = "<html><body><style>.class{color:red}</style><p>Visible</p></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(!text.contains("color"));
        assert!(text.contains("Visible"));
    }

    /// Output for a very long document stays within the length cap.
    #[test]
    fn test_extract_text_truncates_long_content() {
        let hand = create_test_hand();
        let long_body: String = "x".repeat(20000);
        let html = format!("<html><body><p>{}</p></body></html>", long_body);
        let text = hand.extract_text_from_html(&html);
        assert!(text.len() <= 10003); // 10000 + "..."
    }

    /// A document with no text content yields an empty string.
    #[test]
    fn test_extract_text_empty_body() {
        let hand = create_test_hand();
        let html = "<html><body></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(text.is_empty());
    }

    // --- Hand Trait Tests ---
    //
    // These accessors are called synchronously (nothing is awaited), so plain
    // `#[test]` functions are sufficient and no async runtime is started.

    /// The researcher hand does not require approval.
    #[test]
    fn test_needs_approval_is_false() {
        let hand = create_test_hand();
        assert!(!hand.needs_approval());
    }

    /// A freshly created hand reports the idle status.
    #[test]
    fn test_status_is_idle() {
        let hand = create_test_hand();
        assert!(matches!(hand.status(), crate::HandStatus::Idle));
    }

    /// A freshly created hand reports no missing dependencies.
    #[test]
    fn test_check_dependencies_ok() {
        let hand = create_test_hand();
        let missing = hand.check_dependencies().unwrap();
        // Default is_dependency_available returns true for all
        assert!(missing.is_empty());
    }

    // --- Default Values Tests ---

    /// Omitted query fields fall back to their serde defaults.
    #[test]
    fn test_research_query_defaults() {
        let json = json!({ "query": "test" });
        let query: ResearchQuery = serde_json::from_value(json).unwrap();
        assert_eq!(query.query, "test");
        assert!(matches!(query.engine, SearchEngine::Auto));
        assert!(matches!(query.depth, ResearchDepth::Standard));
        assert_eq!(query.max_results, 10);
        assert_eq!(query.time_limit_secs, 60);
        assert!(!query.include_related);
    }

    /// A `SearchResult` serializes to JSON containing its title and URL.
    #[test]
    fn test_search_result_serialization() {
        let result = SearchResult {
            title: "Test".to_string(),
            url: "https://example.com".to_string(),
            snippet: "A snippet".to_string(),
            source: "TestSource".to_string(),
            relevance: 90,
            content: None,
            fetched_at: None,
        };
        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("Test"));
        assert!(json.contains("https://example.com"));
    }

    /// Checks only that `ResearchReport::summary` can hold a value and be read
    /// back. The report is built by hand here, so this does NOT exercise the
    /// summary generation performed when a report is actually produced.
    #[test]
    fn test_research_report_summary_is_some_when_results() {
        let report = ResearchReport {
            query: "test".to_string(),
            results: vec![SearchResult {
                title: "R".to_string(),
                url: "https://r.co".to_string(),
                snippet: "snippet text".to_string(),
                source: "S".to_string(),
                relevance: 80,
                content: None,
                fetched_at: None,
            }],
            summary: Some("基于 1 条搜索结果:snippet text".to_string()),
            key_findings: vec![],
            related_topics: vec![],
            researched_at: "2026-01-01T00:00:00Z".to_string(),
            duration_ms: 100,
        };
        assert!(report.summary.is_some());
        assert!(report.summary.unwrap().contains("snippet text"));
    }
}
|
||||
|
||||
Reference in New Issue
Block a user