- Add comprehensive test suite: config, types, action deserialization, URL encoding, HTML text extraction, hand trait methods - Fix summary field: generate rule-based summary from top search results (was always None) - Fix extract_text_from_html: correct position tracking for script/style tag detection Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
845 lines
27 KiB
Rust
845 lines
27 KiB
Rust
//! Researcher Hand - Deep research and analysis capabilities
|
||
//!
|
||
//! This hand provides web search, content fetching, and research synthesis.
|
||
|
||
use async_trait::async_trait;
|
||
use serde::{Deserialize, Serialize};
|
||
use serde_json::{json, Value};
|
||
use std::collections::HashMap;
|
||
use std::sync::Arc;
|
||
use tokio::sync::RwLock;
|
||
use zclaw_types::Result;
|
||
|
||
use crate::{Hand, HandConfig, HandContext, HandResult};
|
||
|
||
/// Search engine options
|
||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
#[serde(rename_all = "lowercase")]
|
||
pub enum SearchEngine {
|
||
Google,
|
||
Bing,
|
||
DuckDuckGo,
|
||
Auto,
|
||
}
|
||
|
||
impl Default for SearchEngine {
|
||
fn default() -> Self {
|
||
Self::Auto
|
||
}
|
||
}
|
||
|
||
/// Research depth level
|
||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||
#[serde(rename_all = "lowercase")]
|
||
pub enum ResearchDepth {
|
||
Quick, // Fast search, top 3 results
|
||
Standard, // Normal search, top 10 results
|
||
Deep, // Comprehensive search, multiple sources
|
||
}
|
||
|
||
impl Default for ResearchDepth {
|
||
fn default() -> Self {
|
||
Self::Standard
|
||
}
|
||
}
|
||
|
||
/// Research query configuration.
///
/// Deserialized from camelCase JSON (e.g. `maxResults`, `timeLimitSecs`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ResearchQuery {
    /// Search query text.
    pub query: String,
    /// Search engine to use (defaults to [`SearchEngine::Auto`]).
    #[serde(default)]
    pub engine: SearchEngine,
    /// Research depth (defaults to [`ResearchDepth::Standard`]).
    #[serde(default)]
    pub depth: ResearchDepth,
    /// Maximum results to return (defaults to 10).
    #[serde(default = "default_max_results")]
    pub max_results: usize,
    /// Include related topics in the output.
    /// NOTE(review): not consulted anywhere in this file — confirm intended use.
    #[serde(default)]
    pub include_related: bool,
    /// Time limit in seconds (defaults to 60).
    /// NOTE(review): not enforced anywhere in this file — confirm intended use.
    #[serde(default = "default_time_limit")]
    pub time_limit_secs: u64,
}

/// Serde default for [`ResearchQuery::max_results`].
fn default_max_results() -> usize { 10 }

/// Serde default for [`ResearchQuery::time_limit_secs`].
fn default_time_limit() -> u64 { 60 }
|
||
|
||
/// A single search result item.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
    /// Title of the result.
    pub title: String,
    /// URL of the result (may be empty when the upstream entry has no URL).
    pub url: String,
    /// Snippet/summary text.
    pub snippet: String,
    /// Source name (e.g. "DuckDuckGo" or the fetched URL itself).
    pub source: String,
    /// Relevance score (0-100); 0 when absent from the input.
    #[serde(default)]
    pub relevance: u8,
    /// Fetched page content, populated by the fetch/report paths.
    #[serde(default)]
    pub content: Option<String>,
    /// RFC 3339 timestamp of when the content was fetched.
    #[serde(default)]
    pub fetched_at: Option<String>,
}
|
||
|
||
/// A synthesized research report produced by the `report` action.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ResearchReport {
    /// Original query text.
    pub query: String,
    /// Search results (top entries may carry fetched content).
    pub results: Vec<SearchResult>,
    /// Rule-based summary built from the top result snippets.
    #[serde(default)]
    pub summary: Option<String>,
    /// Key findings extracted from fetched content.
    #[serde(default)]
    pub key_findings: Vec<String>,
    /// Related topics discovered during the search.
    #[serde(default)]
    pub related_topics: Vec<String>,
    /// RFC 3339 timestamp of when the research ran.
    pub researched_at: String,
    /// Total time spent (ms).
    pub duration_ms: u64,
}
|
||
|
||
/// Researcher action types, dispatched on the `"action"` JSON tag.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "action")]
pub enum ResearcherAction {
    /// Run a web search and return raw results.
    #[serde(rename = "search")]
    Search { query: ResearchQuery },
    /// Fetch and extract the content of a single URL.
    #[serde(rename = "fetch")]
    Fetch { url: String },
    /// Fetch several URLs (at most 5 are processed; failures are skipped).
    #[serde(rename = "summarize")]
    Summarize { urls: Vec<String> },
    /// Produce a full research report (search + fetch + synthesis).
    #[serde(rename = "report")]
    Report { query: ResearchQuery },
}
|
||
|
||
/// Researcher Hand implementation.
pub struct ResearcherHand {
    /// Static hand metadata and input schema.
    config: HandConfig,
    /// Shared HTTP client (30 s timeout, custom user agent).
    client: reqwest::Client,
    /// URL -> fetched-result cache.
    /// NOTE(review): unbounded — entries are never evicted.
    cache: Arc<RwLock<HashMap<String, SearchResult>>>,
}
|
||
|
||
impl ResearcherHand {
|
||
    /// Create a new researcher hand.
    ///
    /// Builds the static [`HandConfig`] (id, display name/description,
    /// JSON input schema), an HTTP client with a 30 s timeout and a
    /// custom user agent, and an empty URL cache.
    pub fn new() -> Self {
        Self {
            config: HandConfig {
                id: "researcher".to_string(),
                name: "研究员".to_string(),
                description: "深度研究和分析能力,支持网络搜索和内容获取".to_string(),
                needs_approval: false,
                dependencies: vec!["network".to_string()],
                // JSON Schema describing accepted payloads: one `oneOf`
                // branch per action. NOTE(review): the "summarize" action
                // handled by `execute` has no branch here — confirm whether
                // that omission is intentional.
                input_schema: Some(serde_json::json!({
                    "type": "object",
                    "oneOf": [
                        {
                            "properties": {
                                "action": { "const": "search" },
                                "query": {
                                    "type": "object",
                                    "properties": {
                                        "query": { "type": "string" },
                                        "engine": { "type": "string", "enum": ["google", "bing", "duckduckgo", "auto"] },
                                        "depth": { "type": "string", "enum": ["quick", "standard", "deep"] },
                                        "maxResults": { "type": "integer" }
                                    },
                                    "required": ["query"]
                                }
                            },
                            "required": ["action", "query"]
                        },
                        {
                            "properties": {
                                "action": { "const": "fetch" },
                                "url": { "type": "string" }
                            },
                            "required": ["action", "url"]
                        },
                        {
                            "properties": {
                                "action": { "const": "report" },
                                // NOTE(review): "#/properties/query" does not resolve
                                // within this oneOf schema — confirm the intended $ref
                                // target (probably the search branch's query object).
                                "query": { "$ref": "#/properties/query" }
                            },
                            "required": ["action", "query"]
                        }
                    ]
                })),
                tags: vec!["research".to_string(), "web".to_string(), "search".to_string()],
                enabled: true,
            },
            // Fall back to a default client if the builder fails, rather
            // than panicking at construction time.
            client: reqwest::Client::builder()
                .timeout(std::time::Duration::from_secs(30))
                .user_agent("ZCLAW-Researcher/1.0")
                .build()
                .unwrap_or_else(|_| reqwest::Client::new()),
            cache: Arc::new(RwLock::new(HashMap::new())),
        }
    }
|
||
|
||
/// Execute a web search
|
||
async fn execute_search(&self, query: &ResearchQuery) -> Result<Vec<SearchResult>> {
|
||
let start = std::time::Instant::now();
|
||
|
||
// Use DuckDuckGo as default search (no API key required)
|
||
let results = self.search_duckduckgo(&query.query, query.max_results).await?;
|
||
|
||
let duration = start.elapsed().as_millis() as u64;
|
||
tracing::info!(
|
||
target: "researcher",
|
||
query = %query.query,
|
||
duration_ms = duration,
|
||
results_count = results.len(),
|
||
"Search completed"
|
||
);
|
||
|
||
Ok(results)
|
||
}
|
||
|
||
/// Search using DuckDuckGo (no API key required)
|
||
async fn search_duckduckgo(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
|
||
let url = format!("https://api.duckduckgo.com/?q={}&format=json&no_html=1",
|
||
url_encode(query));
|
||
|
||
let response = self.client
|
||
.get(&url)
|
||
.send()
|
||
.await
|
||
.map_err(|e| zclaw_types::ZclawError::HandError(format!("Search request failed: {}", e)))?;
|
||
|
||
let json: Value = response.json().await
|
||
.map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to parse search response: {}", e)))?;
|
||
|
||
let mut results = Vec::new();
|
||
|
||
// Parse DuckDuckGo Instant Answer
|
||
if let Some(abstract_text) = json.get("AbstractText").and_then(|v| v.as_str()) {
|
||
if !abstract_text.is_empty() {
|
||
results.push(SearchResult {
|
||
title: query.to_string(),
|
||
url: json.get("AbstractURL")
|
||
.and_then(|v| v.as_str())
|
||
.unwrap_or("")
|
||
.to_string(),
|
||
snippet: abstract_text.to_string(),
|
||
source: json.get("AbstractSource")
|
||
.and_then(|v| v.as_str())
|
||
.unwrap_or("DuckDuckGo")
|
||
.to_string(),
|
||
relevance: 100,
|
||
content: None,
|
||
fetched_at: Some(chrono::Utc::now().to_rfc3339()),
|
||
});
|
||
}
|
||
}
|
||
|
||
// Parse related topics
|
||
if let Some(related) = json.get("RelatedTopics").and_then(|v| v.as_array()) {
|
||
for item in related.iter().take(max_results) {
|
||
if let Some(obj) = item.as_object() {
|
||
results.push(SearchResult {
|
||
title: obj.get("Text")
|
||
.and_then(|v| v.as_str())
|
||
.unwrap_or("Related Topic")
|
||
.to_string(),
|
||
url: obj.get("FirstURL")
|
||
.and_then(|v| v.as_str())
|
||
.unwrap_or("")
|
||
.to_string(),
|
||
snippet: obj.get("Text")
|
||
.and_then(|v| v.as_str())
|
||
.unwrap_or("")
|
||
.to_string(),
|
||
source: "DuckDuckGo".to_string(),
|
||
relevance: 80,
|
||
content: None,
|
||
fetched_at: Some(chrono::Utc::now().to_rfc3339()),
|
||
});
|
||
}
|
||
}
|
||
}
|
||
|
||
Ok(results)
|
||
}
|
||
|
||
    /// Fetch content from a URL, with read-through caching.
    ///
    /// HTML responses are reduced to readable text; other text-like bodies
    /// (text/* or application/json) are returned as-is; anything else
    /// becomes a `"[Binary content]"` placeholder.
    ///
    /// NOTE(review): the HTTP status code is never checked, so error pages
    /// (e.g. a 404 body) are treated and cached as successful results.
    async fn execute_fetch(&self, url: &str) -> Result<SearchResult> {
        let start = std::time::Instant::now();

        // Check cache first — only entries that actually carry content count
        // as hits, so earlier content-less results don't short-circuit.
        {
            let cache = self.cache.read().await;
            if let Some(cached) = cache.get(url) {
                if cached.content.is_some() {
                    return Ok(cached.clone());
                }
            }
        }

        let response = self.client
            .get(url)
            .send()
            .await
            .map_err(|e| zclaw_types::ZclawError::HandError(format!("Fetch request failed: {}", e)))?;

        // Decide how to decode the body based on the Content-Type header.
        let content_type = response.headers()
            .get(reqwest::header::CONTENT_TYPE)
            .and_then(|v| v.to_str().ok())
            .unwrap_or("");

        let content = if content_type.contains("text/html") {
            // Extract text from HTML
            let html = response.text().await
                .map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to read HTML: {}", e)))?;
            self.extract_text_from_html(&html)
        } else if content_type.contains("text/") || content_type.contains("application/json") {
            response.text().await
                .map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to read text: {}", e)))?
        } else {
            "[Binary content]".to_string()
        };

        let result = SearchResult {
            title: url.to_string(),
            url: url.to_string(),
            // The first 500 chars of the content double as the snippet.
            snippet: content.chars().take(500).collect(),
            source: url.to_string(),
            relevance: 100,
            content: Some(content),
            fetched_at: Some(chrono::Utc::now().to_rfc3339()),
        };

        // Cache the result. NOTE(review): the cache is unbounded — entries
        // are never evicted.
        {
            let mut cache = self.cache.write().await;
            cache.insert(url.to_string(), result.clone());
        }

        let duration = start.elapsed().as_millis() as u64;
        tracing::info!(
            target: "researcher",
            url = url,
            duration_ms = duration,
            "Fetch completed"
        );

        Ok(result)
    }
|
||
|
||
/// Extract readable text from HTML
|
||
fn extract_text_from_html(&self, html: &str) -> String {
|
||
let html_lower = html.to_lowercase();
|
||
let mut text = String::new();
|
||
let mut in_tag = false;
|
||
let mut in_script = false;
|
||
let mut in_style = false;
|
||
let mut pos: usize = 0;
|
||
|
||
for c in html.chars() {
|
||
let char_len = c.len_utf8();
|
||
match c {
|
||
'<' => {
|
||
// Check for closing tags before entering tag mode
|
||
let remaining = &html_lower[pos..];
|
||
if remaining.starts_with("</script") {
|
||
in_script = false;
|
||
} else if remaining.starts_with("</style") {
|
||
in_style = false;
|
||
}
|
||
// Check for opening tags
|
||
if remaining.starts_with("<script") {
|
||
in_script = true;
|
||
} else if remaining.starts_with("<style") {
|
||
in_style = true;
|
||
}
|
||
in_tag = true;
|
||
}
|
||
'>' => {
|
||
in_tag = false;
|
||
}
|
||
_ if in_tag => {}
|
||
_ if in_script || in_style => {}
|
||
' ' | '\n' | '\t' | '\r' => {
|
||
if !text.ends_with(' ') && !text.is_empty() {
|
||
text.push(' ');
|
||
}
|
||
}
|
||
_ => text.push(c),
|
||
}
|
||
pos += char_len;
|
||
}
|
||
|
||
if text.len() > 10000 {
|
||
text.truncate(10000);
|
||
text.push_str("...");
|
||
}
|
||
|
||
text.trim().to_string()
|
||
}
|
||
|
||
    /// Generate a comprehensive research report.
    ///
    /// Pipeline: search → fetch content for the top N results (N depends on
    /// [`ResearchDepth`]: 1 / 3 / 5) → derive key findings, related topics,
    /// and a rule-based summary from the collected material.
    async fn execute_report(&self, query: &ResearchQuery) -> Result<ResearchReport> {
        let start = std::time::Instant::now();

        // First, execute search
        let mut results = self.execute_search(query).await?;

        // Fetch content for top results; depth controls how many pages we pull.
        let fetch_limit = match query.depth {
            ResearchDepth::Quick => 1,
            ResearchDepth::Standard => 3,
            ResearchDepth::Deep => 5,
        };

        for result in results.iter_mut().take(fetch_limit) {
            if !result.url.is_empty() {
                match self.execute_fetch(&result.url).await {
                    Ok(fetched) => {
                        result.content = fetched.content;
                        result.fetched_at = fetched.fetched_at;
                    }
                    Err(e) => {
                        // Best effort: a failed fetch degrades the report but
                        // does not abort it.
                        tracing::warn!(target: "researcher", error = %e, "Failed to fetch content");
                    }
                }
            }
        }

        // Key findings: the first three sentences of each of the (up to 5)
        // results that have fetched content.
        let key_findings: Vec<String> = results.iter()
            .take(5)
            .filter_map(|r| {
                r.content.as_ref().map(|c| {
                    c.split(". ")
                        .take(3)
                        .collect::<Vec<_>>()
                        .join(". ")
                })
            })
            .collect();

        // Related topics: titles of up to 5 results with substantial snippets.
        // NOTE(review): `query.include_related` is not consulted here —
        // confirm whether this list should be gated on that flag.
        let related_topics: Vec<String> = results.iter()
            .filter_map(|r| {
                if r.snippet.len() > 50 {
                    Some(r.title.clone())
                } else {
                    None
                }
            })
            .take(5)
            .collect();

        let duration = start.elapsed().as_millis() as u64;

        // Rule-based summary built from the top three non-empty snippets.
        let summary = if results.is_empty() {
            "未找到相关结果,建议调整搜索关键词后重试".to_string()
        } else {
            let top_snippets: Vec<&str> = results
                .iter()
                .take(3)
                .filter_map(|r| {
                    let s = r.snippet.trim();
                    if s.is_empty() { None } else { Some(s) }
                })
                .collect();
            if top_snippets.is_empty() {
                format!("找到 {} 条相关结果,但无摘要信息", results.len())
            } else {
                format!(
                    "基于 {} 条搜索结果:{}",
                    results.len(),
                    top_snippets.join(";")
                )
            }
        };

        Ok(ResearchReport {
            query: query.query.clone(),
            results,
            summary: Some(summary),
            key_findings,
            related_topics,
            researched_at: chrono::Utc::now().to_rfc3339(),
            duration_ms: duration,
        })
    }
|
||
}
|
||
|
||
impl Default for ResearcherHand {
    /// Equivalent to [`ResearcherHand::new`].
    fn default() -> Self {
        Self::new()
    }
}
|
||
|
||
#[async_trait]
impl Hand for ResearcherHand {
    /// Static configuration for this hand.
    fn config(&self) -> &HandConfig {
        &self.config
    }

    /// Dispatch an incoming JSON payload to the matching researcher action.
    ///
    /// The payload must deserialize into a [`ResearcherAction`]; on success
    /// the action-specific JSON result is wrapped in a successful
    /// [`HandResult`].
    async fn execute(&self, _context: &HandContext, input: Value) -> Result<HandResult> {
        let action: ResearcherAction = serde_json::from_value(input.clone())
            .map_err(|e| zclaw_types::ZclawError::HandError(format!("Invalid action: {}", e)))?;

        let start = std::time::Instant::now();

        let result = match action {
            ResearcherAction::Search { query } => {
                let results = self.execute_search(&query).await?;
                json!({
                    "action": "search",
                    "query": query.query,
                    "results": results,
                    "duration_ms": start.elapsed().as_millis()
                })
            }
            ResearcherAction::Fetch { url } => {
                let result = self.execute_fetch(&url).await?;
                json!({
                    "action": "fetch",
                    "url": url,
                    "result": result,
                    "duration_ms": start.elapsed().as_millis()
                })
            }
            ResearcherAction::Summarize { urls } => {
                // Fetch at most 5 URLs; individual failures are skipped
                // rather than failing the whole summarize call.
                let mut results = Vec::new();
                for url in urls.iter().take(5) {
                    if let Ok(result) = self.execute_fetch(url).await {
                        results.push(result);
                    }
                }
                json!({
                    "action": "summarize",
                    "urls": urls,
                    "results": results,
                    "duration_ms": start.elapsed().as_millis()
                })
            }
            ResearcherAction::Report { query } => {
                let report = self.execute_report(&query).await?;
                json!({
                    "action": "report",
                    "report": report
                })
            }
        };

        Ok(HandResult::success(result))
    }

    fn needs_approval(&self) -> bool {
        false // Research operations are generally safe
    }

    fn check_dependencies(&self) -> Result<Vec<String>> {
        // Network connectivity will be checked at runtime
        Ok(Vec::new())
    }

    fn status(&self) -> crate::HandStatus {
        crate::HandStatus::Idle
    }
}
|
||
|
||
/// Percent-encode a string for use in a URL query component.
///
/// Unreserved characters (RFC 3986 §2.3: ALPHA / DIGIT / `-` `_` `.` `~`)
/// pass through unchanged; every other byte of the string's UTF-8 encoding
/// is emitted as `%XX`.
fn url_encode(s: &str) -> String {
    // Worst case every byte expands to three characters.
    let mut out = String::with_capacity(s.len() * 3);
    for b in s.bytes() {
        match b {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                out.push(b as char)
            }
            // Encode each UTF-8 BYTE. The previous version formatted the
            // char's whole code point, producing invalid sequences such as
            // "%4E2D" for characters above U+00FF.
            _ => out.push_str(&format!("%{:02X}", b)),
        }
    }
    out
}
|
||
|
||
/// Unit tests for the researcher hand: config/type invariants, action
/// deserialization, URL encoding, HTML text extraction, and Hand trait
/// methods. Network-dependent paths (search/fetch) are not exercised here.
#[cfg(test)]
mod tests {
    use super::*;

    // Shared test helpers.

    fn create_test_hand() -> ResearcherHand {
        ResearcherHand::new()
    }

    fn test_context() -> HandContext {
        HandContext::default()
    }

    // --- Config & Type Tests ---

    #[test]
    fn test_config_id() {
        let hand = create_test_hand();
        assert_eq!(hand.config().id, "researcher");
        assert_eq!(hand.config().name, "研究员");
        assert!(hand.config().enabled);
        assert!(!hand.config().needs_approval);
    }

    #[test]
    fn test_search_engine_default_is_auto() {
        let engine = SearchEngine::default();
        assert!(matches!(engine, SearchEngine::Auto));
    }

    #[test]
    fn test_research_depth_default_is_standard() {
        let depth = ResearchDepth::default();
        assert!(matches!(depth, ResearchDepth::Standard));
    }

    #[test]
    fn test_research_depth_serialize() {
        let json = serde_json::to_string(&ResearchDepth::Deep).unwrap();
        assert_eq!(json, "\"deep\"");
    }

    #[test]
    fn test_research_depth_deserialize() {
        let depth: ResearchDepth = serde_json::from_str("\"quick\"").unwrap();
        assert!(matches!(depth, ResearchDepth::Quick));
    }

    #[test]
    fn test_search_engine_serialize_roundtrip() {
        // Every variant must survive a serialize/deserialize round trip.
        for engine in [SearchEngine::Google, SearchEngine::Bing, SearchEngine::DuckDuckGo, SearchEngine::Auto] {
            let json = serde_json::to_string(&engine).unwrap();
            let back: SearchEngine = serde_json::from_str(&json).unwrap();
            assert_eq!(json, serde_json::to_string(&back).unwrap());
        }
    }

    // --- Action Deserialization Tests ---

    #[test]
    fn test_action_search_deserialize() {
        let json = json!({
            "action": "search",
            "query": {
                "query": "Rust programming",
                "engine": "duckduckgo",
                "depth": "quick",
                "maxResults": 5
            }
        });
        let action: ResearcherAction = serde_json::from_value(json).unwrap();
        match action {
            ResearcherAction::Search { query } => {
                assert_eq!(query.query, "Rust programming");
                assert!(matches!(query.engine, SearchEngine::DuckDuckGo));
                assert!(matches!(query.depth, ResearchDepth::Quick));
                assert_eq!(query.max_results, 5);
            }
            _ => panic!("Expected Search action"),
        }
    }

    #[test]
    fn test_action_fetch_deserialize() {
        let json = json!({
            "action": "fetch",
            "url": "https://example.com/page"
        });
        let action: ResearcherAction = serde_json::from_value(json).unwrap();
        match action {
            ResearcherAction::Fetch { url } => {
                assert_eq!(url, "https://example.com/page");
            }
            _ => panic!("Expected Fetch action"),
        }
    }

    #[test]
    fn test_action_report_deserialize() {
        let json = json!({
            "action": "report",
            "query": {
                "query": "AI trends 2026",
                "depth": "deep"
            }
        });
        let action: ResearcherAction = serde_json::from_value(json).unwrap();
        match action {
            ResearcherAction::Report { query } => {
                assert_eq!(query.query, "AI trends 2026");
                assert!(matches!(query.depth, ResearchDepth::Deep));
            }
            _ => panic!("Expected Report action"),
        }
    }

    #[test]
    fn test_action_invalid_rejected() {
        // Unknown action tags must fail deserialization, not silently map.
        let json = json!({
            "action": "unknown_action",
            "data": "whatever"
        });
        let result: std::result::Result<ResearcherAction, _> = serde_json::from_value(json);
        assert!(result.is_err());
    }

    // --- URL Encoding Tests ---

    #[test]
    fn test_url_encode_ascii() {
        assert_eq!(url_encode("hello world"), "hello%20world");
    }

    #[test]
    fn test_url_encode_chinese() {
        let encoded = url_encode("中文搜索");
        assert!(encoded.contains("%"));
        // Chinese chars should be percent-encoded
        assert!(!encoded.contains("中文"));
    }

    #[test]
    fn test_url_encode_safe_chars() {
        // RFC 3986 unreserved characters pass through unchanged.
        assert_eq!(url_encode("abc123-_."), "abc123-_.".to_string());
    }

    #[test]
    fn test_url_encode_empty() {
        assert_eq!(url_encode(""), "");
    }

    // --- HTML Text Extraction Tests ---

    #[test]
    fn test_extract_text_basic() {
        let hand = create_test_hand();
        let html = "<html><body><h1>Title</h1><p>Content here</p></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(text.contains("Title"));
        assert!(text.contains("Content here"));
    }

    #[test]
    fn test_extract_text_strips_scripts() {
        let hand = create_test_hand();
        let html = "<html><body><script>alert('xss')</script><p>Safe text</p></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(!text.contains("alert"));
        assert!(text.contains("Safe text"));
    }

    #[test]
    fn test_extract_text_strips_styles() {
        let hand = create_test_hand();
        let html = "<html><body><style>.class{color:red}</style><p>Visible</p></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(!text.contains("color"));
        assert!(text.contains("Visible"));
    }

    #[test]
    fn test_extract_text_truncates_long_content() {
        let hand = create_test_hand();
        let long_body: String = "x".repeat(20000);
        let html = format!("<html><body><p>{}</p></body></html>", long_body);
        let text = hand.extract_text_from_html(&html);
        assert!(text.len() <= 10003); // 10000 + "..."
    }

    #[test]
    fn test_extract_text_empty_body() {
        let hand = create_test_hand();
        let html = "<html><body></body></html>";
        let text = hand.extract_text_from_html(html);
        assert!(text.is_empty());
    }

    // --- Hand Trait Tests ---

    #[tokio::test]
    async fn test_needs_approval_is_false() {
        let hand = create_test_hand();
        assert!(!hand.needs_approval());
    }

    #[tokio::test]
    async fn test_status_is_idle() {
        let hand = create_test_hand();
        assert!(matches!(hand.status(), crate::HandStatus::Idle));
    }

    #[tokio::test]
    async fn test_check_dependencies_ok() {
        let hand = create_test_hand();
        let missing = hand.check_dependencies().unwrap();
        // check_dependencies currently always reports no missing dependencies.
        assert!(missing.is_empty());
    }

    // --- Default Values Tests ---

    #[test]
    fn test_research_query_defaults() {
        // Only "query" is required; everything else falls back to defaults.
        let json = json!({ "query": "test" });
        let query: ResearchQuery = serde_json::from_value(json).unwrap();
        assert_eq!(query.query, "test");
        assert!(matches!(query.engine, SearchEngine::Auto));
        assert!(matches!(query.depth, ResearchDepth::Standard));
        assert_eq!(query.max_results, 10);
        assert_eq!(query.time_limit_secs, 60);
        assert!(!query.include_related);
    }

    #[test]
    fn test_search_result_serialization() {
        let result = SearchResult {
            title: "Test".to_string(),
            url: "https://example.com".to_string(),
            snippet: "A snippet".to_string(),
            source: "TestSource".to_string(),
            relevance: 90,
            content: None,
            fetched_at: None,
        };
        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("Test"));
        assert!(json.contains("https://example.com"));
    }

    #[test]
    fn test_research_report_summary_is_some_when_results() {
        // Verify the struct allows Some value
        let report = ResearchReport {
            query: "test".to_string(),
            results: vec![SearchResult {
                title: "R".to_string(),
                url: "https://r.co".to_string(),
                snippet: "snippet text".to_string(),
                source: "S".to_string(),
                relevance: 80,
                content: None,
                fetched_at: None,
            }],
            summary: Some("基于 1 条搜索结果:snippet text".to_string()),
            key_findings: vec![],
            related_topics: vec![],
            researched_at: "2026-01-01T00:00:00Z".to_string(),
            duration_ms: 100,
        };
        assert!(report.summary.is_some());
        assert!(report.summary.unwrap().contains("snippet text"));
    }
}
|