//! Researcher Hand - Deep research and analysis capabilities //! //! This hand provides web search, content fetching, and research synthesis. use async_trait::async_trait; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::RwLock; use zclaw_types::Result; use crate::{Hand, HandConfig, HandContext, HandResult}; /// Search engine options #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum SearchEngine { Google, Bing, DuckDuckGo, Auto, } impl Default for SearchEngine { fn default() -> Self { Self::Auto } } /// Research depth level #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum ResearchDepth { Quick, // Fast search, top 3 results Standard, // Normal search, top 10 results Deep, // Comprehensive search, multiple sources } impl Default for ResearchDepth { fn default() -> Self { Self::Standard } } /// Research query configuration #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct ResearchQuery { /// Search query pub query: String, /// Search engine to use #[serde(default)] pub engine: SearchEngine, /// Research depth #[serde(default)] pub depth: ResearchDepth, /// Maximum results to return #[serde(default = "default_max_results")] pub max_results: usize, /// Include related topics #[serde(default)] pub include_related: bool, /// Time limit in seconds #[serde(default = "default_time_limit")] pub time_limit_secs: u64, } fn default_max_results() -> usize { 10 } fn default_time_limit() -> u64 { 60 } /// Search result item #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct SearchResult { /// Title of the result pub title: String, /// URL pub url: String, /// Snippet/summary pub snippet: String, /// Source name pub source: String, /// Relevance score (0-100) #[serde(default)] pub relevance: u8, /// Fetched content (if available) #[serde(default)] pub content: Option, /// Timestamp #[serde(default)] pub fetched_at: Option, } /// Research report #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct ResearchReport { /// Original query pub query: String, /// Search results pub results: Vec, /// Synthesized summary #[serde(default)] pub summary: Option, /// Key findings #[serde(default)] pub key_findings: Vec, /// Related topics discovered #[serde(default)] pub related_topics: Vec, /// Research timestamp pub researched_at: String, /// Total time spent (ms) pub duration_ms: u64, } /// Researcher action types #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "action")] pub enum ResearcherAction { #[serde(rename = "search")] Search { query: ResearchQuery }, #[serde(rename = "fetch")] Fetch { url: String }, #[serde(rename = "summarize")] Summarize { urls: Vec }, #[serde(rename = "report")] Report { query: ResearchQuery }, } /// Researcher Hand implementation pub struct ResearcherHand { config: HandConfig, client: reqwest::Client, cache: Arc>>, } impl ResearcherHand { /// Create a new researcher hand pub fn new() -> Self { Self { config: HandConfig { id: "researcher".to_string(), name: "研究员".to_string(), description: "深度研究和分析能力,支持网络搜索和内容获取".to_string(), needs_approval: false, dependencies: vec!["network".to_string()], input_schema: Some(serde_json::json!({ "type": "object", "oneOf": [ { "properties": { "action": { "const": "search" }, "query": { "type": "object", "properties": { "query": { "type": "string" }, "engine": { "type": "string", "enum": ["google", "bing", "duckduckgo", "auto"] }, "depth": { "type": "string", "enum": ["quick", "standard", "deep"] }, "maxResults": { "type": "integer" } }, "required": ["query"] } }, "required": ["action", "query"] }, { "properties": { "action": { "const": "fetch" }, "url": { "type": "string" } }, "required": ["action", "url"] }, { "properties": { "action": { "const": "report" }, "query": { "$ref": "#/properties/query" } }, "required": ["action", "query"] } ] })), tags: vec!["research".to_string(), "web".to_string(), "search".to_string()], enabled: true, }, client: reqwest::Client::builder() .timeout(std::time::Duration::from_secs(30)) .user_agent("ZCLAW-Researcher/1.0") .build() .unwrap_or_else(|_| reqwest::Client::new()), cache: Arc::new(RwLock::new(HashMap::new())), } } /// Execute a web search async fn execute_search(&self, query: &ResearchQuery) -> Result> { let start = std::time::Instant::now(); // Use DuckDuckGo as default search (no API key required) let results = self.search_duckduckgo(&query.query, query.max_results).await?; let duration = start.elapsed().as_millis() as u64; tracing::info!( target: "researcher", query = %query.query, duration_ms = duration, results_count = results.len(), "Search completed" ); Ok(results) } /// Search using DuckDuckGo (no API key required) async fn search_duckduckgo(&self, query: &str, max_results: usize) -> Result> { let url = format!("https://api.duckduckgo.com/?q={}&format=json&no_html=1", url_encode(query)); let response = self.client .get(&url) .send() .await .map_err(|e| zclaw_types::ZclawError::HandError(format!("Search request failed: {}", e)))?; let json: Value = response.json().await .map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to parse search response: {}", e)))?; let mut results = Vec::new(); // Parse DuckDuckGo Instant Answer if let Some(abstract_text) = json.get("AbstractText").and_then(|v| v.as_str()) { if !abstract_text.is_empty() { results.push(SearchResult { title: query.to_string(), url: json.get("AbstractURL") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(), snippet: abstract_text.to_string(), source: json.get("AbstractSource") .and_then(|v| v.as_str()) .unwrap_or("DuckDuckGo") .to_string(), relevance: 100, content: None, fetched_at: Some(chrono::Utc::now().to_rfc3339()), }); } } // Parse related topics if let Some(related) = json.get("RelatedTopics").and_then(|v| v.as_array()) { for item in related.iter().take(max_results) { if let Some(obj) = item.as_object() { results.push(SearchResult { title: obj.get("Text") .and_then(|v| v.as_str()) .unwrap_or("Related Topic") .to_string(), url: obj.get("FirstURL") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(), snippet: obj.get("Text") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(), source: "DuckDuckGo".to_string(), relevance: 80, content: None, fetched_at: Some(chrono::Utc::now().to_rfc3339()), }); } } } Ok(results) } /// Fetch content from a URL async fn execute_fetch(&self, url: &str) -> Result { let start = std::time::Instant::now(); // Check cache first { let cache = self.cache.read().await; if let Some(cached) = cache.get(url) { if cached.content.is_some() { return Ok(cached.clone()); } } } let response = self.client .get(url) .send() .await .map_err(|e| zclaw_types::ZclawError::HandError(format!("Fetch request failed: {}", e)))?; let content_type = response.headers() .get(reqwest::header::CONTENT_TYPE) .and_then(|v| v.to_str().ok()) .unwrap_or(""); let content = if content_type.contains("text/html") { // Extract text from HTML let html = response.text().await .map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to read HTML: {}", e)))?; self.extract_text_from_html(&html) } else if content_type.contains("text/") || content_type.contains("application/json") { response.text().await .map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to read text: {}", e)))? } else { "[Binary content]".to_string() }; let result = SearchResult { title: url.to_string(), url: url.to_string(), snippet: content.chars().take(500).collect(), source: url.to_string(), relevance: 100, content: Some(content), fetched_at: Some(chrono::Utc::now().to_rfc3339()), }; // Cache the result { let mut cache = self.cache.write().await; cache.insert(url.to_string(), result.clone()); } let duration = start.elapsed().as_millis() as u64; tracing::info!( target: "researcher", url = url, duration_ms = duration, "Fetch completed" ); Ok(result) } /// Extract readable text from HTML fn extract_text_from_html(&self, html: &str) -> String { // Simple text extraction - remove HTML tags let mut text = String::new(); let mut in_tag = false; let mut in_script = false; let mut in_style = false; for c in html.chars() { match c { '<' => { in_tag = true; let remaining = html[text.len()..].to_lowercase(); if remaining.starts_with("' => { in_tag = false; let remaining = html[text.len()..].to_lowercase(); if remaining.starts_with("") { in_script = false; } else if remaining.starts_with("") { in_style = false; } } _ if in_tag => {} _ if in_script || in_style => {} ' ' | '\n' | '\t' | '\r' => { if !text.ends_with(' ') && !text.is_empty() { text.push(' '); } } _ => text.push(c), } } // Limit length if text.len() > 10000 { text.truncate(10000); text.push_str("..."); } text.trim().to_string() } /// Generate a comprehensive research report async fn execute_report(&self, query: &ResearchQuery) -> Result { let start = std::time::Instant::now(); // First, execute search let mut results = self.execute_search(query).await?; // Fetch content for top results let fetch_limit = match query.depth { ResearchDepth::Quick => 1, ResearchDepth::Standard => 3, ResearchDepth::Deep => 5, }; for result in results.iter_mut().take(fetch_limit) { if !result.url.is_empty() { match self.execute_fetch(&result.url).await { Ok(fetched) => { result.content = fetched.content; result.fetched_at = fetched.fetched_at; } Err(e) => { tracing::warn!(target: "researcher", error = %e, "Failed to fetch content"); } } } } // Extract key findings let key_findings: Vec = results.iter() .take(5) .filter_map(|r| { r.content.as_ref().map(|c| { c.split(". ") .take(3) .collect::>() .join(". ") }) }) .collect(); // Extract related topics from snippets let related_topics: Vec = results.iter() .filter_map(|r| { if r.snippet.len() > 50 { Some(r.title.clone()) } else { None } }) .take(5) .collect(); let duration = start.elapsed().as_millis() as u64; Ok(ResearchReport { query: query.query.clone(), results, summary: None, // Would require LLM integration key_findings, related_topics, researched_at: chrono::Utc::now().to_rfc3339(), duration_ms: duration, }) } } impl Default for ResearcherHand { fn default() -> Self { Self::new() } } #[async_trait] impl Hand for ResearcherHand { fn config(&self) -> &HandConfig { &self.config } async fn execute(&self, _context: &HandContext, input: Value) -> Result { let action: ResearcherAction = serde_json::from_value(input.clone()) .map_err(|e| zclaw_types::ZclawError::HandError(format!("Invalid action: {}", e)))?; let start = std::time::Instant::now(); let result = match action { ResearcherAction::Search { query } => { let results = self.execute_search(&query).await?; json!({ "action": "search", "query": query.query, "results": results, "duration_ms": start.elapsed().as_millis() }) } ResearcherAction::Fetch { url } => { let result = self.execute_fetch(&url).await?; json!({ "action": "fetch", "url": url, "result": result, "duration_ms": start.elapsed().as_millis() }) } ResearcherAction::Summarize { urls } => { let mut results = Vec::new(); for url in urls.iter().take(5) { if let Ok(result) = self.execute_fetch(url).await { results.push(result); } } json!({ "action": "summarize", "urls": urls, "results": results, "duration_ms": start.elapsed().as_millis() }) } ResearcherAction::Report { query } => { let report = self.execute_report(&query).await?; json!({ "action": "report", "report": report }) } }; Ok(HandResult::success(result)) } fn needs_approval(&self) -> bool { false // Research operations are generally safe } fn check_dependencies(&self) -> Result> { // Network connectivity will be checked at runtime Ok(Vec::new()) } fn status(&self) -> crate::HandStatus { crate::HandStatus::Idle } } /// URL encoding helper (simple implementation) fn url_encode(s: &str) -> String { s.chars() .map(|c| match c { 'A'..='Z' | 'a'..='z' | '0'..='9' | '-' | '_' | '.' | '~' => c.to_string(), _ => format!("%{:02X}", c as u32), }) .collect() }