Files
zclaw_openfang/crates/zclaw-hands/src/hands/researcher.rs
iven aa6a9cbd84
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
feat: 新增技能编排引擎和工作流构建器组件
refactor: 统一Hands系统常量到单个源文件
refactor: 更新Hands中文名称和描述

fix: 修复技能市场在连接状态变化时重新加载
fix: 修复身份变更提案的错误处理逻辑

docs: 更新多个功能文档的验证状态和实现位置
docs: 更新Hands系统文档

test: 添加测试文件验证工作区路径
2026-03-25 08:27:25 +08:00

546 lines
18 KiB
Rust

//! Researcher Hand - Deep research and analysis capabilities
//!
//! This hand provides web search, content fetching, and research synthesis.
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use zclaw_types::Result;
use crate::{Hand, HandConfig, HandContext, HandResult};
/// Search engine options.
///
/// Serialized in lowercase (e.g. `"duckduckgo"`) to match the hand's input
/// schema. The manual `Default` impl was replaced by the derive with a
/// `#[default]` variant marker — identical behavior, less code.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum SearchEngine {
    Google,
    Bing,
    DuckDuckGo,
    /// Let the hand pick an engine (the search path currently uses DuckDuckGo).
    #[default]
    Auto,
}
/// Research depth level.
///
/// Controls how many top search results get their full content fetched when
/// building a report. (The previous per-variant comments claimed "top 3/10
/// results", which did not match the actual fetch limits of 1/3/5.)
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ResearchDepth {
    /// Fast: content is fetched for 1 result in report mode.
    Quick,
    /// Normal: content is fetched for 3 results in report mode.
    #[default]
    Standard,
    /// Comprehensive: content is fetched for 5 results in report mode.
    Deep,
}
/// Research query configuration.
///
/// Deserialized from camelCase JSON (e.g. `maxResults`, `timeLimitSecs`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ResearchQuery {
    /// Search query text (the only required field).
    pub query: String,
    /// Search engine to use; defaults to `auto`.
    #[serde(default)]
    pub engine: SearchEngine,
    /// Research depth; defaults to `standard`.
    #[serde(default)]
    pub depth: ResearchDepth,
    /// Maximum results to return (default: 10).
    #[serde(default = "default_max_results")]
    pub max_results: usize,
    /// Include related topics in the output.
    #[serde(default)]
    pub include_related: bool,
    /// Time limit in seconds (default: 60).
    /// NOTE(review): not currently enforced anywhere visible in this file.
    #[serde(default = "default_time_limit")]
    pub time_limit_secs: u64,
}

/// Serde default for [`ResearchQuery::max_results`].
fn default_max_results() -> usize { 10 }

/// Serde default for [`ResearchQuery::time_limit_secs`].
fn default_time_limit() -> u64 { 60 }
/// A single search or fetch result.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
    /// Title of the result.
    pub title: String,
    /// URL of the result; may be empty when the source provided none.
    pub url: String,
    /// Snippet/summary text.
    pub snippet: String,
    /// Source name (e.g. "DuckDuckGo", or the fetched URL itself).
    pub source: String,
    /// Relevance score (0-100); 0 when absent from the payload.
    #[serde(default)]
    pub relevance: u8,
    /// Full fetched content, present only after a successful fetch.
    #[serde(default)]
    pub content: Option<String>,
    /// RFC 3339 timestamp of when the content was fetched.
    #[serde(default)]
    pub fetched_at: Option<String>,
}
/// Aggregated research report produced by the `report` action.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ResearchReport {
    /// Original query text.
    pub query: String,
    /// Search results, with content fetched for the top entries.
    pub results: Vec<SearchResult>,
    /// Synthesized summary; currently always `None` (would require LLM
    /// integration).
    #[serde(default)]
    pub summary: Option<String>,
    /// Key findings: leading sentences extracted from fetched content.
    #[serde(default)]
    pub key_findings: Vec<String>,
    /// Related topics discovered (titles of results with long snippets).
    #[serde(default)]
    pub related_topics: Vec<String>,
    /// RFC 3339 timestamp of when the research completed.
    pub researched_at: String,
    /// Total time spent in milliseconds.
    pub duration_ms: u64,
}
/// Researcher action types.
///
/// Incoming JSON is tagged by an `"action"` field:
/// `search` / `fetch` / `summarize` / `report`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "action")]
pub enum ResearcherAction {
    /// Run a web search and return raw results.
    #[serde(rename = "search")]
    Search { query: ResearchQuery },
    /// Fetch and extract content from a single URL.
    #[serde(rename = "fetch")]
    Fetch { url: String },
    /// Fetch several URLs (at most five are processed; failures are skipped).
    #[serde(rename = "summarize")]
    Summarize { urls: Vec<String> },
    /// Produce a full research report for a query.
    #[serde(rename = "report")]
    Report { query: ResearchQuery },
}
/// Researcher Hand implementation.
pub struct ResearcherHand {
    /// Static hand configuration (id, schema, dependencies, tags).
    config: HandConfig,
    /// Shared HTTP client used for both searches and content fetches.
    client: reqwest::Client,
    /// Per-URL cache of fetched results, shared across concurrent calls.
    cache: Arc<RwLock<HashMap<String, SearchResult>>>,
}
impl ResearcherHand {
    /// Create a new researcher hand with its default configuration, a
    /// 30-second-timeout HTTP client, and an empty per-URL fetch cache.
    pub fn new() -> Self {
        Self {
            config: HandConfig {
                id: "researcher".to_string(),
                name: "研究员".to_string(),
                description: "深度研究和分析能力,支持网络搜索和内容获取".to_string(),
                needs_approval: false,
                dependencies: vec!["network".to_string()],
                // JSON Schema describing accepted action payloads
                // (mirrors `ResearcherAction`).
                input_schema: Some(serde_json::json!({
                    "type": "object",
                    "oneOf": [
                        {
                            "properties": {
                                "action": { "const": "search" },
                                "query": {
                                    "type": "object",
                                    "properties": {
                                        "query": { "type": "string" },
                                        "engine": { "type": "string", "enum": ["google", "bing", "duckduckgo", "auto"] },
                                        "depth": { "type": "string", "enum": ["quick", "standard", "deep"] },
                                        "maxResults": { "type": "integer" }
                                    },
                                    "required": ["query"]
                                }
                            },
                            "required": ["action", "query"]
                        },
                        {
                            "properties": {
                                "action": { "const": "fetch" },
                                "url": { "type": "string" }
                            },
                            "required": ["action", "url"]
                        },
                        {
                            "properties": {
                                "action": { "const": "report" },
                                "query": { "$ref": "#/properties/query" }
                            },
                            "required": ["action", "query"]
                        }
                    ]
                })),
                tags: vec!["research".to_string(), "web".to_string(), "search".to_string()],
                enabled: true,
            },
            client: reqwest::Client::builder()
                .timeout(std::time::Duration::from_secs(30))
                .user_agent("ZCLAW-Researcher/1.0")
                .build()
                // Builder failure is unexpected; fall back to a default client
                // rather than panicking during construction.
                .unwrap_or_else(|_| reqwest::Client::new()),
            cache: Arc::new(RwLock::new(HashMap::new())),
        }
    }

    /// Execute a web search and return the raw results.
    ///
    /// # Errors
    /// Returns a `HandError` when the search request or response parsing fails.
    async fn execute_search(&self, query: &ResearchQuery) -> Result<Vec<SearchResult>> {
        let start = std::time::Instant::now();
        // Use DuckDuckGo as the default search (no API key required).
        // NOTE(review): the requested `query.engine` is currently ignored.
        let results = self.search_duckduckgo(&query.query, query.max_results).await?;
        let duration = start.elapsed().as_millis() as u64;
        tracing::info!(
            target: "researcher",
            query = %query.query,
            duration_ms = duration,
            results_count = results.len(),
            "Search completed"
        );
        Ok(results)
    }

    /// Search using the DuckDuckGo Instant Answer API (no API key required).
    ///
    /// Produces at most one high-relevance "abstract" result plus up to
    /// `max_results` related-topic results.
    ///
    /// # Errors
    /// Returns a `HandError` when the request or JSON parsing fails.
    async fn search_duckduckgo(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
        let url = format!("https://api.duckduckgo.com/?q={}&format=json&no_html=1",
            url_encode(query));
        let response = self.client
            .get(&url)
            .send()
            .await
            .map_err(|e| zclaw_types::ZclawError::HandError(format!("Search request failed: {}", e)))?;
        let json: Value = response.json().await
            .map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to parse search response: {}", e)))?;
        let mut results = Vec::new();
        // Parse the DuckDuckGo Instant Answer abstract (a single summary).
        if let Some(abstract_text) = json.get("AbstractText").and_then(|v| v.as_str()) {
            if !abstract_text.is_empty() {
                results.push(SearchResult {
                    title: query.to_string(),
                    url: json.get("AbstractURL")
                        .and_then(|v| v.as_str())
                        .unwrap_or("")
                        .to_string(),
                    snippet: abstract_text.to_string(),
                    source: json.get("AbstractSource")
                        .and_then(|v| v.as_str())
                        .unwrap_or("DuckDuckGo")
                        .to_string(),
                    relevance: 100,
                    content: None,
                    fetched_at: Some(chrono::Utc::now().to_rfc3339()),
                });
            }
        }
        // Parse related topics into lower-relevance results.
        if let Some(related) = json.get("RelatedTopics").and_then(|v| v.as_array()) {
            for item in related.iter().take(max_results) {
                if let Some(obj) = item.as_object() {
                    results.push(SearchResult {
                        title: obj.get("Text")
                            .and_then(|v| v.as_str())
                            .unwrap_or("Related Topic")
                            .to_string(),
                        url: obj.get("FirstURL")
                            .and_then(|v| v.as_str())
                            .unwrap_or("")
                            .to_string(),
                        snippet: obj.get("Text")
                            .and_then(|v| v.as_str())
                            .unwrap_or("")
                            .to_string(),
                        source: "DuckDuckGo".to_string(),
                        relevance: 80,
                        content: None,
                        fetched_at: Some(chrono::Utc::now().to_rfc3339()),
                    });
                }
            }
        }
        Ok(results)
    }

    /// Fetch content from a URL, caching successful fetches per URL.
    ///
    /// HTML responses are reduced to readable text; other `text/*` and JSON
    /// bodies are returned verbatim; anything else becomes a placeholder.
    ///
    /// # Errors
    /// Returns a `HandError` when the request or body read fails.
    async fn execute_fetch(&self, url: &str) -> Result<SearchResult> {
        let start = std::time::Instant::now();
        // Serve from cache when we already hold fetched content for this URL.
        {
            let cache = self.cache.read().await;
            if let Some(cached) = cache.get(url) {
                if cached.content.is_some() {
                    return Ok(cached.clone());
                }
            }
        }
        let response = self.client
            .get(url)
            .send()
            .await
            .map_err(|e| zclaw_types::ZclawError::HandError(format!("Fetch request failed: {}", e)))?;
        // Own the content type up front: `response.text()` below consumes
        // `response`, so we cannot keep a `&str` borrowed from its headers
        // (the previous code did, which fails the borrow check).
        let content_type = response.headers()
            .get(reqwest::header::CONTENT_TYPE)
            .and_then(|v| v.to_str().ok())
            .unwrap_or("")
            .to_string();
        let content = if content_type.contains("text/html") {
            // Extract readable text from HTML.
            let html = response.text().await
                .map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to read HTML: {}", e)))?;
            self.extract_text_from_html(&html)
        } else if content_type.contains("text/") || content_type.contains("application/json") {
            response.text().await
                .map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to read text: {}", e)))?
        } else {
            "[Binary content]".to_string()
        };
        let result = SearchResult {
            title: url.to_string(),
            url: url.to_string(),
            // First 500 chars as a preview (char-based, safe for UTF-8).
            snippet: content.chars().take(500).collect(),
            source: url.to_string(),
            relevance: 100,
            content: Some(content),
            fetched_at: Some(chrono::Utc::now().to_rfc3339()),
        };
        // Cache the result for subsequent fetches of the same URL.
        {
            let mut cache = self.cache.write().await;
            cache.insert(url.to_string(), result.clone());
        }
        let duration = start.elapsed().as_millis() as u64;
        tracing::info!(
            target: "researcher",
            url = url,
            duration_ms = duration,
            "Fetch completed"
        );
        Ok(result)
    }

    /// Extract readable text from HTML.
    ///
    /// Strips tags and the contents of `<script>`/`<style>` elements,
    /// collapses whitespace runs to a single space, and caps the output at
    /// roughly 10,000 bytes.
    ///
    /// Fixes over the previous version: the old code indexed `html` with
    /// `text.len()` (the length of the *output* accumulator, not a position
    /// in `html`), so script/style detection never worked and the slice could
    /// panic on a non-char-boundary offset; and `text.truncate(10000)` could
    /// panic mid-multibyte-character. Both are corrected here.
    fn extract_text_from_html(&self, html: &str) -> String {
        let mut text = String::new();
        let mut in_tag = false;
        let mut in_script = false;
        let mut in_style = false;
        for (i, c) in html.char_indices() {
            match c {
                '<' => {
                    in_tag = true;
                    // Byte-wise, case-insensitive prefix check at the actual
                    // position `i` in `html`. `get(..)` avoids panics near
                    // the end of the input.
                    let rest = html[i..].as_bytes();
                    let starts = |p: &[u8]| {
                        rest.get(..p.len()).map_or(false, |r| r.eq_ignore_ascii_case(p))
                    };
                    if starts(b"<script") {
                        in_script = true;
                    } else if starts(b"<style") {
                        in_style = true;
                    } else if starts(b"</script") {
                        in_script = false;
                    } else if starts(b"</style") {
                        in_style = false;
                    }
                }
                '>' => in_tag = false,
                _ if in_tag || in_script || in_style => {}
                ' ' | '\n' | '\t' | '\r' => {
                    // Collapse whitespace runs into a single space.
                    if !text.ends_with(' ') && !text.is_empty() {
                        text.push(' ');
                    }
                }
                _ => text.push(c),
            }
        }
        // Cap output length, backing up to a char boundary so `truncate`
        // cannot panic on multi-byte UTF-8 content.
        if text.len() > 10_000 {
            let mut cut = 10_000;
            while !text.is_char_boundary(cut) {
                cut -= 1;
            }
            text.truncate(cut);
            text.push_str("...");
        }
        text.trim().to_string()
    }

    /// Generate a comprehensive research report: search, fetch content for
    /// the top results (count determined by `query.depth`), then extract key
    /// findings and related topics.
    ///
    /// # Errors
    /// Fails only if the initial search fails; individual content fetches are
    /// best-effort and merely logged on failure.
    async fn execute_report(&self, query: &ResearchQuery) -> Result<ResearchReport> {
        let start = std::time::Instant::now();
        // First, execute the search.
        let mut results = self.execute_search(query).await?;
        // How many top results get their full content fetched.
        let fetch_limit = match query.depth {
            ResearchDepth::Quick => 1,
            ResearchDepth::Standard => 3,
            ResearchDepth::Deep => 5,
        };
        for result in results.iter_mut().take(fetch_limit) {
            if !result.url.is_empty() {
                match self.execute_fetch(&result.url).await {
                    Ok(fetched) => {
                        result.content = fetched.content;
                        result.fetched_at = fetched.fetched_at;
                    }
                    Err(e) => {
                        // Best-effort: keep the search result without content.
                        tracing::warn!(target: "researcher", error = %e, "Failed to fetch content");
                    }
                }
            }
        }
        // Key findings: the first three sentences of each fetched document
        // (up to five documents).
        let key_findings: Vec<String> = results.iter()
            .take(5)
            .filter_map(|r| {
                r.content.as_ref().map(|c| {
                    c.split(". ")
                        .take(3)
                        .collect::<Vec<_>>()
                        .join(". ")
                })
            })
            .collect();
        // Related topics: titles of results with substantial snippets.
        let related_topics: Vec<String> = results.iter()
            .filter_map(|r| {
                if r.snippet.len() > 50 {
                    Some(r.title.clone())
                } else {
                    None
                }
            })
            .take(5)
            .collect();
        let duration = start.elapsed().as_millis() as u64;
        Ok(ResearchReport {
            query: query.query.clone(),
            results,
            summary: None, // Would require LLM integration
            key_findings,
            related_topics,
            researched_at: chrono::Utc::now().to_rfc3339(),
            duration_ms: duration,
        })
    }
}
impl Default for ResearcherHand {
    /// Equivalent to [`ResearcherHand::new`].
    fn default() -> Self {
        Self::new()
    }
}
#[async_trait]
impl Hand for ResearcherHand {
    fn config(&self) -> &HandConfig {
        &self.config
    }

    /// Dispatch a [`ResearcherAction`] decoded from `input`.
    ///
    /// # Errors
    /// Returns a `HandError` when `input` is not a valid action payload or
    /// when the underlying search/fetch/report operation fails.
    async fn execute(&self, _context: &HandContext, input: Value) -> Result<HandResult> {
        // Deserialize by value: `input` is not used afterwards, so the
        // previous `input.clone()` was a redundant deep copy of the payload.
        let action: ResearcherAction = serde_json::from_value(input)
            .map_err(|e| zclaw_types::ZclawError::HandError(format!("Invalid action: {}", e)))?;
        let start = std::time::Instant::now();
        let result = match action {
            ResearcherAction::Search { query } => {
                let results = self.execute_search(&query).await?;
                json!({
                    "action": "search",
                    "query": query.query,
                    "results": results,
                    "duration_ms": start.elapsed().as_millis()
                })
            }
            ResearcherAction::Fetch { url } => {
                let result = self.execute_fetch(&url).await?;
                json!({
                    "action": "fetch",
                    "url": url,
                    "result": result,
                    "duration_ms": start.elapsed().as_millis()
                })
            }
            ResearcherAction::Summarize { urls } => {
                // Fetch at most five URLs; individual failures are skipped.
                let mut results = Vec::new();
                for url in urls.iter().take(5) {
                    if let Ok(result) = self.execute_fetch(url).await {
                        results.push(result);
                    }
                }
                json!({
                    "action": "summarize",
                    "urls": urls,
                    "results": results,
                    "duration_ms": start.elapsed().as_millis()
                })
            }
            ResearcherAction::Report { query } => {
                let report = self.execute_report(&query).await?;
                json!({
                    "action": "report",
                    "report": report
                })
            }
        };
        Ok(HandResult::success(result))
    }

    fn needs_approval(&self) -> bool {
        false // Research operations are generally safe
    }

    fn check_dependencies(&self) -> Result<Vec<String>> {
        // Network connectivity will be checked at runtime
        Ok(Vec::new())
    }

    fn status(&self) -> crate::HandStatus {
        crate::HandStatus::Idle
    }
}
/// Percent-encode a string for use in a URL query component.
///
/// Unreserved characters per RFC 3986 (`A-Z a-z 0-9 - _ . ~`) pass through;
/// every other byte is emitted as `%XX`.
///
/// Bug fix: the previous version encoded the Unicode *codepoint* of non-ASCII
/// characters (e.g. '中' → `%4E2D`), which is not valid percent-encoding.
/// RFC 3986 requires encoding the UTF-8 *octets* ('中' → `%E4%B8%AD`), which
/// matters here because this hand's queries are often Chinese.
fn url_encode(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                out.push(byte as char);
            }
            _ => out.push_str(&format!("%{:02X}", byte)),
        }
    }
    out
}