Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
refactor: 统一Hands系统常量到单个源文件 refactor: 更新Hands中文名称和描述 fix: 修复技能市场在连接状态变化时重新加载 fix: 修复身份变更提案的错误处理逻辑 docs: 更新多个功能文档的验证状态和实现位置 docs: 更新Hands系统文档 test: 添加测试文件验证工作区路径
546 lines
18 KiB
Rust
546 lines
18 KiB
Rust
//! Researcher Hand - Deep research and analysis capabilities
|
|
//!
|
|
//! This hand provides web search, content fetching, and research synthesis.
|
|
|
|
use async_trait::async_trait;
|
|
use serde::{Deserialize, Serialize};
|
|
use serde_json::{json, Value};
|
|
use std::collections::HashMap;
|
|
use std::sync::Arc;
|
|
use tokio::sync::RwLock;
|
|
use zclaw_types::Result;
|
|
|
|
use crate::{Hand, HandConfig, HandContext, HandResult};
|
|
|
|
/// Search engine options
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
#[serde(rename_all = "lowercase")]
|
|
pub enum SearchEngine {
|
|
Google,
|
|
Bing,
|
|
DuckDuckGo,
|
|
Auto,
|
|
}
|
|
|
|
impl Default for SearchEngine {
|
|
fn default() -> Self {
|
|
Self::Auto
|
|
}
|
|
}
|
|
|
|
/// Research depth level
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
#[serde(rename_all = "lowercase")]
|
|
pub enum ResearchDepth {
|
|
Quick, // Fast search, top 3 results
|
|
Standard, // Normal search, top 10 results
|
|
Deep, // Comprehensive search, multiple sources
|
|
}
|
|
|
|
impl Default for ResearchDepth {
|
|
fn default() -> Self {
|
|
Self::Standard
|
|
}
|
|
}
|
|
|
|
/// Research query configuration
///
/// Deserialized from camelCase JSON (e.g. `maxResults`, `timeLimitSecs`).
///
/// NOTE(review): `engine`, `include_related` and `time_limit_secs` are not
/// consulted by the current search implementation (all searches go through
/// DuckDuckGo) — confirm before relying on them.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ResearchQuery {
    /// Search query text.
    pub query: String,
    /// Search engine to use.
    #[serde(default)]
    pub engine: SearchEngine,
    /// Research depth; drives how many top results get fetched for a report.
    #[serde(default)]
    pub depth: ResearchDepth,
    /// Maximum results to return (defaults to 10).
    #[serde(default = "default_max_results")]
    pub max_results: usize,
    /// Include related topics.
    #[serde(default)]
    pub include_related: bool,
    /// Time limit in seconds (defaults to 60).
    #[serde(default = "default_time_limit")]
    pub time_limit_secs: u64,
}
|
|
|
|
/// Serde default for [`ResearchQuery::max_results`].
fn default_max_results() -> usize {
    10
}

/// Serde default for [`ResearchQuery::time_limit_secs`], in seconds.
fn default_time_limit() -> u64 {
    60
}
|
|
|
|
/// Search result item
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
#[serde(rename_all = "camelCase")]
|
|
pub struct SearchResult {
|
|
/// Title of the result
|
|
pub title: String,
|
|
/// URL
|
|
pub url: String,
|
|
/// Snippet/summary
|
|
pub snippet: String,
|
|
/// Source name
|
|
pub source: String,
|
|
/// Relevance score (0-100)
|
|
#[serde(default)]
|
|
pub relevance: u8,
|
|
/// Fetched content (if available)
|
|
#[serde(default)]
|
|
pub content: Option<String>,
|
|
/// Timestamp
|
|
#[serde(default)]
|
|
pub fetched_at: Option<String>,
|
|
}
|
|
|
|
/// Research report
///
/// Aggregated output of the `report` action: raw results plus derived
/// findings. `summary` is reserved for future LLM-backed synthesis and is
/// currently always `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ResearchReport {
    /// Original query text.
    pub query: String,
    /// Search results; top entries may carry fetched content.
    pub results: Vec<SearchResult>,
    /// Synthesized summary (currently never populated).
    #[serde(default)]
    pub summary: Option<String>,
    /// Key findings: leading sentences extracted from fetched pages.
    #[serde(default)]
    pub key_findings: Vec<String>,
    /// Related topics discovered during the search.
    #[serde(default)]
    pub related_topics: Vec<String>,
    /// Research timestamp (RFC 3339).
    pub researched_at: String,
    /// Total time spent (ms).
    pub duration_ms: u64,
}
|
|
|
|
/// Researcher action types
///
/// Internally tagged on an `"action"` field, e.g.
/// `{"action": "fetch", "url": "https://example.com"}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "action")]
pub enum ResearcherAction {
    /// Run a web search and return a list of results.
    #[serde(rename = "search")]
    Search { query: ResearchQuery },
    /// Fetch and extract the content of a single URL.
    #[serde(rename = "fetch")]
    Fetch { url: String },
    /// Fetch several URLs (capped at 5 by the executor) and return their contents.
    #[serde(rename = "summarize")]
    Summarize { urls: Vec<String> },
    /// Full research report: search, fetch top hits, derive findings.
    #[serde(rename = "report")]
    Report { query: ResearchQuery },
}
|
|
|
|
/// Researcher Hand implementation
pub struct ResearcherHand {
    // Static hand metadata: id, name, input schema, tags, dependencies.
    config: HandConfig,
    // Shared HTTP client (30 s timeout, "ZCLAW-Researcher/1.0" user agent).
    client: reqwest::Client,
    // URL -> fetched result cache; only entries that carry content are reused.
    cache: Arc<RwLock<HashMap<String, SearchResult>>>,
}
|
|
|
|
impl ResearcherHand {
|
|
/// Create a new researcher hand
|
|
pub fn new() -> Self {
|
|
Self {
|
|
config: HandConfig {
|
|
id: "researcher".to_string(),
|
|
name: "研究员".to_string(),
|
|
description: "深度研究和分析能力,支持网络搜索和内容获取".to_string(),
|
|
needs_approval: false,
|
|
dependencies: vec!["network".to_string()],
|
|
input_schema: Some(serde_json::json!({
|
|
"type": "object",
|
|
"oneOf": [
|
|
{
|
|
"properties": {
|
|
"action": { "const": "search" },
|
|
"query": {
|
|
"type": "object",
|
|
"properties": {
|
|
"query": { "type": "string" },
|
|
"engine": { "type": "string", "enum": ["google", "bing", "duckduckgo", "auto"] },
|
|
"depth": { "type": "string", "enum": ["quick", "standard", "deep"] },
|
|
"maxResults": { "type": "integer" }
|
|
},
|
|
"required": ["query"]
|
|
}
|
|
},
|
|
"required": ["action", "query"]
|
|
},
|
|
{
|
|
"properties": {
|
|
"action": { "const": "fetch" },
|
|
"url": { "type": "string" }
|
|
},
|
|
"required": ["action", "url"]
|
|
},
|
|
{
|
|
"properties": {
|
|
"action": { "const": "report" },
|
|
"query": { "$ref": "#/properties/query" }
|
|
},
|
|
"required": ["action", "query"]
|
|
}
|
|
]
|
|
})),
|
|
tags: vec!["research".to_string(), "web".to_string(), "search".to_string()],
|
|
enabled: true,
|
|
},
|
|
client: reqwest::Client::builder()
|
|
.timeout(std::time::Duration::from_secs(30))
|
|
.user_agent("ZCLAW-Researcher/1.0")
|
|
.build()
|
|
.unwrap_or_else(|_| reqwest::Client::new()),
|
|
cache: Arc::new(RwLock::new(HashMap::new())),
|
|
}
|
|
}
|
|
|
|
/// Execute a web search
|
|
async fn execute_search(&self, query: &ResearchQuery) -> Result<Vec<SearchResult>> {
|
|
let start = std::time::Instant::now();
|
|
|
|
// Use DuckDuckGo as default search (no API key required)
|
|
let results = self.search_duckduckgo(&query.query, query.max_results).await?;
|
|
|
|
let duration = start.elapsed().as_millis() as u64;
|
|
tracing::info!(
|
|
target: "researcher",
|
|
query = %query.query,
|
|
duration_ms = duration,
|
|
results_count = results.len(),
|
|
"Search completed"
|
|
);
|
|
|
|
Ok(results)
|
|
}
|
|
|
|
/// Search using DuckDuckGo (no API key required)
|
|
async fn search_duckduckgo(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
|
|
let url = format!("https://api.duckduckgo.com/?q={}&format=json&no_html=1",
|
|
url_encode(query));
|
|
|
|
let response = self.client
|
|
.get(&url)
|
|
.send()
|
|
.await
|
|
.map_err(|e| zclaw_types::ZclawError::HandError(format!("Search request failed: {}", e)))?;
|
|
|
|
let json: Value = response.json().await
|
|
.map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to parse search response: {}", e)))?;
|
|
|
|
let mut results = Vec::new();
|
|
|
|
// Parse DuckDuckGo Instant Answer
|
|
if let Some(abstract_text) = json.get("AbstractText").and_then(|v| v.as_str()) {
|
|
if !abstract_text.is_empty() {
|
|
results.push(SearchResult {
|
|
title: query.to_string(),
|
|
url: json.get("AbstractURL")
|
|
.and_then(|v| v.as_str())
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
snippet: abstract_text.to_string(),
|
|
source: json.get("AbstractSource")
|
|
.and_then(|v| v.as_str())
|
|
.unwrap_or("DuckDuckGo")
|
|
.to_string(),
|
|
relevance: 100,
|
|
content: None,
|
|
fetched_at: Some(chrono::Utc::now().to_rfc3339()),
|
|
});
|
|
}
|
|
}
|
|
|
|
// Parse related topics
|
|
if let Some(related) = json.get("RelatedTopics").and_then(|v| v.as_array()) {
|
|
for item in related.iter().take(max_results) {
|
|
if let Some(obj) = item.as_object() {
|
|
results.push(SearchResult {
|
|
title: obj.get("Text")
|
|
.and_then(|v| v.as_str())
|
|
.unwrap_or("Related Topic")
|
|
.to_string(),
|
|
url: obj.get("FirstURL")
|
|
.and_then(|v| v.as_str())
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
snippet: obj.get("Text")
|
|
.and_then(|v| v.as_str())
|
|
.unwrap_or("")
|
|
.to_string(),
|
|
source: "DuckDuckGo".to_string(),
|
|
relevance: 80,
|
|
content: None,
|
|
fetched_at: Some(chrono::Utc::now().to_rfc3339()),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(results)
|
|
}
|
|
|
|
/// Fetch content from a URL
|
|
async fn execute_fetch(&self, url: &str) -> Result<SearchResult> {
|
|
let start = std::time::Instant::now();
|
|
|
|
// Check cache first
|
|
{
|
|
let cache = self.cache.read().await;
|
|
if let Some(cached) = cache.get(url) {
|
|
if cached.content.is_some() {
|
|
return Ok(cached.clone());
|
|
}
|
|
}
|
|
}
|
|
|
|
let response = self.client
|
|
.get(url)
|
|
.send()
|
|
.await
|
|
.map_err(|e| zclaw_types::ZclawError::HandError(format!("Fetch request failed: {}", e)))?;
|
|
|
|
let content_type = response.headers()
|
|
.get(reqwest::header::CONTENT_TYPE)
|
|
.and_then(|v| v.to_str().ok())
|
|
.unwrap_or("");
|
|
|
|
let content = if content_type.contains("text/html") {
|
|
// Extract text from HTML
|
|
let html = response.text().await
|
|
.map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to read HTML: {}", e)))?;
|
|
self.extract_text_from_html(&html)
|
|
} else if content_type.contains("text/") || content_type.contains("application/json") {
|
|
response.text().await
|
|
.map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to read text: {}", e)))?
|
|
} else {
|
|
"[Binary content]".to_string()
|
|
};
|
|
|
|
let result = SearchResult {
|
|
title: url.to_string(),
|
|
url: url.to_string(),
|
|
snippet: content.chars().take(500).collect(),
|
|
source: url.to_string(),
|
|
relevance: 100,
|
|
content: Some(content),
|
|
fetched_at: Some(chrono::Utc::now().to_rfc3339()),
|
|
};
|
|
|
|
// Cache the result
|
|
{
|
|
let mut cache = self.cache.write().await;
|
|
cache.insert(url.to_string(), result.clone());
|
|
}
|
|
|
|
let duration = start.elapsed().as_millis() as u64;
|
|
tracing::info!(
|
|
target: "researcher",
|
|
url = url,
|
|
duration_ms = duration,
|
|
"Fetch completed"
|
|
);
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
/// Extract readable text from HTML
|
|
fn extract_text_from_html(&self, html: &str) -> String {
|
|
// Simple text extraction - remove HTML tags
|
|
let mut text = String::new();
|
|
let mut in_tag = false;
|
|
let mut in_script = false;
|
|
let mut in_style = false;
|
|
|
|
for c in html.chars() {
|
|
match c {
|
|
'<' => {
|
|
in_tag = true;
|
|
let remaining = html[text.len()..].to_lowercase();
|
|
if remaining.starts_with("<script") {
|
|
in_script = true;
|
|
} else if remaining.starts_with("<style") {
|
|
in_style = true;
|
|
}
|
|
}
|
|
'>' => {
|
|
in_tag = false;
|
|
let remaining = html[text.len()..].to_lowercase();
|
|
if remaining.starts_with("</script>") {
|
|
in_script = false;
|
|
} else if remaining.starts_with("</style>") {
|
|
in_style = false;
|
|
}
|
|
}
|
|
_ if in_tag => {}
|
|
_ if in_script || in_style => {}
|
|
' ' | '\n' | '\t' | '\r' => {
|
|
if !text.ends_with(' ') && !text.is_empty() {
|
|
text.push(' ');
|
|
}
|
|
}
|
|
_ => text.push(c),
|
|
}
|
|
}
|
|
|
|
// Limit length
|
|
if text.len() > 10000 {
|
|
text.truncate(10000);
|
|
text.push_str("...");
|
|
}
|
|
|
|
text.trim().to_string()
|
|
}
|
|
|
|
    /// Generate a comprehensive research report
    ///
    /// Pipeline: search -> fetch content for the top hits (count depends on
    /// `query.depth`) -> derive key findings and related topics. `summary`
    /// stays `None` because synthesis would require LLM integration.
    async fn execute_report(&self, query: &ResearchQuery) -> Result<ResearchReport> {
        let start = std::time::Instant::now();

        // First, execute search.
        let mut results = self.execute_search(query).await?;

        // Fetch content for top results; deeper research fetches more pages.
        let fetch_limit = match query.depth {
            ResearchDepth::Quick => 1,
            ResearchDepth::Standard => 3,
            ResearchDepth::Deep => 5,
        };

        for result in results.iter_mut().take(fetch_limit) {
            if !result.url.is_empty() {
                match self.execute_fetch(&result.url).await {
                    Ok(fetched) => {
                        result.content = fetched.content;
                        result.fetched_at = fetched.fetched_at;
                    }
                    Err(e) => {
                        // Best effort: a single failed fetch does not abort the report.
                        tracing::warn!(target: "researcher", error = %e, "Failed to fetch content");
                    }
                }
            }
        }

        // Extract key findings: the first three sentences of each page that
        // has content, from up to five results.
        let key_findings: Vec<String> = results.iter()
            .take(5)
            .filter_map(|r| {
                r.content.as_ref().map(|c| {
                    c.split(". ")
                        .take(3)
                        .collect::<Vec<_>>()
                        .join(". ")
                })
            })
            .collect();

        // Extract related topics from snippets: titles of results with a
        // substantial snippet (> 50 chars), capped at five.
        let related_topics: Vec<String> = results.iter()
            .filter_map(|r| {
                if r.snippet.len() > 50 {
                    Some(r.title.clone())
                } else {
                    None
                }
            })
            .take(5)
            .collect();

        let duration = start.elapsed().as_millis() as u64;

        Ok(ResearchReport {
            query: query.query.clone(),
            results,
            summary: None, // Would require LLM integration
            key_findings,
            related_topics,
            researched_at: chrono::Utc::now().to_rfc3339(),
            duration_ms: duration,
        })
    }
|
|
}
|
|
|
|
impl Default for ResearcherHand {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Hand for ResearcherHand {
|
|
fn config(&self) -> &HandConfig {
|
|
&self.config
|
|
}
|
|
|
|
async fn execute(&self, _context: &HandContext, input: Value) -> Result<HandResult> {
|
|
let action: ResearcherAction = serde_json::from_value(input.clone())
|
|
.map_err(|e| zclaw_types::ZclawError::HandError(format!("Invalid action: {}", e)))?;
|
|
|
|
let start = std::time::Instant::now();
|
|
|
|
let result = match action {
|
|
ResearcherAction::Search { query } => {
|
|
let results = self.execute_search(&query).await?;
|
|
json!({
|
|
"action": "search",
|
|
"query": query.query,
|
|
"results": results,
|
|
"duration_ms": start.elapsed().as_millis()
|
|
})
|
|
}
|
|
ResearcherAction::Fetch { url } => {
|
|
let result = self.execute_fetch(&url).await?;
|
|
json!({
|
|
"action": "fetch",
|
|
"url": url,
|
|
"result": result,
|
|
"duration_ms": start.elapsed().as_millis()
|
|
})
|
|
}
|
|
ResearcherAction::Summarize { urls } => {
|
|
let mut results = Vec::new();
|
|
for url in urls.iter().take(5) {
|
|
if let Ok(result) = self.execute_fetch(url).await {
|
|
results.push(result);
|
|
}
|
|
}
|
|
json!({
|
|
"action": "summarize",
|
|
"urls": urls,
|
|
"results": results,
|
|
"duration_ms": start.elapsed().as_millis()
|
|
})
|
|
}
|
|
ResearcherAction::Report { query } => {
|
|
let report = self.execute_report(&query).await?;
|
|
json!({
|
|
"action": "report",
|
|
"report": report
|
|
})
|
|
}
|
|
};
|
|
|
|
Ok(HandResult::success(result))
|
|
}
|
|
|
|
fn needs_approval(&self) -> bool {
|
|
false // Research operations are generally safe
|
|
}
|
|
|
|
fn check_dependencies(&self) -> Result<Vec<String>> {
|
|
// Network connectivity will be checked at runtime
|
|
Ok(Vec::new())
|
|
}
|
|
|
|
fn status(&self) -> crate::HandStatus {
|
|
crate::HandStatus::Idle
|
|
}
|
|
}
|
|
|
|
/// URL encoding helper (simple implementation)
///
/// Percent-encodes a string for use in a URL query component. Unreserved
/// characters (RFC 3986: ALPHA / DIGIT / "-" / "_" / "." / "~") pass through;
/// every other byte of the UTF-8 encoding is emitted as `%XX`.
///
/// The previous implementation encoded the Unicode scalar value instead of
/// the UTF-8 bytes, producing invalid sequences such as `%4E2D` for '中'
/// instead of `%E4%B8%AD`.
fn url_encode(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for &b in s.as_bytes() {
        match b {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                out.push(b as char);
            }
            _ => {
                out.push('%');
                out.push_str(&format!("{:02X}", b));
            }
        }
    }
    out
}
|