feat(hands): implement 4 new Hands and fix BrowserHand registration
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
- Add ResearcherHand: DuckDuckGo search, web fetch, report generation
- Add CollectorHand: data collection, aggregation, multiple output formats
- Add ClipHand: video processing (trim, convert, thumbnail, concat)
- Add TwitterHand: Twitter/X automation (tweet, retweet, like, search)
- Fix BrowserHand not registered in Kernel (critical bug)
- Add HandError variant to ZclawError enum
- Update documentation: 9/11 Hands implemented (82%)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
545
crates/zclaw-hands/src/hands/researcher.rs
Normal file
545
crates/zclaw-hands/src/hands/researcher.rs
Normal file
@@ -0,0 +1,545 @@
|
||||
//! Researcher Hand - Deep research and analysis capabilities
|
||||
//!
|
||||
//! This hand provides web search, content fetching, and research synthesis.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use zclaw_types::Result;
|
||||
|
||||
use crate::{Hand, HandConfig, HandContext, HandResult};
|
||||
|
||||
/// Search engine options
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum SearchEngine {
|
||||
Google,
|
||||
Bing,
|
||||
DuckDuckGo,
|
||||
Auto,
|
||||
}
|
||||
|
||||
impl Default for SearchEngine {
|
||||
fn default() -> Self {
|
||||
Self::Auto
|
||||
}
|
||||
}
|
||||
|
||||
/// Research depth level
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum ResearchDepth {
|
||||
Quick, // Fast search, top 3 results
|
||||
Standard, // Normal search, top 10 results
|
||||
Deep, // Comprehensive search, multiple sources
|
||||
}
|
||||
|
||||
impl Default for ResearchDepth {
|
||||
fn default() -> Self {
|
||||
Self::Standard
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for a single research query.
///
/// Serialized with camelCase field names (e.g. `maxResults`,
/// `timeLimitSecs`) to match the JSON schema advertised by the hand.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ResearchQuery {
    /// Free-text search query.
    pub query: String,
    /// Search engine to use.
    /// NOTE(review): `execute_search` currently routes every query through
    /// DuckDuckGo regardless of this setting — confirm before relying on it.
    #[serde(default)]
    pub engine: SearchEngine,
    /// Research depth; controls how many results get their content fetched
    /// during report generation.
    #[serde(default)]
    pub depth: ResearchDepth,
    /// Maximum results to return (default: 10).
    #[serde(default = "default_max_results")]
    pub max_results: usize,
    /// Include related topics.
    /// NOTE(review): not obviously consulted anywhere in this file — verify.
    #[serde(default)]
    pub include_related: bool,
    /// Time limit in seconds (default: 60).
    /// NOTE(review): not obviously enforced in this file — verify.
    #[serde(default = "default_time_limit")]
    pub time_limit_secs: u64,
}

// Serde default for `ResearchQuery::max_results`.
fn default_max_results() -> usize { 10 }
// Serde default for `ResearchQuery::time_limit_secs`.
fn default_time_limit() -> u64 { 60 }
|
||||
|
||||
/// A single search or fetch result item.
///
/// Serialized with camelCase field names (e.g. `fetchedAt`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
    /// Title of the result.
    pub title: String,
    /// URL the result points to (may be empty when the source gave none).
    pub url: String,
    /// Snippet/summary of the result.
    pub snippet: String,
    /// Source name (search engine or originating site/URL).
    pub source: String,
    /// Relevance score (0-100); 0 when absent from the serialized form.
    #[serde(default)]
    pub relevance: u8,
    /// Full fetched content, populated once `execute_fetch` has enriched
    /// the result; `None` for bare search hits.
    #[serde(default)]
    pub content: Option<String>,
    /// RFC 3339 timestamp of when the content was fetched.
    #[serde(default)]
    pub fetched_at: Option<String>,
}
|
||||
|
||||
/// Aggregated output of a `report` action.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ResearchReport {
    /// Original query text.
    pub query: String,
    /// Search results; top entries may carry fetched content.
    pub results: Vec<SearchResult>,
    /// Synthesized summary; currently always `None` in `execute_report`
    /// (would require LLM integration).
    #[serde(default)]
    pub summary: Option<String>,
    /// Key findings extracted heuristically from fetched content.
    #[serde(default)]
    pub key_findings: Vec<String>,
    /// Related topics discovered during the search.
    #[serde(default)]
    pub related_topics: Vec<String>,
    /// RFC 3339 timestamp of when the research was performed.
    pub researched_at: String,
    /// Total wall-clock time spent, in milliseconds.
    pub duration_ms: u64,
}
|
||||
|
||||
/// Actions accepted by the researcher hand, dispatched on the JSON
/// `"action"` tag (internally-tagged serde enum).
///
/// NOTE(review): the `input_schema` advertised in `ResearcherHand::new`
/// has no branch for `summarize` even though it is accepted here — verify
/// and align with whatever consumes the schema.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "action")]
pub enum ResearcherAction {
    /// Run a web search.
    #[serde(rename = "search")]
    Search { query: ResearchQuery },
    /// Fetch (and text-extract) the content of a single URL.
    #[serde(rename = "fetch")]
    Fetch { url: String },
    /// Fetch a list of URLs best-effort (capped at 5 in `execute`).
    #[serde(rename = "summarize")]
    Summarize { urls: Vec<String> },
    /// Full pipeline: search, fetch top results, derive findings.
    #[serde(rename = "report")]
    Report { query: ResearchQuery },
}
|
||||
|
||||
/// Researcher Hand implementation.
///
/// Holds the hand's static configuration, a shared HTTP client, and an
/// in-memory URL → result cache shared across concurrent executions.
pub struct ResearcherHand {
    // Static hand metadata/config returned by `Hand::config`.
    config: HandConfig,
    // Reused HTTP client (30 s timeout, custom user agent; built in `new`).
    client: reqwest::Client,
    // URL-keyed cache of fetched results; RwLock allows concurrent readers.
    cache: Arc<RwLock<HashMap<String, SearchResult>>>,
}
|
||||
|
||||
impl ResearcherHand {
|
||||
/// Create a new researcher hand with its default configuration.
///
/// Builds the HTTP client with a 30-second timeout and a custom user
/// agent; if the builder fails, falls back to a default `reqwest::Client`.
///
/// NOTE(review): the advertised `input_schema` covers `search`, `fetch`
/// and `report` but not the `summarize` action the hand accepts, and the
/// `"$ref": "#/properties/query"` pointer does not resolve within this
/// schema — both worth confirming against the schema consumer.
pub fn new() -> Self {
    Self {
        config: HandConfig {
            id: "researcher".to_string(),
            name: "Researcher".to_string(),
            description: "Deep research and analysis capabilities with web search and content fetching".to_string(),
            // Research is treated as safe; see `needs_approval`.
            needs_approval: false,
            dependencies: vec!["network".to_string()],
            input_schema: Some(serde_json::json!({
                "type": "object",
                "oneOf": [
                    {
                        "properties": {
                            "action": { "const": "search" },
                            "query": {
                                "type": "object",
                                "properties": {
                                    "query": { "type": "string" },
                                    "engine": { "type": "string", "enum": ["google", "bing", "duckduckgo", "auto"] },
                                    "depth": { "type": "string", "enum": ["quick", "standard", "deep"] },
                                    "maxResults": { "type": "integer" }
                                },
                                "required": ["query"]
                            }
                        },
                        "required": ["action", "query"]
                    },
                    {
                        "properties": {
                            "action": { "const": "fetch" },
                            "url": { "type": "string" }
                        },
                        "required": ["action", "url"]
                    },
                    {
                        "properties": {
                            "action": { "const": "report" },
                            "query": { "$ref": "#/properties/query" }
                        },
                        "required": ["action", "query"]
                    }
                ]
            })),
            tags: vec!["research".to_string(), "web".to_string(), "search".to_string()],
            enabled: true,
        },
        // Shared client; builder failure degrades to reqwest defaults
        // rather than panicking.
        client: reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(30))
            .user_agent("ZCLAW-Researcher/1.0")
            .build()
            .unwrap_or_else(|_| reqwest::Client::new()),
        cache: Arc::new(RwLock::new(HashMap::new())),
    }
}
|
||||
|
||||
/// Execute a web search
|
||||
async fn execute_search(&self, query: &ResearchQuery) -> Result<Vec<SearchResult>> {
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
// Use DuckDuckGo as default search (no API key required)
|
||||
let results = self.search_duckduckgo(&query.query, query.max_results).await?;
|
||||
|
||||
let duration = start.elapsed().as_millis() as u64;
|
||||
tracing::info!(
|
||||
target: "researcher",
|
||||
query = %query.query,
|
||||
duration_ms = duration,
|
||||
results_count = results.len(),
|
||||
"Search completed"
|
||||
);
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Search using DuckDuckGo (no API key required)
|
||||
async fn search_duckduckgo(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
|
||||
let url = format!("https://api.duckduckgo.com/?q={}&format=json&no_html=1",
|
||||
url_encode(query));
|
||||
|
||||
let response = self.client
|
||||
.get(&url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| zclaw_types::ZclawError::HandError(format!("Search request failed: {}", e)))?;
|
||||
|
||||
let json: Value = response.json().await
|
||||
.map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to parse search response: {}", e)))?;
|
||||
|
||||
let mut results = Vec::new();
|
||||
|
||||
// Parse DuckDuckGo Instant Answer
|
||||
if let Some(abstract_text) = json.get("AbstractText").and_then(|v| v.as_str()) {
|
||||
if !abstract_text.is_empty() {
|
||||
results.push(SearchResult {
|
||||
title: query.to_string(),
|
||||
url: json.get("AbstractURL")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string(),
|
||||
snippet: abstract_text.to_string(),
|
||||
source: json.get("AbstractSource")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("DuckDuckGo")
|
||||
.to_string(),
|
||||
relevance: 100,
|
||||
content: None,
|
||||
fetched_at: Some(chrono::Utc::now().to_rfc3339()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Parse related topics
|
||||
if let Some(related) = json.get("RelatedTopics").and_then(|v| v.as_array()) {
|
||||
for item in related.iter().take(max_results) {
|
||||
if let Some(obj) = item.as_object() {
|
||||
results.push(SearchResult {
|
||||
title: obj.get("Text")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("Related Topic")
|
||||
.to_string(),
|
||||
url: obj.get("FirstURL")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string(),
|
||||
snippet: obj.get("Text")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string(),
|
||||
source: "DuckDuckGo".to_string(),
|
||||
relevance: 80,
|
||||
content: None,
|
||||
fetched_at: Some(chrono::Utc::now().to_rfc3339()),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Fetch content from a URL.
///
/// Results with content are cached in memory (keyed by URL) and served
/// from the cache on later calls. HTML bodies are reduced to plain text;
/// other `text/*` and JSON bodies are returned verbatim; anything else
/// becomes a "[Binary content]" placeholder.
async fn execute_fetch(&self, url: &str) -> Result<SearchResult> {
    let start = std::time::Instant::now();

    // Check cache first (scoped so the read guard drops before the request).
    {
        let cache = self.cache.read().await;
        if let Some(cached) = cache.get(url) {
            // Only content-bearing entries count as cache hits.
            if cached.content.is_some() {
                return Ok(cached.clone());
            }
        }
    }

    let response = self.client
        .get(url)
        .send()
        .await
        .map_err(|e| zclaw_types::ZclawError::HandError(format!("Fetch request failed: {}", e)))?;

    // Content-Type decides how the body is interpreted below.
    let content_type = response.headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|v| v.to_str().ok())
        .unwrap_or("");

    let content = if content_type.contains("text/html") {
        // Extract readable text from HTML.
        let html = response.text().await
            .map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to read HTML: {}", e)))?;
        self.extract_text_from_html(&html)
    } else if content_type.contains("text/") || content_type.contains("application/json") {
        response.text().await
            .map_err(|e| zclaw_types::ZclawError::HandError(format!("Failed to read text: {}", e)))?
    } else {
        // Non-text payloads are not downloaded into the result.
        "[Binary content]".to_string()
    };

    let result = SearchResult {
        title: url.to_string(),
        url: url.to_string(),
        // First 500 chars of the content double as the snippet.
        snippet: content.chars().take(500).collect(),
        source: url.to_string(),
        relevance: 100,
        content: Some(content),
        fetched_at: Some(chrono::Utc::now().to_rfc3339()),
    };

    // Cache the result (write guard scoped to the insert).
    {
        let mut cache = self.cache.write().await;
        cache.insert(url.to_string(), result.clone());
    }

    let duration = start.elapsed().as_millis() as u64;
    tracing::info!(
        target: "researcher",
        url = url,
        duration_ms = duration,
        "Fetch completed"
    );

    Ok(result)
}
|
||||
|
||||
/// Extract readable text from HTML
|
||||
fn extract_text_from_html(&self, html: &str) -> String {
|
||||
// Simple text extraction - remove HTML tags
|
||||
let mut text = String::new();
|
||||
let mut in_tag = false;
|
||||
let mut in_script = false;
|
||||
let mut in_style = false;
|
||||
|
||||
for c in html.chars() {
|
||||
match c {
|
||||
'<' => {
|
||||
in_tag = true;
|
||||
let remaining = html[text.len()..].to_lowercase();
|
||||
if remaining.starts_with("<script") {
|
||||
in_script = true;
|
||||
} else if remaining.starts_with("<style") {
|
||||
in_style = true;
|
||||
}
|
||||
}
|
||||
'>' => {
|
||||
in_tag = false;
|
||||
let remaining = html[text.len()..].to_lowercase();
|
||||
if remaining.starts_with("</script>") {
|
||||
in_script = false;
|
||||
} else if remaining.starts_with("</style>") {
|
||||
in_style = false;
|
||||
}
|
||||
}
|
||||
_ if in_tag => {}
|
||||
_ if in_script || in_style => {}
|
||||
' ' | '\n' | '\t' | '\r' => {
|
||||
if !text.ends_with(' ') && !text.is_empty() {
|
||||
text.push(' ');
|
||||
}
|
||||
}
|
||||
_ => text.push(c),
|
||||
}
|
||||
}
|
||||
|
||||
// Limit length
|
||||
if text.len() > 10000 {
|
||||
text.truncate(10000);
|
||||
text.push_str("...");
|
||||
}
|
||||
|
||||
text.trim().to_string()
|
||||
}
|
||||
|
||||
/// Generate a comprehensive research report.
///
/// Pipeline: search → fetch full content for the top N results (N set by
/// `query.depth`) → derive key findings and related topics heuristically.
/// The `summary` field is left `None` (would require LLM integration).
async fn execute_report(&self, query: &ResearchQuery) -> Result<ResearchReport> {
    let start = std::time::Instant::now();

    // First, execute the search.
    let mut results = self.execute_search(query).await?;

    // Depth controls how many of the top results get content fetched.
    let fetch_limit = match query.depth {
        ResearchDepth::Quick => 1,
        ResearchDepth::Standard => 3,
        ResearchDepth::Deep => 5,
    };

    for result in results.iter_mut().take(fetch_limit) {
        if !result.url.is_empty() {
            match self.execute_fetch(&result.url).await {
                Ok(fetched) => {
                    result.content = fetched.content;
                    result.fetched_at = fetched.fetched_at;
                }
                Err(e) => {
                    // Best-effort enrichment: log and keep the bare result.
                    tracing::warn!(target: "researcher", error = %e, "Failed to fetch content");
                }
            }
        }
    }

    // Key findings: first three sentences of up to five fetched documents.
    let key_findings: Vec<String> = results.iter()
        .take(5)
        .filter_map(|r| {
            r.content.as_ref().map(|c| {
                c.split(". ")
                    .take(3)
                    .collect::<Vec<_>>()
                    .join(". ")
            })
        })
        .collect();

    // Related topics: titles of results with substantial snippets (>50 chars).
    let related_topics: Vec<String> = results.iter()
        .filter_map(|r| {
            if r.snippet.len() > 50 {
                Some(r.title.clone())
            } else {
                None
            }
        })
        .take(5)
        .collect();

    let duration = start.elapsed().as_millis() as u64;

    Ok(ResearchReport {
        query: query.query.clone(),
        results,
        summary: None, // Would require LLM integration
        key_findings,
        related_topics,
        researched_at: chrono::Utc::now().to_rfc3339(),
        duration_ms: duration,
    })
}
|
||||
}
|
||||
|
||||
impl Default for ResearcherHand {
    /// Equivalent to [`ResearcherHand::new`].
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[async_trait]
impl Hand for ResearcherHand {
    /// Static configuration (id, schema, tags) for kernel registration.
    fn config(&self) -> &HandConfig {
        &self.config
    }

    /// Dispatch a JSON action (`search` | `fetch` | `summarize` | `report`)
    /// and wrap the outcome in a successful `HandResult`.
    ///
    /// NOTE(review): the `input_schema` advertised in `new()` has no branch
    /// for `summarize`, although it is accepted here — confirm and align.
    async fn execute(&self, _context: &HandContext, input: Value) -> Result<HandResult> {
        let action: ResearcherAction = serde_json::from_value(input.clone())
            .map_err(|e| zclaw_types::ZclawError::HandError(format!("Invalid action: {}", e)))?;

        let start = std::time::Instant::now();

        let result = match action {
            ResearcherAction::Search { query } => {
                let results = self.execute_search(&query).await?;
                json!({
                    "action": "search",
                    "query": query.query,
                    "results": results,
                    "duration_ms": start.elapsed().as_millis()
                })
            }
            ResearcherAction::Fetch { url } => {
                let result = self.execute_fetch(&url).await?;
                json!({
                    "action": "fetch",
                    "url": url,
                    "result": result,
                    "duration_ms": start.elapsed().as_millis()
                })
            }
            ResearcherAction::Summarize { urls } => {
                let mut results = Vec::new();
                // Best-effort: at most 5 URLs; failed fetches are skipped.
                for url in urls.iter().take(5) {
                    if let Ok(result) = self.execute_fetch(url).await {
                        results.push(result);
                    }
                }
                json!({
                    "action": "summarize",
                    "urls": urls,
                    "results": results,
                    "duration_ms": start.elapsed().as_millis()
                })
            }
            ResearcherAction::Report { query } => {
                let report = self.execute_report(&query).await?;
                json!({
                    "action": "report",
                    "report": report
                })
            }
        };

        Ok(HandResult::success(result))
    }

    /// Research actions run without user approval.
    ///
    /// NOTE(review): `fetch` will request arbitrary caller-supplied URLs —
    /// consider whether SSRF exposure warrants approval or an allow-list.
    fn needs_approval(&self) -> bool {
        false // Research operations are generally safe
    }

    /// No up-front dependency check; network availability only surfaces
    /// when a request is actually made.
    fn check_dependencies(&self) -> Result<Vec<String>> {
        // Network connectivity will be checked at runtime
        Ok(Vec::new())
    }

    /// This hand keeps no long-running task state, so it always reports Idle.
    fn status(&self) -> crate::HandStatus {
        crate::HandStatus::Idle
    }
}
|
||||
|
||||
/// Percent-encode a string for use in a URL query component.
///
/// Unreserved characters (RFC 3986: ALPHA / DIGIT / "-" / "_" / "." / "~")
/// pass through unchanged; everything else is emitted as the percent-escaped
/// UTF-8 *bytes* of the character (e.g. " " -> "%20", "é" -> "%C3%A9").
///
/// The previous version encoded the Unicode code point instead
/// (`format!("%{:02X}", c as u32)`), which produces invalid percent-encoding
/// for any non-ASCII character ("é" became "%E9").
fn url_encode(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut buf = [0u8; 4];
    for c in s.chars() {
        match c {
            'A'..='Z' | 'a'..='z' | '0'..='9' | '-' | '_' | '.' | '~' => out.push(c),
            _ => {
                // Escape each UTF-8 byte of the character individually.
                for b in c.encode_utf8(&mut buf).as_bytes() {
                    out.push_str(&format!("%{:02X}", b));
                }
            }
        }
    }
    out
}
|
||||
Reference in New Issue
Block a user