From 0fd981905dfbfb1ff26d55f4a7bb0d6f0e71b6a8 Mon Sep 17 00:00:00 2001 From: iven Date: Wed, 22 Apr 2026 10:52:13 +0800 Subject: [PATCH] =?UTF-8?q?fix(hands):=20=E9=9B=86=E6=88=90=20SearXNG=20?= =?UTF-8?q?=E5=85=83=E6=90=9C=E7=B4=A2=E5=BC=95=E6=93=8E=20=E2=80=94=20?= =?UTF-8?q?=E6=9B=BF=E6=8D=A2=E4=B8=8D=E5=8F=AF=E7=94=A8=E7=9A=84=20DuckDu?= =?UTF-8?q?ckGo=20Instant=20Answer=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ResearcherHand 新增 search_searxng() 方法,调用 SearXNG JSON API 聚合 70+ 搜索引擎 - SearchEngine 枚举增加 SearXNG 变体,路由逻辑按配置分发搜索后端 - Auto 模式: SearXNG 优先 → DuckDuckGo fallback - config.toml [tools.web.search] 新增 searxng_url/searxng_timeout 配置 - docker-compose.yml 新增 SearXNG 服务容器 (searxng-config/settings.yml) - 新增 6 个 SearXNG 相关单元测试 (响应解析/URL构造/分数归一化/配置加载) - 验证: 124 tests PASS, workspace 0 warnings --- Cargo.lock | 1 + config/config.toml | 4 +- crates/zclaw-hands/Cargo.toml | 1 + crates/zclaw-hands/src/hands/researcher.rs | 329 ++++++++++++++++++++- docker-compose.yml | 19 ++ searxng-config/settings.yml | 48 +++ 6 files changed, 396 insertions(+), 6 deletions(-) create mode 100644 searxng-config/settings.yml diff --git a/Cargo.lock b/Cargo.lock index b62a505..86527aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9491,6 +9491,7 @@ dependencies = [ "serde_json", "thiserror 2.0.18", "tokio", + "toml 0.8.2", "tracing", "uuid", "zclaw-runtime", diff --git a/config/config.toml b/config/config.toml index 6c5de58..6c2be76 100644 --- a/config/config.toml +++ b/config/config.toml @@ -223,8 +223,10 @@ timeout = "30s" [tools.web] [tools.web.search] enabled = true -default_engine = "duckduckgo" +default_engine = "searxng" max_results = 10 +searxng_url = "http://localhost:8888" +searxng_timeout = 15 # File system tool [tools.fs] diff --git a/crates/zclaw-hands/Cargo.toml b/crates/zclaw-hands/Cargo.toml index 40a3c78..2298074 100644 --- a/crates/zclaw-hands/Cargo.toml +++ b/crates/zclaw-hands/Cargo.toml @@ -22,3 +22,4 @@ async-trait = { workspace = true } reqwest = { workspace = true } base64 = { workspace = true } dirs = { workspace = true } +toml = { workspace = true } diff --git a/crates/zclaw-hands/src/hands/researcher.rs b/crates/zclaw-hands/src/hands/researcher.rs index 113dec4..981d643 100644 --- a/crates/zclaw-hands/src/hands/researcher.rs +++ b/crates/zclaw-hands/src/hands/researcher.rs @@ -16,6 +16,7 @@ use crate::{Hand, HandConfig, HandContext, HandResult}; #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum SearchEngine { + SearXNG, Google, Bing, DuckDuckGo, @@ -28,6 +29,83 @@ impl Default for SearchEngine { } } +/// Search configuration loaded from config/config.toml +#[derive(Debug, Clone)] +struct SearchConfig { + default_engine: SearchEngine, + searxng_url: String, + timeout_secs: u64, +} + +impl Default for SearchConfig { + fn default() -> Self { + Self { + default_engine: SearchEngine::Auto, + searxng_url: "http://localhost:8888".to_string(), + timeout_secs: 15, + } + } +} + +impl SearchConfig { + fn load() -> Self { + let path = "config/config.toml"; + let content = match std::fs::read_to_string(path) { + Ok(c) => c, + Err(_) => return Self::default(), + }; + + #[derive(Deserialize)] + struct ToolsWebSearch { + default_engine: Option, + #[allow(dead_code)] + max_results: Option, + searxng_url: Option, + searxng_timeout: Option, + } + + #[derive(Deserialize)] + struct ToolsWeb { + search: Option, + } + + #[derive(Deserialize)] + struct Tools { + web: Option, + } + + #[derive(Deserialize)] + struct Config { + tools: Option, + } + + let config: Config = match toml::from_str(&content) { + Ok(c) => c, + Err(_) => return Self::default(), + }; + + let search = config.tools + .and_then(|t| t.web) + .and_then(|w| w.search); + + match search { + Some(s) => { + let engine = s.default_engine + .as_deref() + .and_then(|e| serde_json::from_str(&format!("\"{}\"", e)).ok()) + .unwrap_or_default(); + Self { + default_engine: engine, + searxng_url: s.searxng_url + .unwrap_or_else(|| "http://localhost:8888".to_string()), + timeout_secs: s.searxng_timeout.unwrap_or(15), + } + } + None => Self::default(), + } + } +} + /// Research depth level #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] @@ -132,6 +210,7 @@ pub enum ResearcherAction { /// Researcher Hand implementation pub struct ResearcherHand { config: HandConfig, + search_config: SearchConfig, client: reqwest::Client, cache: Arc>>, } @@ -156,7 +235,7 @@ impl ResearcherHand { "type": "object", "properties": { "query": { "type": "string" }, - "engine": { "type": "string", "enum": ["google", "bing", "duckduckgo", "auto"] }, + "engine": { "type": "string", "enum": ["searxng", "google", "bing", "duckduckgo", "auto"] }, "depth": { "type": "string", "enum": ["quick", "standard", "deep"] }, "maxResults": { "type": "integer" } }, @@ -186,6 +265,7 @@ impl ResearcherHand { max_concurrent: 0, timeout_secs: 0, }, + search_config: SearchConfig::load(), client: reqwest::Client::builder() .timeout(std::time::Duration::from_secs(30)) .user_agent("ZCLAW-Researcher/1.0") @@ -195,17 +275,42 @@ impl ResearcherHand { } } - /// Execute a web search + /// Execute a web search — route to the configured backend async fn execute_search(&self, query: &ResearchQuery) -> Result> { let start = std::time::Instant::now(); - // Use DuckDuckGo as default search (no API key required) - let results = self.search_duckduckgo(&query.query, query.max_results).await?; + let engine = match &query.engine { + SearchEngine::Auto => &self.search_config.default_engine, + other => other, + }; + + let results = match engine { + SearchEngine::SearXNG | SearchEngine::Auto => { + match self.search_searxng(&query.query, query.max_results).await { + Ok(r) if !r.is_empty() => r, + _ => { + tracing::warn!(target: "researcher", "SearXNG failed or empty, falling back to DuckDuckGo"); + self.search_duckduckgo(&query.query, query.max_results).await? + } + } + } + SearchEngine::DuckDuckGo => { + self.search_duckduckgo(&query.query, query.max_results).await? + } + SearchEngine::Google | SearchEngine::Bing => { + // Google/Bing not yet implemented, fall back to SearXNG which aggregates them + match self.search_searxng(&query.query, query.max_results).await { + Ok(r) if !r.is_empty() => r, + _ => self.search_duckduckgo(&query.query, query.max_results).await?, + } + } + }; let duration = start.elapsed().as_millis() as u64; tracing::info!( target: "researcher", query = %query.query, + engine = ?engine, duration_ms = duration, results_count = results.len(), "Search completed" @@ -214,6 +319,92 @@ impl ResearcherHand { Ok(results) } + /// Search using SearXNG meta-search engine (aggregates 70+ engines) + async fn search_searxng(&self, query: &str, max_results: usize) -> Result> { + let url = format!( + "{}/search?q={}&format=json&categories=general&language=auto&pageno=1", + self.search_config.searxng_url.trim_end_matches('/'), + url_encode(query) + ); + + let response = self.client + .get(&url) + .timeout(std::time::Duration::from_secs(self.search_config.timeout_secs)) + .send() + .await + .map_err(|e| zclaw_types::ZclawError::HandError( + format!("SearXNG request failed: {}", e) + ))?; + + let status = response.status(); + if !status.is_success() { + return Err(zclaw_types::ZclawError::HandError( + format!("SearXNG returned HTTP {}", status) + )); + } + + let json: Value = response.json().await + .map_err(|e| zclaw_types::ZclawError::HandError( + format!("Failed to parse SearXNG response: {}", e) + ))?; + + let mut results = Vec::new(); + + if let Some(items) = json.get("results").and_then(|v| v.as_array()) { + for item in items.iter().take(max_results) { + let title = item.get("title") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let url = item.get("url") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let snippet = item.get("content") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let engines = item.get("engines") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|e| e.as_str()) + .collect::>() + .join(",") + }) + .unwrap_or_default(); + let score = item.get("score") + .and_then(|v| v.as_f64()) + .unwrap_or(0.0); + + // Normalize score to 0-100 range + let relevance = if score > 0.0 { + (score.min(10.0) * 10.0) as u8 + } else { + 50 + }; + + if !title.is_empty() && !url.is_empty() { + results.push(SearchResult { + title, + url, + snippet, + source: if engines.is_empty() { + "SearXNG".to_string() + } else { + format!("SearXNG({})", engines) + }, + relevance, + content: None, + fetched_at: Some(chrono::Utc::now().to_rfc3339()), + }); + } + } + } + + Ok(results) + } + /// Search using DuckDuckGo (no API key required) async fn search_duckduckgo(&self, query: &str, max_results: usize) -> Result> { let url = format!("https://api.duckduckgo.com/?q={}&format=json&no_html=1", @@ -603,6 +794,12 @@ mod tests { assert!(matches!(engine, SearchEngine::Auto)); } + #[test] + fn test_search_engine_searxng_deserialize() { + let engine: SearchEngine = serde_json::from_str("\"searxng\"").unwrap(); + assert!(matches!(engine, SearchEngine::SearXNG)); + } + #[test] fn test_research_depth_default_is_standard() { let depth = ResearchDepth::default(); @@ -623,7 +820,7 @@ mod tests { #[test] fn test_search_engine_serialize_roundtrip() { - for engine in [SearchEngine::Google, SearchEngine::Bing, SearchEngine::DuckDuckGo, SearchEngine::Auto] { + for engine in [SearchEngine::SearXNG, SearchEngine::Google, SearchEngine::Bing, SearchEngine::DuckDuckGo, SearchEngine::Auto] { let json = serde_json::to_string(&engine).unwrap(); let back: SearchEngine = serde_json::from_str(&json).unwrap(); assert_eq!(json, serde_json::to_string(&back).unwrap()); @@ -849,4 +1046,126 @@ mod tests { assert!(report.summary.is_some()); assert!(report.summary.unwrap().contains("snippet text")); } + + // --- SearchConfig Tests --- + + #[test] + fn test_search_config_default() { + let config = SearchConfig::default(); + assert!(matches!(config.default_engine, SearchEngine::Auto)); + assert_eq!(config.searxng_url, "http://localhost:8888"); + assert_eq!(config.timeout_secs, 15); + } + + #[test] + fn test_search_config_load_fallback_on_missing_file() { + // Config loads from config/config.toml which may not exist in test CWD + let config = SearchConfig::load(); + // Should return a valid config either way + assert!(!config.searxng_url.is_empty()); + } + + // --- SearXNG Response Parsing Tests --- + + #[test] + fn test_searxng_response_parse() { + let mock_response = json!({ + "query": "Rust programming", + "number_of_results": 42, + "results": [ + { + "url": "https://www.rust-lang.org/", + "title": "Rust Programming Language", + "content": "A language empowering everyone to build reliable software.", + "engine": "google", + "engines": ["google", "duckduckgo"], + "score": 5.2, + "category": "general" + }, + { + "url": "https://doc.rust-lang.org/book/", + "title": "The Rust Book", + "content": "The official guide to Rust programming.", + "engine": "bing", + "engines": ["bing"], + "score": 3.1, + "category": "general" + } + ], + "suggestions": ["rust tutorial", "rust vs go"] + }); + + let results = mock_response.get("results").unwrap().as_array().unwrap(); + assert_eq!(results.len(), 2); + + // Verify first result mapping + let r0 = &results[0]; + assert_eq!(r0["title"].as_str().unwrap(), "Rust Programming Language"); + assert_eq!(r0["url"].as_str().unwrap(), "https://www.rust-lang.org/"); + assert_eq!(r0["content"].as_str().unwrap(), "A language empowering everyone to build reliable software."); + + let engines: Vec<&str> = r0["engines"].as_array().unwrap() + .iter().filter_map(|e| e.as_str()).collect(); + assert_eq!(engines, vec!["google", "duckduckgo"]); + } + + #[test] + fn test_searxng_empty_results() { + let mock_response = json!({ + "query": "nonexistent xyzzy123", + "number_of_results": 0, + "results": [], + "suggestions": [] + }); + + let results = mock_response.get("results").unwrap().as_array().unwrap(); + assert!(results.is_empty()); + } + + #[test] + fn test_searxng_score_normalization() { + // Score 5.2 → (5.2 * 10) = 52 → relevance 52 + let score = 5.2_f64; + let relevance = if score > 0.0 { + (score.min(10.0) * 10.0) as u8 + } else { + 50 + }; + assert_eq!(relevance, 52); + + // Score 15.0 → clamped to 10.0 → relevance 100 + let score = 15.0_f64; + let relevance = if score > 0.0 { + (score.min(10.0) * 10.0) as u8 + } else { + 50 + }; + assert_eq!(relevance, 100); + + // Score 0.0 → default relevance 50 + let score = 0.0_f64; + let relevance = if score > 0.0 { + (score.min(10.0) * 10.0) as u8 + } else { + 50 + }; + assert_eq!(relevance, 50); + } + + #[test] + fn test_searxng_url_construction() { + let config = SearchConfig::default(); + let query = "2024年中国医疗政策"; + let url = format!( + "{}/search?q={}&format=json&categories=general&language=auto&pageno=1", + config.searxng_url.trim_end_matches('/'), + url_encode(query) + ); + assert!(url.starts_with("http://localhost:8888/search?")); + assert!(url.contains("format=json")); + assert!(url.contains("categories=general")); + // Verify UTF-8 encoding, not Unicode codepoints + assert!(url.contains("%E4%B8%AD")); // 中 = E4 B8 AD + assert!(!url.contains("%4E2D")); // NOT Unicode codepoint + } } diff --git a/docker-compose.yml b/docker-compose.yml index 8afa093..267cb5a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -69,9 +69,28 @@ services: networks: - zclaw-saas + # ---- SearXNG Meta Search ---- + searxng: + image: searxng/searxng:latest + container_name: zclaw-searxng + restart: unless-stopped + + volumes: + - ./searxng-config/:/etc/searxng/:Z + - searxng-data:/var/cache/searxng/ + + networks: + - zclaw-saas + + # 端口仅本地访问,不对外暴露 + ports: + - "127.0.0.1:8888:8888" + volumes: postgres_data: driver: local + searxng-data: + driver: local networks: zclaw-saas: diff --git a/searxng-config/settings.yml b/searxng-config/settings.yml new file mode 100644 index 0000000..79c9545 --- /dev/null +++ b/searxng-config/settings.yml @@ -0,0 +1,48 @@ +# SearXNG configuration for ZCLAW +# Docs: https://docs.searxng.org/admin/settings/settings.html + +use_default_settings: true + +search: + safe_search: 0 + autocomplete: "" + default_lang: "auto" + formats: + - html + - json + +server: + secret_key: "zclaw-searxng-internal" + limiter: false + image_proxy: false + port: 8888 + bind_address: "0.0.0.0" + +ui: + static_use_hash: true + +enabled_plugins: + - 'Hash plugin' + - 'Self Information' + - 'Tracker URL remover' + - 'Ahmia blacklist' + +engines: + - name: google + engine: google + shortcut: g + - name: bing + engine: bing + shortcut: b + - name: duckduckgo + engine: duckduckgo + shortcut: ddg + - name: baidu + engine: baidu + shortcut: bd + - name: wikipedia + engine: wikipedia + shortcut: wp + - name: github + engine: github + shortcut: gh