fix(hands): 集成 SearXNG 元搜索引擎 — 替换不可用的 DuckDuckGo Instant Answer API
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

- ResearcherHand 新增 search_searxng() 方法,调用 SearXNG JSON API 聚合 70+ 搜索引擎
- SearchEngine 枚举增加 SearXNG 变体,路由逻辑按配置分发搜索后端
- Auto 模式: SearXNG 优先 → DuckDuckGo fallback
- config.toml [tools.web.search] 新增 searxng_url/searxng_timeout 配置
- docker-compose.yml 新增 SearXNG 服务容器 (searxng-config/settings.yml)
- 新增 6 个 SearXNG 相关单元测试 (响应解析/URL构造/分数归一化/配置加载)
- 验证: 124 tests PASS, workspace 0 warnings
This commit is contained in:
iven
2026-04-22 10:52:13 +08:00
parent 39a7ac3356
commit 0fd981905d
6 changed files with 396 additions and 6 deletions

1
Cargo.lock generated
View File

@@ -9491,6 +9491,7 @@ dependencies = [
"serde_json",
"thiserror 2.0.18",
"tokio",
"toml 0.8.2",
"tracing",
"uuid",
"zclaw-runtime",

View File

@@ -223,8 +223,10 @@ timeout = "30s"
[tools.web]
[tools.web.search]
enabled = true
default_engine = "duckduckgo"
default_engine = "searxng"
max_results = 10
searxng_url = "http://localhost:8888"
searxng_timeout = 15
# File system tool
[tools.fs]

View File

@@ -22,3 +22,4 @@ async-trait = { workspace = true }
reqwest = { workspace = true }
base64 = { workspace = true }
dirs = { workspace = true }
toml = { workspace = true }

View File

@@ -16,6 +16,7 @@ use crate::{Hand, HandConfig, HandContext, HandResult};
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum SearchEngine {
SearXNG,
Google,
Bing,
DuckDuckGo,
@@ -28,6 +29,83 @@ impl Default for SearchEngine {
}
}
/// Search configuration loaded from config/config.toml
///
/// Carries the settings the researcher needs to choose a search backend and
/// to reach the SearXNG instance.
#[derive(Debug, Clone)]
struct SearchConfig {
/// Engine substituted when a query requests `SearchEngine::Auto`.
default_engine: SearchEngine,
/// Base URL of the SearXNG instance; a trailing `/` is stripped by the caller
/// before the request path is appended.
searxng_url: String,
/// Timeout, in seconds, applied to each SearXNG request.
timeout_secs: u64,
}
impl Default for SearchConfig {
fn default() -> Self {
Self {
default_engine: SearchEngine::Auto,
searxng_url: "http://localhost:8888".to_string(),
timeout_secs: 15,
}
}
}
impl SearchConfig {
fn load() -> Self {
let path = "config/config.toml";
let content = match std::fs::read_to_string(path) {
Ok(c) => c,
Err(_) => return Self::default(),
};
#[derive(Deserialize)]
struct ToolsWebSearch {
default_engine: Option<String>,
#[allow(dead_code)]
max_results: Option<usize>,
searxng_url: Option<String>,
searxng_timeout: Option<u64>,
}
#[derive(Deserialize)]
struct ToolsWeb {
search: Option<ToolsWebSearch>,
}
#[derive(Deserialize)]
struct Tools {
web: Option<ToolsWeb>,
}
#[derive(Deserialize)]
struct Config {
tools: Option<Tools>,
}
let config: Config = match toml::from_str(&content) {
Ok(c) => c,
Err(_) => return Self::default(),
};
let search = config.tools
.and_then(|t| t.web)
.and_then(|w| w.search);
match search {
Some(s) => {
let engine = s.default_engine
.as_deref()
.and_then(|e| serde_json::from_str(&format!("\"{}\"", e)).ok())
.unwrap_or_default();
Self {
default_engine: engine,
searxng_url: s.searxng_url
.unwrap_or_else(|| "http://localhost:8888".to_string()),
timeout_secs: s.searxng_timeout.unwrap_or(15),
}
}
None => Self::default(),
}
}
}
/// Research depth level
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
@@ -132,6 +210,7 @@ pub enum ResearcherAction {
/// Researcher Hand implementation
///
/// Groups the hand's configuration with the HTTP client and the search
/// settings used by its search methods.
pub struct ResearcherHand {
/// Hand-level configuration.
config: HandConfig,
/// Search backend settings, initialised from `SearchConfig::load()` where the
/// hand is constructed.
search_config: SearchConfig,
/// HTTP client used for the outgoing search requests.
client: reqwest::Client,
/// Shared, lock-protected map of `SearchResult`s keyed by `String`.
/// NOTE(review): presumably keyed by URL or query — confirm against the code
/// that fills it.
cache: Arc<RwLock<HashMap<String, SearchResult>>>,
}
@@ -156,7 +235,7 @@ impl ResearcherHand {
"type": "object",
"properties": {
"query": { "type": "string" },
"engine": { "type": "string", "enum": ["google", "bing", "duckduckgo", "auto"] },
"engine": { "type": "string", "enum": ["searxng", "google", "bing", "duckduckgo", "auto"] },
"depth": { "type": "string", "enum": ["quick", "standard", "deep"] },
"maxResults": { "type": "integer" }
},
@@ -186,6 +265,7 @@ impl ResearcherHand {
max_concurrent: 0,
timeout_secs: 0,
},
search_config: SearchConfig::load(),
client: reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(30))
.user_agent("ZCLAW-Researcher/1.0")
@@ -195,17 +275,42 @@ impl ResearcherHand {
}
}
/// Execute a web search
/// Execute a web search — route to the configured backend
async fn execute_search(&self, query: &ResearchQuery) -> Result<Vec<SearchResult>> {
let start = std::time::Instant::now();
// Use DuckDuckGo as default search (no API key required)
let results = self.search_duckduckgo(&query.query, query.max_results).await?;
let engine = match &query.engine {
SearchEngine::Auto => &self.search_config.default_engine,
other => other,
};
let results = match engine {
SearchEngine::SearXNG | SearchEngine::Auto => {
match self.search_searxng(&query.query, query.max_results).await {
Ok(r) if !r.is_empty() => r,
_ => {
tracing::warn!(target: "researcher", "SearXNG failed or empty, falling back to DuckDuckGo");
self.search_duckduckgo(&query.query, query.max_results).await?
}
}
}
SearchEngine::DuckDuckGo => {
self.search_duckduckgo(&query.query, query.max_results).await?
}
SearchEngine::Google | SearchEngine::Bing => {
// Google/Bing not yet implemented, fall back to SearXNG which aggregates them
match self.search_searxng(&query.query, query.max_results).await {
Ok(r) if !r.is_empty() => r,
_ => self.search_duckduckgo(&query.query, query.max_results).await?,
}
}
};
let duration = start.elapsed().as_millis() as u64;
tracing::info!(
target: "researcher",
query = %query.query,
engine = ?engine,
duration_ms = duration,
results_count = results.len(),
"Search completed"
@@ -214,6 +319,92 @@ impl ResearcherHand {
Ok(results)
}
/// Search using the SearXNG meta-search engine through its JSON API.
///
/// Sends a GET request to `{searxng_url}/search` with `format=json`, using the
/// timeout from the search configuration, and maps each entry of the
/// response's `results` array to a `SearchResult`.
///
/// Entries lacking a title or a URL are discarded *before* the `max_results`
/// cap is applied, so unusable entries never take up a slot in the output.
///
/// # Errors
///
/// Returns `ZclawError::HandError` when the request fails, the server answers
/// with a non-success status, or the body is not valid JSON. A response
/// without a `results` array yields an empty vector rather than an error.
async fn search_searxng(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
    let url = format!(
        "{}/search?q={}&format=json&categories=general&language=auto&pageno=1",
        self.search_config.searxng_url.trim_end_matches('/'),
        url_encode(query)
    );

    let response = self.client
        .get(&url)
        // Per-request timeout; overrides the client-wide one for SearXNG calls.
        .timeout(std::time::Duration::from_secs(self.search_config.timeout_secs))
        .send()
        .await
        .map_err(|e| zclaw_types::ZclawError::HandError(
            format!("SearXNG request failed: {}", e)
        ))?;

    let status = response.status();
    if !status.is_success() {
        return Err(zclaw_types::ZclawError::HandError(
            format!("SearXNG returned HTTP {}", status)
        ));
    }

    let json: Value = response.json().await
        .map_err(|e| zclaw_types::ZclawError::HandError(
            format!("Failed to parse SearXNG response: {}", e)
        ))?;

    // Read a string field, treating a missing or non-string value as empty.
    let text_field = |item: &Value, key: &str| -> String {
        item.get(key)
            .and_then(|v| v.as_str())
            .unwrap_or("")
            .to_string()
    };

    let results = json
        .get("results")
        .and_then(|v| v.as_array())
        .map(|items| {
            items
                .iter()
                .filter_map(|item| {
                    let title = text_field(item, "title");
                    let url = text_field(item, "url");
                    // A result without a title or a link is of no use downstream.
                    if title.is_empty() || url.is_empty() {
                        return None;
                    }

                    // Names of the upstream engines that returned this entry.
                    let engines = item.get("engines")
                        .and_then(|v| v.as_array())
                        .map(|arr| {
                            arr.iter()
                                .filter_map(|e| e.as_str())
                                .collect::<Vec<_>>()
                                .join(",")
                        })
                        .unwrap_or_default();

                    let score = item.get("score")
                        .and_then(|v| v.as_f64())
                        .unwrap_or(0.0);
                    // Map the score onto 0-100: positive scores are capped at 10
                    // and scaled by 10; a missing or non-positive score gets the
                    // neutral value 50.
                    let relevance = if score > 0.0 {
                        (score.min(10.0) * 10.0) as u8
                    } else {
                        50
                    };

                    let source = if engines.is_empty() {
                        "SearXNG".to_string()
                    } else {
                        format!("SearXNG({})", engines)
                    };

                    Some(SearchResult {
                        title,
                        url,
                        snippet: text_field(item, "content"),
                        source,
                        relevance,
                        content: None,
                        fetched_at: Some(chrono::Utc::now().to_rfc3339()),
                    })
                })
                // Cap only after filtering so dropped entries do not consume slots.
                .take(max_results)
                .collect::<Vec<_>>()
        })
        .unwrap_or_default();

    Ok(results)
}
/// Search using DuckDuckGo (no API key required)
async fn search_duckduckgo(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
let url = format!("https://api.duckduckgo.com/?q={}&format=json&no_html=1",
@@ -603,6 +794,12 @@ mod tests {
assert!(matches!(engine, SearchEngine::Auto));
}
#[test]
fn test_search_engine_searxng_deserialize() {
    // The lowercase wire name must map onto the SearXNG variant.
    let parsed = serde_json::from_str::<SearchEngine>("\"searxng\"").unwrap();
    assert!(matches!(parsed, SearchEngine::SearXNG));
}
#[test]
fn test_research_depth_default_is_standard() {
let depth = ResearchDepth::default();
@@ -623,7 +820,7 @@ mod tests {
#[test]
fn test_search_engine_serialize_roundtrip() {
for engine in [SearchEngine::Google, SearchEngine::Bing, SearchEngine::DuckDuckGo, SearchEngine::Auto] {
for engine in [SearchEngine::SearXNG, SearchEngine::Google, SearchEngine::Bing, SearchEngine::DuckDuckGo, SearchEngine::Auto] {
let json = serde_json::to_string(&engine).unwrap();
let back: SearchEngine = serde_json::from_str(&json).unwrap();
assert_eq!(json, serde_json::to_string(&back).unwrap());
@@ -849,4 +1046,126 @@ mod tests {
assert!(report.summary.is_some());
assert!(report.summary.unwrap().contains("snippet text"));
}
// --- SearchConfig Tests ---
#[test]
fn test_search_config_default() {
    // Destructure so that every field of the default config is checked.
    let SearchConfig {
        default_engine,
        searxng_url,
        timeout_secs,
    } = SearchConfig::default();

    assert!(matches!(default_engine, SearchEngine::Auto));
    assert_eq!(searxng_url, "http://localhost:8888");
    assert_eq!(timeout_secs, 15);
}
#[test]
fn test_search_config_load_fallback_on_missing_file() {
    // `load` reads config/config.toml relative to the working directory, which
    // may or may not exist when the tests run; either way the outcome must be
    // a usable configuration.
    let loaded = SearchConfig::load();
    let url_is_blank = loaded.searxng_url.is_empty();
    assert!(!url_is_blank);
}
// --- SearXNG Response Parsing Tests ---
#[test]
fn test_searxng_response_parse() {
    // Sample payload in the shape of a SearXNG JSON search response.
    let payload = json!({
        "query": "Rust programming",
        "number_of_results": 42,
        "results": [
            {
                "url": "https://www.rust-lang.org/",
                "title": "Rust Programming Language",
                "content": "A language empowering everyone to build reliable software.",
                "engine": "google",
                "engines": ["google", "duckduckgo"],
                "score": 5.2,
                "category": "general"
            },
            {
                "url": "https://doc.rust-lang.org/book/",
                "title": "The Rust Book",
                "content": "The official guide to Rust programming.",
                "engine": "bing",
                "engines": ["bing"],
                "score": 3.1,
                "category": "general"
            }
        ],
        "suggestions": ["rust tutorial", "rust vs go"]
    });

    let items = payload["results"].as_array().unwrap();
    assert_eq!(items.len(), 2);

    // The first entry exposes the fields the researcher maps into a result.
    let first = &items[0];
    let field = |key: &str| first.get(key).and_then(|v| v.as_str()).unwrap();
    assert_eq!(field("title"), "Rust Programming Language");
    assert_eq!(field("url"), "https://www.rust-lang.org/");
    assert_eq!(
        field("content"),
        "A language empowering everyone to build reliable software."
    );

    let engine_names = first["engines"]
        .as_array()
        .unwrap()
        .iter()
        .filter_map(|e| e.as_str())
        .collect::<Vec<&str>>();
    assert_eq!(engine_names, vec!["google", "duckduckgo"]);
}
#[test]
fn test_searxng_empty_results() {
    // A query with no hits still carries a `results` array, just an empty one.
    let payload = json!({
        "query": "nonexistent xyzzy123",
        "number_of_results": 0,
        "results": [],
        "suggestions": []
    });

    let items = payload["results"].as_array().unwrap();
    assert_eq!(items.len(), 0);
}
#[test]
fn test_searxng_score_normalization() {
    // Same formula the SearXNG result mapping uses: positive scores are capped
    // at 10 and scaled by 10; anything else gets the neutral value 50.
    let normalize = |score: f64| -> u8 {
        if score > 0.0 {
            (score.min(10.0) * 10.0) as u8
        } else {
            50
        }
    };

    let cases: [(f64, u8); 3] = [
        (5.2, 52),   // in range: 5.2 * 10
        (15.0, 100), // above the cap: clamped to 10.0 first
        (0.0, 50),   // no score: neutral default
    ];
    for &(score, expected) in cases.iter() {
        assert_eq!(normalize(score), expected, "score {}", score);
    }
}
#[test]
fn test_searxng_url_construction() {
    let base = SearchConfig::default().searxng_url;
    let encoded_query = url_encode("2024年中国医疗政策");
    let url = format!(
        "{}/search?q={}&format=json&categories=general&language=auto&pageno=1",
        base.trim_end_matches('/'),
        encoded_query
    );

    assert!(url.starts_with("http://localhost:8888/search?"));
    // Fixed parameters, plus the UTF-8 percent-encoding of 中 (bytes E4 B8 AD).
    for expected in ["format=json", "categories=general", "%E4%B8%AD"].iter() {
        assert!(url.contains(expected));
    }
    // The Unicode code point (U+4E2D) must not be emitted in place of the bytes.
    assert!(!url.contains("%4E2D"));
}
}

View File

@@ -69,9 +69,28 @@ services:
networks:
- zclaw-saas
# ---- SearXNG Meta Search ----
# Self-hosted SearXNG instance; config.toml points the researcher at it via
# searxng_url = "http://localhost:8888".
searxng:
# NOTE(review): `latest` is an unpinned tag — consider pinning a version so
# rebuilds are reproducible.
image: searxng/searxng:latest
container_name: zclaw-searxng
restart: unless-stopped
volumes:
# Settings directory (contains settings.yml) mounted into the container.
- ./searxng-config/:/etc/searxng/:Z
- searxng-data:/var/cache/searxng/
networks:
- zclaw-saas
# Port is bound to localhost only; it is not exposed externally.
# NOTE(review): the container-side port here is 8888 — confirm the image
# actually listens on the port set in settings.yml (server.port); the official
# image may listen on 8080 regardless, in which case this should be
# "127.0.0.1:8888:8080".
ports:
- "127.0.0.1:8888:8888"
volumes:
postgres_data:
driver: local
searxng-data:
driver: local
networks:
zclaw-saas:

View File

@@ -0,0 +1,48 @@
# SearXNG configuration for ZCLAW
# Docs: https://docs.searxng.org/admin/settings/settings.html
# Start from SearXNG's built-in defaults and override only the keys below.
use_default_settings: true
search:
safe_search: 0
autocomplete: ""
default_lang: "auto"
# `json` must stay enabled: the researcher queries /search with format=json.
formats:
- html
- json
server:
# NOTE(review): hard-coded secret committed to the repository — replace it
# with a generated value for any deployment beyond local development.
secret_key: "zclaw-searxng-internal"
limiter: false
image_proxy: false
# NOTE(review): confirm the container image honors this port; see the port
# mapping in docker-compose.yml.
port: 8888
# Listens on all interfaces inside the container; docker-compose publishes the
# port on 127.0.0.1 only.
bind_address: "0.0.0.0"
ui:
static_use_hash: true
enabled_plugins:
- 'Hash plugin'
- 'Self Information'
- 'Tracker URL remover'
- 'Ahmia blacklist'
# Upstream engines configured explicitly for this instance.
engines:
- name: google
engine: google
shortcut: g
- name: bing
engine: bing
shortcut: b
- name: duckduckgo
engine: duckduckgo
shortcut: ddg
- name: baidu
engine: baidu
shortcut: bd
- name: wikipedia
engine: wikipedia
shortcut: wp
- name: github
engine: github
shortcut: gh