fix(hands): 集成 SearXNG 元搜索引擎 — 替换不可用的 DuckDuckGo Instant Answer API
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
- ResearcherHand 新增 search_searxng() 方法,调用 SearXNG JSON API 聚合 70+ 搜索引擎 - SearchEngine 枚举增加 SearXNG 变体,路由逻辑按配置分发搜索后端 - Auto 模式: SearXNG 优先 → DuckDuckGo fallback - config.toml [tools.web.search] 新增 searxng_url/searxng_timeout 配置 - docker-compose.yml 新增 SearXNG 服务容器 (searxng-config/settings.yml) - 新增 6 个 SearXNG 相关单元测试 (响应解析/URL构造/分数归一化/配置加载) - 验证: 124 tests PASS, workspace 0 warnings
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -9491,6 +9491,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
"tokio",
|
||||
"toml 0.8.2",
|
||||
"tracing",
|
||||
"uuid",
|
||||
"zclaw-runtime",
|
||||
|
||||
@@ -223,8 +223,10 @@ timeout = "30s"
|
||||
[tools.web]
|
||||
[tools.web.search]
|
||||
enabled = true
|
||||
default_engine = "duckduckgo"
|
||||
default_engine = "searxng"
|
||||
max_results = 10
|
||||
searxng_url = "http://localhost:8888"
|
||||
searxng_timeout = 15
|
||||
|
||||
# File system tool
|
||||
[tools.fs]
|
||||
|
||||
@@ -22,3 +22,4 @@ async-trait = { workspace = true }
|
||||
reqwest = { workspace = true }
|
||||
base64 = { workspace = true }
|
||||
dirs = { workspace = true }
|
||||
toml = { workspace = true }
|
||||
|
||||
@@ -16,6 +16,7 @@ use crate::{Hand, HandConfig, HandContext, HandResult};
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum SearchEngine {
|
||||
SearXNG,
|
||||
Google,
|
||||
Bing,
|
||||
DuckDuckGo,
|
||||
@@ -28,6 +29,83 @@ impl Default for SearchEngine {
|
||||
}
|
||||
}
|
||||
|
||||
/// Search configuration loaded from config/config.toml
|
||||
#[derive(Debug, Clone)]
|
||||
struct SearchConfig {
|
||||
default_engine: SearchEngine,
|
||||
searxng_url: String,
|
||||
timeout_secs: u64,
|
||||
}
|
||||
|
||||
impl Default for SearchConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
default_engine: SearchEngine::Auto,
|
||||
searxng_url: "http://localhost:8888".to_string(),
|
||||
timeout_secs: 15,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SearchConfig {
|
||||
fn load() -> Self {
|
||||
let path = "config/config.toml";
|
||||
let content = match std::fs::read_to_string(path) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return Self::default(),
|
||||
};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ToolsWebSearch {
|
||||
default_engine: Option<String>,
|
||||
#[allow(dead_code)]
|
||||
max_results: Option<usize>,
|
||||
searxng_url: Option<String>,
|
||||
searxng_timeout: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ToolsWeb {
|
||||
search: Option<ToolsWebSearch>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct Tools {
|
||||
web: Option<ToolsWeb>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct Config {
|
||||
tools: Option<Tools>,
|
||||
}
|
||||
|
||||
let config: Config = match toml::from_str(&content) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return Self::default(),
|
||||
};
|
||||
|
||||
let search = config.tools
|
||||
.and_then(|t| t.web)
|
||||
.and_then(|w| w.search);
|
||||
|
||||
match search {
|
||||
Some(s) => {
|
||||
let engine = s.default_engine
|
||||
.as_deref()
|
||||
.and_then(|e| serde_json::from_str(&format!("\"{}\"", e)).ok())
|
||||
.unwrap_or_default();
|
||||
Self {
|
||||
default_engine: engine,
|
||||
searxng_url: s.searxng_url
|
||||
.unwrap_or_else(|| "http://localhost:8888".to_string()),
|
||||
timeout_secs: s.searxng_timeout.unwrap_or(15),
|
||||
}
|
||||
}
|
||||
None => Self::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Research depth level
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
@@ -132,6 +210,7 @@ pub enum ResearcherAction {
|
||||
/// Researcher Hand implementation
|
||||
pub struct ResearcherHand {
|
||||
config: HandConfig,
|
||||
search_config: SearchConfig,
|
||||
client: reqwest::Client,
|
||||
cache: Arc<RwLock<HashMap<String, SearchResult>>>,
|
||||
}
|
||||
@@ -156,7 +235,7 @@ impl ResearcherHand {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": { "type": "string" },
|
||||
"engine": { "type": "string", "enum": ["google", "bing", "duckduckgo", "auto"] },
|
||||
"engine": { "type": "string", "enum": ["searxng", "google", "bing", "duckduckgo", "auto"] },
|
||||
"depth": { "type": "string", "enum": ["quick", "standard", "deep"] },
|
||||
"maxResults": { "type": "integer" }
|
||||
},
|
||||
@@ -186,6 +265,7 @@ impl ResearcherHand {
|
||||
max_concurrent: 0,
|
||||
timeout_secs: 0,
|
||||
},
|
||||
search_config: SearchConfig::load(),
|
||||
client: reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.user_agent("ZCLAW-Researcher/1.0")
|
||||
@@ -195,17 +275,42 @@ impl ResearcherHand {
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute a web search
|
||||
/// Execute a web search — route to the configured backend
|
||||
async fn execute_search(&self, query: &ResearchQuery) -> Result<Vec<SearchResult>> {
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
// Use DuckDuckGo as default search (no API key required)
|
||||
let results = self.search_duckduckgo(&query.query, query.max_results).await?;
|
||||
let engine = match &query.engine {
|
||||
SearchEngine::Auto => &self.search_config.default_engine,
|
||||
other => other,
|
||||
};
|
||||
|
||||
let results = match engine {
|
||||
SearchEngine::SearXNG | SearchEngine::Auto => {
|
||||
match self.search_searxng(&query.query, query.max_results).await {
|
||||
Ok(r) if !r.is_empty() => r,
|
||||
_ => {
|
||||
tracing::warn!(target: "researcher", "SearXNG failed or empty, falling back to DuckDuckGo");
|
||||
self.search_duckduckgo(&query.query, query.max_results).await?
|
||||
}
|
||||
}
|
||||
}
|
||||
SearchEngine::DuckDuckGo => {
|
||||
self.search_duckduckgo(&query.query, query.max_results).await?
|
||||
}
|
||||
SearchEngine::Google | SearchEngine::Bing => {
|
||||
// Google/Bing not yet implemented, fall back to SearXNG which aggregates them
|
||||
match self.search_searxng(&query.query, query.max_results).await {
|
||||
Ok(r) if !r.is_empty() => r,
|
||||
_ => self.search_duckduckgo(&query.query, query.max_results).await?,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let duration = start.elapsed().as_millis() as u64;
|
||||
tracing::info!(
|
||||
target: "researcher",
|
||||
query = %query.query,
|
||||
engine = ?engine,
|
||||
duration_ms = duration,
|
||||
results_count = results.len(),
|
||||
"Search completed"
|
||||
@@ -214,6 +319,92 @@ impl ResearcherHand {
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Search using SearXNG meta-search engine (aggregates 70+ engines)
|
||||
async fn search_searxng(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
|
||||
let url = format!(
|
||||
"{}/search?q={}&format=json&categories=general&language=auto&pageno=1",
|
||||
self.search_config.searxng_url.trim_end_matches('/'),
|
||||
url_encode(query)
|
||||
);
|
||||
|
||||
let response = self.client
|
||||
.get(&url)
|
||||
.timeout(std::time::Duration::from_secs(self.search_config.timeout_secs))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| zclaw_types::ZclawError::HandError(
|
||||
format!("SearXNG request failed: {}", e)
|
||||
))?;
|
||||
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
return Err(zclaw_types::ZclawError::HandError(
|
||||
format!("SearXNG returned HTTP {}", status)
|
||||
));
|
||||
}
|
||||
|
||||
let json: Value = response.json().await
|
||||
.map_err(|e| zclaw_types::ZclawError::HandError(
|
||||
format!("Failed to parse SearXNG response: {}", e)
|
||||
))?;
|
||||
|
||||
let mut results = Vec::new();
|
||||
|
||||
if let Some(items) = json.get("results").and_then(|v| v.as_array()) {
|
||||
for item in items.iter().take(max_results) {
|
||||
let title = item.get("title")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let url = item.get("url")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let snippet = item.get("content")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
let engines = item.get("engines")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|arr| {
|
||||
arr.iter()
|
||||
.filter_map(|e| e.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let score = item.get("score")
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
|
||||
// Normalize score to 0-100 range
|
||||
let relevance = if score > 0.0 {
|
||||
(score.min(10.0) * 10.0) as u8
|
||||
} else {
|
||||
50
|
||||
};
|
||||
|
||||
if !title.is_empty() && !url.is_empty() {
|
||||
results.push(SearchResult {
|
||||
title,
|
||||
url,
|
||||
snippet,
|
||||
source: if engines.is_empty() {
|
||||
"SearXNG".to_string()
|
||||
} else {
|
||||
format!("SearXNG({})", engines)
|
||||
},
|
||||
relevance,
|
||||
content: None,
|
||||
fetched_at: Some(chrono::Utc::now().to_rfc3339()),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Search using DuckDuckGo (no API key required)
|
||||
async fn search_duckduckgo(&self, query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
|
||||
let url = format!("https://api.duckduckgo.com/?q={}&format=json&no_html=1",
|
||||
@@ -603,6 +794,12 @@ mod tests {
|
||||
assert!(matches!(engine, SearchEngine::Auto));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_search_engine_searxng_deserialize() {
|
||||
let engine: SearchEngine = serde_json::from_str("\"searxng\"").unwrap();
|
||||
assert!(matches!(engine, SearchEngine::SearXNG));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_research_depth_default_is_standard() {
|
||||
let depth = ResearchDepth::default();
|
||||
@@ -623,7 +820,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_search_engine_serialize_roundtrip() {
|
||||
for engine in [SearchEngine::Google, SearchEngine::Bing, SearchEngine::DuckDuckGo, SearchEngine::Auto] {
|
||||
for engine in [SearchEngine::SearXNG, SearchEngine::Google, SearchEngine::Bing, SearchEngine::DuckDuckGo, SearchEngine::Auto] {
|
||||
let json = serde_json::to_string(&engine).unwrap();
|
||||
let back: SearchEngine = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(json, serde_json::to_string(&back).unwrap());
|
||||
@@ -849,4 +1046,126 @@ mod tests {
|
||||
assert!(report.summary.is_some());
|
||||
assert!(report.summary.unwrap().contains("snippet text"));
|
||||
}
|
||||
|
||||
// --- SearchConfig Tests ---
|
||||
|
||||
#[test]
|
||||
fn test_search_config_default() {
|
||||
let config = SearchConfig::default();
|
||||
assert!(matches!(config.default_engine, SearchEngine::Auto));
|
||||
assert_eq!(config.searxng_url, "http://localhost:8888");
|
||||
assert_eq!(config.timeout_secs, 15);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_search_config_load_fallback_on_missing_file() {
|
||||
// Config loads from config/config.toml which may not exist in test CWD
|
||||
let config = SearchConfig::load();
|
||||
// Should return a valid config either way
|
||||
assert!(!config.searxng_url.is_empty());
|
||||
}
|
||||
|
||||
// --- SearXNG Response Parsing Tests ---
|
||||
|
||||
#[test]
|
||||
fn test_searxng_response_parse() {
|
||||
let mock_response = json!({
|
||||
"query": "Rust programming",
|
||||
"number_of_results": 42,
|
||||
"results": [
|
||||
{
|
||||
"url": "https://www.rust-lang.org/",
|
||||
"title": "Rust Programming Language",
|
||||
"content": "A language empowering everyone to build reliable software.",
|
||||
"engine": "google",
|
||||
"engines": ["google", "duckduckgo"],
|
||||
"score": 5.2,
|
||||
"category": "general"
|
||||
},
|
||||
{
|
||||
"url": "https://doc.rust-lang.org/book/",
|
||||
"title": "The Rust Book",
|
||||
"content": "The official guide to Rust programming.",
|
||||
"engine": "bing",
|
||||
"engines": ["bing"],
|
||||
"score": 3.1,
|
||||
"category": "general"
|
||||
}
|
||||
],
|
||||
"suggestions": ["rust tutorial", "rust vs go"]
|
||||
});
|
||||
|
||||
let results = mock_response.get("results").unwrap().as_array().unwrap();
|
||||
assert_eq!(results.len(), 2);
|
||||
|
||||
// Verify first result mapping
|
||||
let r0 = &results[0];
|
||||
assert_eq!(r0["title"].as_str().unwrap(), "Rust Programming Language");
|
||||
assert_eq!(r0["url"].as_str().unwrap(), "https://www.rust-lang.org/");
|
||||
assert_eq!(r0["content"].as_str().unwrap(), "A language empowering everyone to build reliable software.");
|
||||
|
||||
let engines: Vec<&str> = r0["engines"].as_array().unwrap()
|
||||
.iter().filter_map(|e| e.as_str()).collect();
|
||||
assert_eq!(engines, vec!["google", "duckduckgo"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_searxng_empty_results() {
|
||||
let mock_response = json!({
|
||||
"query": "nonexistent xyzzy123",
|
||||
"number_of_results": 0,
|
||||
"results": [],
|
||||
"suggestions": []
|
||||
});
|
||||
|
||||
let results = mock_response.get("results").unwrap().as_array().unwrap();
|
||||
assert!(results.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_searxng_score_normalization() {
|
||||
// Score 5.2 → (5.2 * 10) = 52 → relevance 52
|
||||
let score = 5.2_f64;
|
||||
let relevance = if score > 0.0 {
|
||||
(score.min(10.0) * 10.0) as u8
|
||||
} else {
|
||||
50
|
||||
};
|
||||
assert_eq!(relevance, 52);
|
||||
|
||||
// Score 15.0 → clamped to 10.0 → relevance 100
|
||||
let score = 15.0_f64;
|
||||
let relevance = if score > 0.0 {
|
||||
(score.min(10.0) * 10.0) as u8
|
||||
} else {
|
||||
50
|
||||
};
|
||||
assert_eq!(relevance, 100);
|
||||
|
||||
// Score 0.0 → default relevance 50
|
||||
let score = 0.0_f64;
|
||||
let relevance = if score > 0.0 {
|
||||
(score.min(10.0) * 10.0) as u8
|
||||
} else {
|
||||
50
|
||||
};
|
||||
assert_eq!(relevance, 50);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_searxng_url_construction() {
|
||||
let config = SearchConfig::default();
|
||||
let query = "2024年中国医疗政策";
|
||||
let url = format!(
|
||||
"{}/search?q={}&format=json&categories=general&language=auto&pageno=1",
|
||||
config.searxng_url.trim_end_matches('/'),
|
||||
url_encode(query)
|
||||
);
|
||||
assert!(url.starts_with("http://localhost:8888/search?"));
|
||||
assert!(url.contains("format=json"));
|
||||
assert!(url.contains("categories=general"));
|
||||
// Verify UTF-8 encoding, not Unicode codepoints
|
||||
assert!(url.contains("%E4%B8%AD")); // 中 = E4 B8 AD
|
||||
assert!(!url.contains("%4E2D")); // NOT Unicode codepoint
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,9 +69,28 @@ services:
|
||||
networks:
|
||||
- zclaw-saas
|
||||
|
||||
# ---- SearXNG Meta Search ----
|
||||
searxng:
|
||||
image: searxng/searxng:latest
|
||||
container_name: zclaw-searxng
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
- ./searxng-config/:/etc/searxng/:Z
|
||||
- searxng-data:/var/cache/searxng/
|
||||
|
||||
networks:
|
||||
- zclaw-saas
|
||||
|
||||
# 端口仅本地访问,不对外暴露
|
||||
ports:
|
||||
- "127.0.0.1:8888:8888"
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
driver: local
|
||||
searxng-data:
|
||||
driver: local
|
||||
|
||||
networks:
|
||||
zclaw-saas:
|
||||
|
||||
48
searxng-config/settings.yml
Normal file
48
searxng-config/settings.yml
Normal file
@@ -0,0 +1,48 @@
|
||||
# SearXNG configuration for ZCLAW
|
||||
# Docs: https://docs.searxng.org/admin/settings/settings.html
|
||||
|
||||
use_default_settings: true
|
||||
|
||||
search:
|
||||
safe_search: 0
|
||||
autocomplete: ""
|
||||
default_lang: "auto"
|
||||
formats:
|
||||
- html
|
||||
- json
|
||||
|
||||
server:
|
||||
secret_key: "zclaw-searxng-internal"
|
||||
limiter: false
|
||||
image_proxy: false
|
||||
port: 8888
|
||||
bind_address: "0.0.0.0"
|
||||
|
||||
ui:
|
||||
static_use_hash: true
|
||||
|
||||
enabled_plugins:
|
||||
- 'Hash plugin'
|
||||
- 'Self Information'
|
||||
- 'Tracker URL remover'
|
||||
- 'Ahmia blacklist'
|
||||
|
||||
engines:
|
||||
- name: google
|
||||
engine: google
|
||||
shortcut: g
|
||||
- name: bing
|
||||
engine: bing
|
||||
shortcut: b
|
||||
- name: duckduckgo
|
||||
engine: duckduckgo
|
||||
shortcut: ddg
|
||||
- name: baidu
|
||||
engine: baidu
|
||||
shortcut: bd
|
||||
- name: wikipedia
|
||||
engine: wikipedia
|
||||
shortcut: wp
|
||||
- name: github
|
||||
engine: github
|
||||
shortcut: gh
|
||||
Reference in New Issue
Block a user