Files
zclaw_openfang/crates/zclaw-hands/src/hands/browser.rs
iven 8691837608
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
fix(runtime,hands): 4项根因修复 — URL编码/Browser桩/定时解析/LLM超时
1. researcher.rs: url_encode() chars→bytes,修复中文搜索URL编码
   (U+533B→%533B 改为 UTF-8 %E5%8C%BB)
2. browser.rs: WebDriver不可用时返回明确错误而非静默成功,
   防止LLM误以为操作已完成
3. nl_schedule.rs: 新增相对延迟解析(秒后/分钟后/小时后),
   避免fallback到LLM幻觉cron
4. 4个LLM driver: 移除http1_only()防reqwest解码错误,
   超时120s→300s适配工具调用链,Anthropic裸Client::new()补全配置
2026-04-22 03:24:55 +08:00

572 lines
19 KiB
Rust

//! Browser Hand - Web automation capabilities (TypeScript delegation)
//!
//! **Architecture note (M3-02):** This Rust Hand is a **schema validator and passthrough**.
//! A valid action returns `{"status": "delegated_to_frontend"}` when a local WebDriver port is
//! reachable, and an explicit error result otherwise — no real browser work happens here.
//!
//! The actual execution path is:
//! 1. Frontend `HandsPanel.tsx` intercepts browser hands → routes to `BrowserHandCard`
//! 2. `BrowserHandCard` calls `browserHandStore.executeTemplate/executeScript`
//! 3. TypeScript calls Tauri `browser_*` commands (Fantoccini-based, defined in `browser/commands.rs`)
//!
//! This dual-path exists because browser automation requires a WebDriver session managed
//! on the TypeScript side (session lifecycle, error recovery, UI feedback). The Rust Hand
//! serves as a typed schema for the action DSL and satisfies the HandRegistry contract.
//!
//! Supported actions: navigate, click, type, scrape, screenshot, fill_form, wait, execute,
//! get_source, get_url, get_title, scroll, back, forward, refresh, hover, press_key, upload, select
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use zclaw_types::Result;
use crate::{Hand, HandConfig, HandContext, HandResult, HandStatus};
/// Browser action DSL accepted by the Browser Hand.
///
/// Serialized as an internally tagged object: the `action` field selects the variant
/// (snake_case name, e.g. `fill_form`) and the variant's fields sit next to it.
/// Fields annotated with `#[serde(default)]` may be omitted from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "action", rename_all = "snake_case")]
pub enum BrowserAction {
/// Navigate to a URL (`"action": "navigate"`).
Navigate {
/// Target URL (required).
url: String,
/// Optional; `None` when omitted.
/// NOTE(review): presumably a selector to wait for after loading — confirm with the frontend.
#[serde(default)]
wait_for: Option<String>,
},
/// Click on an element (`"action": "click"`).
Click {
/// Selector of the element to click (required).
selector: String,
/// Optional; `None` when omitted.
/// NOTE(review): unit is presumably milliseconds per the name — confirm.
#[serde(default)]
wait_ms: Option<u64>,
},
/// Type text into an element (`"action": "type"`).
Type {
/// Selector of the element receiving the text (required).
selector: String,
/// Text to type (required).
text: String,
/// Defaults to `false` when omitted.
#[serde(default)]
clear_first: bool,
},
/// Select an option from a dropdown (`"action": "select"`).
Select {
/// Selector of the dropdown element (required).
selector: String,
/// Option value to select (required).
value: String,
},
/// Scrape content from the page (`"action": "scrape"`).
Scrape {
/// Selectors to scrape (required; the list itself may be empty).
selectors: Vec<String>,
/// Optional; `None` when omitted.
#[serde(default)]
wait_for: Option<String>,
},
/// Take a screenshot (`"action": "screenshot"`).
Screenshot {
/// Optional element selector; `None` when omitted.
#[serde(default)]
selector: Option<String>,
/// Defaults to `false` when omitted.
#[serde(default)]
full_page: bool,
},
/// Fill out a form (`"action": "fill_form"`).
FillForm {
/// Selector/value pairs to fill in (required).
fields: Vec<FormField>,
/// Optional; `None` when omitted.
#[serde(default)]
submit_selector: Option<String>,
},
/// Wait for an element (`"action": "wait"`).
Wait {
/// Selector of the element to wait for (required).
selector: String,
/// Timeout in milliseconds; falls back to `default_timeout()` (10000) when omitted.
#[serde(default = "default_timeout")]
timeout_ms: u64,
},
/// Execute JavaScript (`"action": "execute"`).
Execute {
/// Script source (required).
script: String,
/// Arbitrary JSON arguments; defaults to an empty list when omitted.
#[serde(default)]
args: Vec<Value>,
},
/// Get page source (`"action": "get_source"`); takes no fields.
GetSource,
/// Get current URL (`"action": "get_url"`); takes no fields.
GetUrl,
/// Get page title (`"action": "get_title"`); takes no fields.
GetTitle,
/// Scroll the page (`"action": "scroll"`).
Scroll {
/// Horizontal value; defaults to `0` when omitted.
#[serde(default)]
x: i32,
/// Vertical value; defaults to `0` when omitted.
#[serde(default)]
y: i32,
/// Optional element selector; `None` when omitted.
#[serde(default)]
selector: Option<String>,
},
/// Go back (`"action": "back"`); takes no fields.
Back,
/// Go forward (`"action": "forward"`); takes no fields.
Forward,
/// Refresh page (`"action": "refresh"`); takes no fields.
Refresh,
/// Hover over an element (`"action": "hover"`).
Hover {
/// Selector of the element to hover (required).
selector: String,
},
/// Press a key (`"action": "press_key"`).
PressKey {
/// Key to press (required).
/// NOTE(review): the accepted key-name format is not defined in this file — confirm.
key: String,
},
/// Upload file (`"action": "upload"`).
Upload {
/// Selector of the upload element (required).
selector: String,
/// Path of the file to upload (required).
file_path: String,
},
}
impl BrowserAction {
pub fn action_name(&self) -> &'static str {
match self {
BrowserAction::Navigate { .. } => "navigate",
BrowserAction::Click { .. } => "click",
BrowserAction::Type { .. } => "type",
BrowserAction::Select { .. } => "select",
BrowserAction::Scrape { .. } => "scrape",
BrowserAction::Screenshot { .. } => "screenshot",
BrowserAction::FillForm { .. } => "fill_form",
BrowserAction::Wait { .. } => "wait",
BrowserAction::Execute { .. } => "execute",
BrowserAction::GetSource => "get_source",
BrowserAction::GetUrl => "get_url",
BrowserAction::GetTitle => "get_title",
BrowserAction::Scroll { .. } => "scroll",
BrowserAction::Back => "back",
BrowserAction::Forward => "forward",
BrowserAction::Refresh => "refresh",
BrowserAction::Hover { .. } => "hover",
BrowserAction::PressKey { .. } => "press_key",
BrowserAction::Upload { .. } => "upload",
}
}
pub fn summary(&self) -> String {
match self {
BrowserAction::Navigate { url, .. } => format!("导航到 {}", url),
BrowserAction::Click { selector, .. } => format!("点击 {}", selector),
BrowserAction::Type { selector, text, .. } => format!("{} 输入 {}", selector, text),
BrowserAction::Select { selector, value } => format!("{} 选择 {}", selector, value),
BrowserAction::Scrape { selectors, .. } => format!("抓取 {} 个选择器", selectors.len()),
BrowserAction::Screenshot { .. } => "截图".to_string(),
BrowserAction::FillForm { fields, .. } => format!("填写 {} 个字段", fields.len()),
BrowserAction::Wait { selector, .. } => format!("等待 {}", selector),
BrowserAction::Execute { .. } => "执行脚本".to_string(),
BrowserAction::GetSource => "获取页面源码".to_string(),
BrowserAction::GetUrl => "获取当前URL".to_string(),
BrowserAction::GetTitle => "获取页面标题".to_string(),
BrowserAction::Scroll { x, y, .. } => format!("滚动到 ({},{})", x, y),
BrowserAction::Back => "后退".to_string(),
BrowserAction::Forward => "前进".to_string(),
BrowserAction::Refresh => "刷新".to_string(),
BrowserAction::Hover { selector } => format!("悬停 {}", selector),
BrowserAction::PressKey { key } => format!("按键 {}", key),
BrowserAction::Upload { selector, .. } => format!("上传文件到 {}", selector),
}
}
}
/// One selector/value pair used by `BrowserAction::FillForm`.
///
/// Both fields are required when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormField {
/// Selector identifying the form element.
pub selector: String,
/// Value to enter into that element.
pub value: String,
}
/// Serde default for the `timeout_ms` field of the `wait` action: 10 000 ms.
fn default_timeout() -> u64 {
    const DEFAULT_WAIT_TIMEOUT_MS: u64 = 10_000;
    DEFAULT_WAIT_TIMEOUT_MS
}
/// Browser Hand: the `Hand` implementation registered under the id `browser`.
///
/// The struct stores only its static configuration; no browser session state is kept here.
pub struct BrowserHand {
/// Static hand configuration, built once in `BrowserHand::new`.
config: HandConfig,
}
impl BrowserHand {
    /// Create a new Browser Hand.
    ///
    /// The hand is registered under the id `browser`, requires approval and declares a
    /// single `webdriver` dependency. Its `input_schema` advertises every action and every
    /// field that [`BrowserAction`] can deserialize, so callers that build requests from the
    /// schema are not limited to a subset of the supported actions. Only `action` is marked
    /// required; per-action field requirements are enforced when the input is deserialized.
    pub fn new() -> Self {
        Self {
            config: HandConfig {
                id: "browser".to_string(),
                name: "浏览器".to_string(),
                description: "网页浏览器自动化,支持导航、交互和数据采集".to_string(),
                needs_approval: true,
                dependencies: vec!["webdriver".to_string()],
                input_schema: Some(serde_json::json!({
                    "type": "object",
                    "properties": {
                        "action": {
                            "type": "string",
                            // Keep in sync with `BrowserAction::action_name`.
                            "enum": [
                                "navigate", "click", "type", "select", "scrape", "screenshot",
                                "fill_form", "wait", "execute", "get_source", "get_url",
                                "get_title", "scroll", "back", "forward", "refresh", "hover",
                                "press_key", "upload"
                            ]
                        },
                        "url": { "type": "string" },
                        "selector": { "type": "string" },
                        "text": { "type": "string" },
                        "value": { "type": "string" },
                        "selectors": { "type": "array", "items": { "type": "string" } },
                        "script": { "type": "string" },
                        "args": { "type": "array" },
                        "wait_for": { "type": "string" },
                        "wait_ms": { "type": "integer", "minimum": 0 },
                        "timeout_ms": { "type": "integer", "minimum": 0 },
                        "clear_first": { "type": "boolean" },
                        "full_page": { "type": "boolean" },
                        "fields": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "selector": { "type": "string" },
                                    "value": { "type": "string" }
                                },
                                "required": ["selector", "value"]
                            }
                        },
                        "submit_selector": { "type": "string" },
                        "x": { "type": "integer" },
                        "y": { "type": "integer" },
                        "key": { "type": "string" },
                        "file_path": { "type": "string" }
                    },
                    "required": ["action"]
                })),
                tags: vec!["automation".to_string(), "web".to_string(), "browser".to_string()],
                enabled: true,
                max_concurrent: 0,
                timeout_secs: 0,
            },
        }
    }

    /// Check whether a WebDriver service appears to be listening on this machine.
    ///
    /// Probes the default loopback ports of ChromeDriver (9515), GeckoDriver (4444) and
    /// Edge (17556) in that order and returns `true` as soon as one accepts a TCP
    /// connection. Only port reachability is tested — no WebDriver handshake is performed,
    /// so any other service bound to one of these ports also counts as "available".
    ///
    /// Each probe is a blocking connect bounded by a 500 ms timeout, so a call can block the
    /// current thread for up to 1.5 s when none of the ports answers.
    fn check_webdriver(&self) -> bool {
        use std::net::{SocketAddr, TcpStream};
        use std::time::Duration;

        const WEBDRIVER_PORTS: [u16; 3] = [9515, 4444, 17556];
        const PROBE_TIMEOUT: Duration = Duration::from_millis(500);

        WEBDRIVER_PORTS.iter().any(|&port| {
            // Build the address directly instead of formatting and re-parsing a string,
            // which removes the silent "skip this port on parse failure" path.
            let addr = SocketAddr::from(([127, 0, 0, 1], port));
            TcpStream::connect_timeout(&addr, PROBE_TIMEOUT).is_ok()
        })
    }
}
impl Default for BrowserHand {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Hand for BrowserHand {
fn config(&self) -> &HandConfig {
&self.config
}
async fn execute(&self, _context: &HandContext, input: Value) -> Result<HandResult> {
let action: BrowserAction = match serde_json::from_value(input) {
Ok(a) => a,
Err(e) => return Ok(HandResult::error(format!("Invalid action: {}", e))),
};
let action_type = action.action_name();
let summary = action.summary();
// Check if WebDriver is available
if !self.check_webdriver() {
return Ok(HandResult::error(format!(
"浏览器操作「{}」无法执行:未检测到 WebDriver (ChromeDriver/GeckoDriver)。请先启动 WebDriver 服务。",
summary
)));
}
// WebDriver is running — delegate to frontend BrowserHandCard.
// The frontend manages the Fantoccini session lifecycle.
Ok(HandResult::success(serde_json::json!({
"action": action_type,
"status": "delegated_to_frontend",
"message": format!("浏览器操作「{}」已发送到前端执行。WebDriver 已就绪。", summary),
"details": format!("{} — 由前端 BrowserHandCard 通过 Fantoccini 执行。", summary),
})))
}
fn is_dependency_available(&self, dep: &str) -> bool {
match dep {
"webdriver" => self.check_webdriver(),
_ => true,
}
}
fn status(&self) -> HandStatus {
if self.check_webdriver() {
HandStatus::Idle
} else {
HandStatus::PendingApproval // Using this to indicate dependency missing
}
}
}
/// Named, ordered list of browser actions for multi-step operations.
///
/// Serializable on its own; it can also be assembled through the chained builder
/// methods on `BrowserSequence`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserSequence {
/// Sequence name
pub name: String,
/// Steps to execute, in order
pub steps: Vec<BrowserAction>,
/// Whether to stop on error; defaults to `true` when omitted from the input
#[serde(default = "default_stop_on_error")]
pub stop_on_error: bool,
/// Delay between steps in milliseconds; `None` when omitted from the input
#[serde(default)]
pub step_delay_ms: Option<u64>,
}
/// Serde default for the `stop_on_error` field of a browser sequence: stop on the first error.
fn default_stop_on_error() -> bool {
    const STOP_ON_ERROR_BY_DEFAULT: bool = true;
    STOP_ON_ERROR_BY_DEFAULT
}
impl BrowserSequence {
    /// Starts an empty sequence called `name` that stops on error and has no step delay.
    pub fn new(name: impl Into<String>) -> Self {
        let name = name.into();
        Self {
            name,
            steps: vec![],
            stop_on_error: true,
            step_delay_ms: None,
        }
    }

    /// Appends `step` to the end of the sequence and hands the builder back.
    fn with_step(mut self, step: BrowserAction) -> Self {
        self.steps.push(step);
        self
    }

    /// Appends a `navigate` step for `url` (no `wait_for`).
    pub fn navigate(self, url: impl Into<String>) -> Self {
        let url = url.into();
        self.with_step(BrowserAction::Navigate { url, wait_for: None })
    }

    /// Appends a `click` step for `selector` (no `wait_ms`).
    pub fn click(self, selector: impl Into<String>) -> Self {
        let selector = selector.into();
        self.with_step(BrowserAction::Click { selector, wait_ms: None })
    }

    /// Appends a `type` step that enters `text` into `selector` without clearing it first.
    pub fn type_text(self, selector: impl Into<String>, text: impl Into<String>) -> Self {
        let selector = selector.into();
        let text = text.into();
        self.with_step(BrowserAction::Type { selector, text, clear_first: false })
    }

    /// Appends a `wait` step for `selector` with the given timeout in milliseconds.
    pub fn wait(self, selector: impl Into<String>, timeout_ms: u64) -> Self {
        let selector = selector.into();
        self.with_step(BrowserAction::Wait { selector, timeout_ms })
    }

    /// Appends a `screenshot` step with no selector and `full_page` off.
    pub fn screenshot(self) -> Self {
        self.with_step(BrowserAction::Screenshot { selector: None, full_page: false })
    }

    /// Sets whether the sequence should stop on error.
    pub fn stop_on_error(self, stop: bool) -> Self {
        Self { stop_on_error: stop, ..self }
    }

    /// Consumes the builder and returns only the collected steps; the name,
    /// `stop_on_error` and `step_delay_ms` are dropped.
    pub fn build(self) -> Vec<BrowserAction> {
        let Self { steps, .. } = self;
        steps
    }
}
// Unit tests for the Browser Hand: configuration, action (de)serialization and the
// sequence builder. The execute-based tests probe real local ports, so they accept both
// outcomes (WebDriver reachable or not) depending on the machine running them.
#[cfg(test)]
mod tests {
use super::*;
use crate::Hand;
use std::collections::HashMap;
// Builds a minimal context: fresh agent id, no working dir, empty env, 30 s timeout,
// no callback URL.
fn fresh_context() -> HandContext {
HandContext {
agent_id: zclaw_types::AgentId::new(),
working_dir: None,
env: HashMap::new(),
timeout_secs: 30,
callback_url: None,
}
}
// The hand registers under the id "browser" and is enabled by default.
#[test]
fn test_browser_config() {
let hand = BrowserHand::new();
let config = hand.config();
assert_eq!(config.id, "browser");
assert!(config.enabled);
}
// The hand must require approval. Declared async via tokio although nothing is awaited.
#[tokio::test]
async fn test_browser_config_needs_approval() {
let hand = BrowserHand::new();
assert!(hand.config().needs_approval, "Browser hand should require approval per TOML config");
}
// "navigate" with both fields present maps onto Navigate { url, wait_for: Some(..) }.
#[test]
fn test_action_deserialize_navigate() {
let json = serde_json::json!({
"action": "navigate",
"url": "https://example.com",
"wait_for": "body"
});
let action: BrowserAction = serde_json::from_value(json).expect("deserialize navigate");
match action {
BrowserAction::Navigate { url, wait_for } => {
assert_eq!(url, "https://example.com");
assert_eq!(wait_for, Some("body".to_string()));
}
_ => panic!("Expected Navigate action, got {:?}", action),
}
}
// "click" with an explicit wait_ms maps onto Click { selector, wait_ms: Some(500) }.
#[test]
fn test_action_deserialize_click() {
let json = serde_json::json!({
"action": "click",
"selector": "#submit-btn",
"wait_ms": 500
});
let action: BrowserAction = serde_json::from_value(json).expect("deserialize click");
match action {
BrowserAction::Click { selector, wait_ms } => {
assert_eq!(selector, "#submit-btn");
assert_eq!(wait_ms, Some(500));
}
_ => panic!("Expected Click action, got {:?}", action),
}
}
// "type" carries selector, text and an explicit clear_first flag.
#[test]
fn test_action_deserialize_type() {
let json = serde_json::json!({
"action": "type",
"selector": "#search",
"text": "hello world",
"clear_first": true
});
let action: BrowserAction = serde_json::from_value(json).expect("deserialize type");
match action {
BrowserAction::Type { selector, text, clear_first } => {
assert_eq!(selector, "#search");
assert_eq!(text, "hello world");
assert!(clear_first);
}
_ => panic!("Expected Type action, got {:?}", action),
}
}
// "scrape" without wait_for: the omitted optional field deserializes to None.
#[test]
fn test_action_deserialize_scrape() {
let json = serde_json::json!({
"action": "scrape",
"selectors": ["h1", ".content", "#price"]
});
let action: BrowserAction = serde_json::from_value(json).expect("deserialize scrape");
match action {
BrowserAction::Scrape { selectors, wait_for } => {
assert_eq!(selectors, vec!["h1", ".content", "#price"]);
assert!(wait_for.is_none());
}
_ => panic!("Expected Scrape action, got {:?}", action),
}
}
// "screenshot" without selector: selector is None, full_page is taken from the input.
#[test]
fn test_action_deserialize_screenshot() {
let json = serde_json::json!({
"action": "screenshot",
"full_page": true
});
let action: BrowserAction = serde_json::from_value(json).expect("deserialize screenshot");
match action {
BrowserAction::Screenshot { selector, full_page } => {
assert!(selector.is_none());
assert!(full_page);
}
_ => panic!("Expected Screenshot action, got {:?}", action),
}
}
// Serialize -> deserialize -> serialize must be stable for a sample of eight variants.
// BrowserAction does not derive PartialEq, so the JSON values are compared instead.
#[test]
fn test_all_major_actions_roundtrip() {
let actions = vec![
BrowserAction::Navigate { url: "https://example.com".into(), wait_for: None },
BrowserAction::Click { selector: "#btn".into(), wait_ms: None },
BrowserAction::Type { selector: "#input".into(), text: "test".into(), clear_first: false },
BrowserAction::Scrape { selectors: vec!["h1".into()], wait_for: None },
BrowserAction::Screenshot { selector: None, full_page: false },
BrowserAction::Wait { selector: "#loaded".into(), timeout_ms: 5000 },
BrowserAction::Execute { script: "return 1".into(), args: vec![] },
BrowserAction::FillForm {
fields: vec![FormField { selector: "#name".into(), value: "Alice".into() }],
submit_selector: Some("#submit".into()),
},
];
for original in actions {
let json = serde_json::to_value(&original).expect("serialize action");
let roundtripped: BrowserAction = serde_json::from_value(json).expect("deserialize action");
assert_eq!(
serde_json::to_value(&original).unwrap(),
serde_json::to_value(&roundtripped).unwrap(),
"Roundtrip failed for {:?}",
original
);
}
}
// Builder bookkeeping (name, stop_on_error, step count) plus one execute() call on the
// single navigate step.
#[tokio::test]
async fn test_browser_sequence_builder() {
let ctx = fresh_context();
let hand = BrowserHand::new();
let sequence = BrowserSequence::new("test_sequence")
.navigate("https://example.com")
.stop_on_error(false);
assert_eq!(sequence.name, "test_sequence");
assert!(!sequence.stop_on_error);
assert_eq!(sequence.steps.len(), 1);
// Run the navigate step through the hand; execute() itself must return Ok either way.
let action_json = serde_json::to_value(&sequence.steps[0]).expect("serialize step");
let result = hand.execute(&ctx, action_json).await.expect("execute");
// Both outcomes are accepted: delegation when a WebDriver port is reachable on this
// machine, otherwise an error result that mentions WebDriver.
if result.success {
assert_eq!(result.output["action"], "navigate");
assert_eq!(result.output["status"], "delegated_to_frontend");
} else {
assert!(result.error.as_deref().unwrap_or("").contains("WebDriver"));
}
}
// Four chained builder steps, each pushed through execute().
#[tokio::test]
async fn test_browser_sequence_multiple_steps() {
let ctx = fresh_context();
let hand = BrowserHand::new();
let sequence = BrowserSequence::new("multi_step")
.navigate("https://example.com")
.click("#login-btn")
.type_text("#username", "admin")
.screenshot();
assert_eq!(sequence.steps.len(), 4);
// Every step must serialize and execute() must return Ok for it.
for (i, step) in sequence.steps.iter().enumerate() {
let action_json = serde_json::to_value(step).expect("serialize step");
let result = hand.execute(&ctx, action_json).await.expect("execute step");
// Only the failure branch is checked: a failed step must be the WebDriver-missing
// error. A successful (delegated) step is accepted without further assertions.
if !result.success {
assert!(
result.error.as_deref().unwrap_or("").contains("WebDriver"),
"Step {} unexpected error: {:?}",
i, result.error
);
}
}
}
// FormField deserializes from a plain { selector, value } object.
#[test]
fn test_form_field_deserialize() {
let json = serde_json::json!({
"selector": "#email",
"value": "user@example.com"
});
let field: FormField = serde_json::from_value(json).expect("deserialize form field");
assert_eq!(field.selector, "#email");
assert_eq!(field.value, "user@example.com");
}
}