//! Browser Hand - Web automation capabilities (TypeScript delegation) //! //! **Architecture note (M3-02):** This Rust Hand is a **schema validator and passthrough**. //! Every action returns `{"status": "delegated_to_frontend"}` — no real browser work happens here. //! //! The actual execution path is: //! 1. Frontend `HandsPanel.tsx` intercepts browser hands → routes to `BrowserHandCard` //! 2. `BrowserHandCard` calls `browserHandStore.executeTemplate/executeScript` //! 3. TypeScript calls Tauri `browser_*` commands (Fantoccini-based, defined in `browser/commands.rs`) //! //! This dual-path exists because browser automation requires a WebDriver session managed //! on the TypeScript side (session lifecycle, error recovery, UI feedback). The Rust Hand //! serves as a typed schema for the action DSL and satisfies the HandRegistry contract. //! //! Supported actions: navigate, click, type, scrape, screenshot, fill_form, wait, execute, //! get_source, get_url, get_title, scroll, back, forward, refresh, hover, press_key, upload, select use async_trait::async_trait; use serde::{Deserialize, Serialize}; use serde_json::Value; use zclaw_types::Result; use crate::{Hand, HandConfig, HandContext, HandResult, HandStatus}; /// Browser action types #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "action", rename_all = "snake_case")] pub enum BrowserAction { /// Navigate to a URL Navigate { url: String, #[serde(default)] wait_for: Option, }, /// Click on an element Click { selector: String, #[serde(default)] wait_ms: Option, }, /// Type text into an element Type { selector: String, text: String, #[serde(default)] clear_first: bool, }, /// Select an option from a dropdown Select { selector: String, value: String, }, /// Scrape content from the page Scrape { selectors: Vec, #[serde(default)] wait_for: Option, }, /// Take a screenshot Screenshot { #[serde(default)] selector: Option, #[serde(default)] full_page: bool, }, /// Fill out a form FillForm { fields: Vec, #[serde(default)] submit_selector: Option, }, /// Wait for an element Wait { selector: String, #[serde(default = "default_timeout")] timeout_ms: u64, }, /// Execute JavaScript Execute { script: String, #[serde(default)] args: Vec, }, /// Get page source GetSource, /// Get current URL GetUrl, /// Get page title GetTitle, /// Scroll the page Scroll { #[serde(default)] x: i32, #[serde(default)] y: i32, #[serde(default)] selector: Option, }, /// Go back Back, /// Go forward Forward, /// Refresh page Refresh, /// Hover over an element Hover { selector: String, }, /// Press a key PressKey { key: String, }, /// Upload file Upload { selector: String, file_path: String, }, } impl BrowserAction { pub fn action_name(&self) -> &'static str { match self { BrowserAction::Navigate { .. } => "navigate", BrowserAction::Click { .. } => "click", BrowserAction::Type { .. } => "type", BrowserAction::Select { .. } => "select", BrowserAction::Scrape { .. } => "scrape", BrowserAction::Screenshot { .. } => "screenshot", BrowserAction::FillForm { .. } => "fill_form", BrowserAction::Wait { .. } => "wait", BrowserAction::Execute { .. } => "execute", BrowserAction::GetSource => "get_source", BrowserAction::GetUrl => "get_url", BrowserAction::GetTitle => "get_title", BrowserAction::Scroll { .. } => "scroll", BrowserAction::Back => "back", BrowserAction::Forward => "forward", BrowserAction::Refresh => "refresh", BrowserAction::Hover { .. } => "hover", BrowserAction::PressKey { .. } => "press_key", BrowserAction::Upload { .. } => "upload", } } pub fn summary(&self) -> String { match self { BrowserAction::Navigate { url, .. } => format!("导航到 {}", url), BrowserAction::Click { selector, .. } => format!("点击 {}", selector), BrowserAction::Type { selector, text, .. } => format!("在 {} 输入 {}", selector, text), BrowserAction::Select { selector, value } => format!("在 {} 选择 {}", selector, value), BrowserAction::Scrape { selectors, .. } => format!("抓取 {} 个选择器", selectors.len()), BrowserAction::Screenshot { .. } => "截图".to_string(), BrowserAction::FillForm { fields, .. } => format!("填写 {} 个字段", fields.len()), BrowserAction::Wait { selector, .. } => format!("等待 {}", selector), BrowserAction::Execute { .. } => "执行脚本".to_string(), BrowserAction::GetSource => "获取页面源码".to_string(), BrowserAction::GetUrl => "获取当前URL".to_string(), BrowserAction::GetTitle => "获取页面标题".to_string(), BrowserAction::Scroll { x, y, .. } => format!("滚动到 ({},{})", x, y), BrowserAction::Back => "后退".to_string(), BrowserAction::Forward => "前进".to_string(), BrowserAction::Refresh => "刷新".to_string(), BrowserAction::Hover { selector } => format!("悬停 {}", selector), BrowserAction::PressKey { key } => format!("按键 {}", key), BrowserAction::Upload { selector, .. } => format!("上传文件到 {}", selector), } } } /// Form field definition #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FormField { pub selector: String, pub value: String, } fn default_timeout() -> u64 { 10000 } /// Browser Hand implementation pub struct BrowserHand { config: HandConfig, } impl BrowserHand { /// Create a new Browser Hand pub fn new() -> Self { Self { config: HandConfig { id: "browser".to_string(), name: "浏览器".to_string(), description: "网页浏览器自动化,支持导航、交互和数据采集".to_string(), needs_approval: true, dependencies: vec!["webdriver".to_string()], input_schema: Some(serde_json::json!({ "type": "object", "properties": { "action": { "type": "string", "enum": ["navigate", "click", "type", "scrape", "screenshot", "fill_form", "wait", "execute"] }, "url": { "type": "string" }, "selector": { "type": "string" }, "text": { "type": "string" }, "selectors": { "type": "array", "items": { "type": "string" } }, "script": { "type": "string" } }, "required": ["action"] })), tags: vec!["automation".to_string(), "web".to_string(), "browser".to_string()], enabled: true, max_concurrent: 0, timeout_secs: 0, }, } } /// Check if WebDriver is available by probing common ports fn check_webdriver(&self) -> bool { use std::net::TcpStream; use std::time::Duration; // Probe default WebDriver ports: ChromeDriver (9515), GeckoDriver (4444), Edge (17556) let ports = [9515, 4444, 17556]; for port in ports { let addr = format!("127.0.0.1:{}", port); if let Ok(addr) = addr.parse() { if TcpStream::connect_timeout(&addr, Duration::from_millis(500)).is_ok() { return true; } } } false } } impl Default for BrowserHand { fn default() -> Self { Self::new() } } #[async_trait] impl Hand for BrowserHand { fn config(&self) -> &HandConfig { &self.config } async fn execute(&self, _context: &HandContext, input: Value) -> Result { let action: BrowserAction = match serde_json::from_value(input) { Ok(a) => a, Err(e) => return Ok(HandResult::error(format!("Invalid action: {}", e))), }; let action_type = action.action_name(); let summary = action.summary(); // Check if WebDriver is available if !self.check_webdriver() { return Ok(HandResult::error(format!( "浏览器操作「{}」无法执行:未检测到 WebDriver (ChromeDriver/GeckoDriver)。请先启动 WebDriver 服务。", summary ))); } // WebDriver is running — delegate to frontend BrowserHandCard. // The frontend manages the Fantoccini session lifecycle. Ok(HandResult::success(serde_json::json!({ "action": action_type, "status": "delegated_to_frontend", "message": format!("浏览器操作「{}」已发送到前端执行。WebDriver 已就绪。", summary), "details": format!("{} — 由前端 BrowserHandCard 通过 Fantoccini 执行。", summary), }))) } fn is_dependency_available(&self, dep: &str) -> bool { match dep { "webdriver" => self.check_webdriver(), _ => true, } } fn status(&self) -> HandStatus { if self.check_webdriver() { HandStatus::Idle } else { HandStatus::PendingApproval // Using this to indicate dependency missing } } } /// Browser automation sequence for complex operations #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BrowserSequence { /// Sequence name pub name: String, /// Steps to execute pub steps: Vec, /// Whether to stop on error #[serde(default = "default_stop_on_error")] pub stop_on_error: bool, /// Delay between steps in milliseconds #[serde(default)] pub step_delay_ms: Option, } fn default_stop_on_error() -> bool { true } impl BrowserSequence { /// Create a new browser sequence pub fn new(name: impl Into) -> Self { Self { name: name.into(), steps: Vec::new(), stop_on_error: true, step_delay_ms: None, } } /// Add a navigate step pub fn navigate(mut self, url: impl Into) -> Self { self.steps.push(BrowserAction::Navigate { url: url.into(), wait_for: None }); self } /// Add a click step pub fn click(mut self, selector: impl Into) -> Self { self.steps.push(BrowserAction::Click { selector: selector.into(), wait_ms: None }); self } /// Add a type step pub fn type_text(mut self, selector: impl Into, text: impl Into) -> Self { self.steps.push(BrowserAction::Type { selector: selector.into(), text: text.into(), clear_first: false, }); self } /// Add a wait step pub fn wait(mut self, selector: impl Into, timeout_ms: u64) -> Self { self.steps.push(BrowserAction::Wait { selector: selector.into(), timeout_ms }); self } /// Add a screenshot step pub fn screenshot(mut self) -> Self { self.steps.push(BrowserAction::Screenshot { selector: None, full_page: false }); self } /// Set whether to stop on error pub fn stop_on_error(mut self, stop: bool) -> Self { self.stop_on_error = stop; self } /// Build the sequence pub fn build(self) -> Vec { self.steps } } #[cfg(test)] mod tests { use super::*; use crate::Hand; use std::collections::HashMap; fn fresh_context() -> HandContext { HandContext { agent_id: zclaw_types::AgentId::new(), working_dir: None, env: HashMap::new(), timeout_secs: 30, callback_url: None, } } #[test] fn test_browser_config() { let hand = BrowserHand::new(); let config = hand.config(); assert_eq!(config.id, "browser"); assert!(config.enabled); } #[tokio::test] async fn test_browser_config_needs_approval() { let hand = BrowserHand::new(); assert!(hand.config().needs_approval, "Browser hand should require approval per TOML config"); } #[test] fn test_action_deserialize_navigate() { let json = serde_json::json!({ "action": "navigate", "url": "https://example.com", "wait_for": "body" }); let action: BrowserAction = serde_json::from_value(json).expect("deserialize navigate"); match action { BrowserAction::Navigate { url, wait_for } => { assert_eq!(url, "https://example.com"); assert_eq!(wait_for, Some("body".to_string())); } _ => panic!("Expected Navigate action, got {:?}", action), } } #[test] fn test_action_deserialize_click() { let json = serde_json::json!({ "action": "click", "selector": "#submit-btn", "wait_ms": 500 }); let action: BrowserAction = serde_json::from_value(json).expect("deserialize click"); match action { BrowserAction::Click { selector, wait_ms } => { assert_eq!(selector, "#submit-btn"); assert_eq!(wait_ms, Some(500)); } _ => panic!("Expected Click action, got {:?}", action), } } #[test] fn test_action_deserialize_type() { let json = serde_json::json!({ "action": "type", "selector": "#search", "text": "hello world", "clear_first": true }); let action: BrowserAction = serde_json::from_value(json).expect("deserialize type"); match action { BrowserAction::Type { selector, text, clear_first } => { assert_eq!(selector, "#search"); assert_eq!(text, "hello world"); assert!(clear_first); } _ => panic!("Expected Type action, got {:?}", action), } } #[test] fn test_action_deserialize_scrape() { let json = serde_json::json!({ "action": "scrape", "selectors": ["h1", ".content", "#price"] }); let action: BrowserAction = serde_json::from_value(json).expect("deserialize scrape"); match action { BrowserAction::Scrape { selectors, wait_for } => { assert_eq!(selectors, vec!["h1", ".content", "#price"]); assert!(wait_for.is_none()); } _ => panic!("Expected Scrape action, got {:?}", action), } } #[test] fn test_action_deserialize_screenshot() { let json = serde_json::json!({ "action": "screenshot", "full_page": true }); let action: BrowserAction = serde_json::from_value(json).expect("deserialize screenshot"); match action { BrowserAction::Screenshot { selector, full_page } => { assert!(selector.is_none()); assert!(full_page); } _ => panic!("Expected Screenshot action, got {:?}", action), } } #[test] fn test_all_major_actions_roundtrip() { let actions = vec![ BrowserAction::Navigate { url: "https://example.com".into(), wait_for: None }, BrowserAction::Click { selector: "#btn".into(), wait_ms: None }, BrowserAction::Type { selector: "#input".into(), text: "test".into(), clear_first: false }, BrowserAction::Scrape { selectors: vec!["h1".into()], wait_for: None }, BrowserAction::Screenshot { selector: None, full_page: false }, BrowserAction::Wait { selector: "#loaded".into(), timeout_ms: 5000 }, BrowserAction::Execute { script: "return 1".into(), args: vec![] }, BrowserAction::FillForm { fields: vec![FormField { selector: "#name".into(), value: "Alice".into() }], submit_selector: Some("#submit".into()), }, ]; for original in actions { let json = serde_json::to_value(&original).expect("serialize action"); let roundtripped: BrowserAction = serde_json::from_value(json).expect("deserialize action"); assert_eq!( serde_json::to_value(&original).unwrap(), serde_json::to_value(&roundtripped).unwrap(), "Roundtrip failed for {:?}", original ); } } #[tokio::test] async fn test_browser_sequence_builder() { let ctx = fresh_context(); let hand = BrowserHand::new(); let sequence = BrowserSequence::new("test_sequence") .navigate("https://example.com") .stop_on_error(false); assert_eq!(sequence.name, "test_sequence"); assert!(!sequence.stop_on_error); assert_eq!(sequence.steps.len(), 1); // Execute the navigate step — without WebDriver running, should report error let action_json = serde_json::to_value(&sequence.steps[0]).expect("serialize step"); let result = hand.execute(&ctx, action_json).await.expect("execute"); // In test env no WebDriver is running, so we get an error about missing WebDriver if result.success { assert_eq!(result.output["action"], "navigate"); assert_eq!(result.output["status"], "delegated_to_frontend"); } else { assert!(result.error.as_deref().unwrap_or("").contains("WebDriver")); } } #[tokio::test] async fn test_browser_sequence_multiple_steps() { let ctx = fresh_context(); let hand = BrowserHand::new(); let sequence = BrowserSequence::new("multi_step") .navigate("https://example.com") .click("#login-btn") .type_text("#username", "admin") .screenshot(); assert_eq!(sequence.steps.len(), 4); // Verify each step can parse and execute (or report missing WebDriver) for (i, step) in sequence.steps.iter().enumerate() { let action_json = serde_json::to_value(step).expect("serialize step"); let result = hand.execute(&ctx, action_json).await.expect("execute step"); // Without WebDriver, all steps should report the error cleanly if !result.success { assert!( result.error.as_deref().unwrap_or("").contains("WebDriver"), "Step {} unexpected error: {:?}", i, result.error ); } } } #[test] fn test_form_field_deserialize() { let json = serde_json::json!({ "selector": "#email", "value": "user@example.com" }); let field: FormField = serde_json::from_value(json).expect("deserialize form field"); assert_eq!(field.selector, "#email"); assert_eq!(field.value, "user@example.com"); } }