Files
zclaw_openfang/desktop/src-tauri/src/browser/commands.rs
iven 1c99e5f3a3
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
fix(browser): stability enhancements + MCP frontend client
S7 Browser Hand:
- Remove dead code: browser/actions.rs (314 lines of unused BrowserAction/ActionResult types)
- Fix browser_scrape_page: log failed selector matches instead of silently swallowing errors
- Fix element_to_info: document known limitation for always-None location/size fields
- Fix browserHandStore: reuse activeSessionId in executeScript/takeScreenshot/executeTemplate
  instead of creating orphan Browser sessions
- Add Browser.connect(sessionId) method for session reuse

MCP Frontend:
- Add desktop/src/lib/mcp-client.ts (77 lines) — typed client for MCP Tauri commands
  (startMcpService, stopMcpService, listMcpServices, callMcpTool)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-03 22:16:12 +08:00

562 lines
15 KiB
Rust

// Tauri commands for browser automation
use crate::browser::client::BrowserClient;
use crate::browser::session::{BrowserType, SessionConfig};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::RwLock;
use tauri::State;
/// Global browser client state shared by the Tauri commands in this file.
///
/// The client sits behind `Arc<RwLock<..>>`, so every clone of this state
/// refers to the same underlying [`BrowserClient`].
#[derive(Clone)]
pub struct BrowserState {
    client: Arc<RwLock<BrowserClient>>,
}

impl BrowserState {
    /// Build a state object around a freshly constructed [`BrowserClient`].
    pub fn new() -> Self {
        let client = BrowserClient::new();
        Self {
            client: Arc::new(RwLock::new(client)),
        }
    }
}

impl Default for BrowserState {
    /// Same as [`BrowserState::new`].
    fn default() -> Self {
        BrowserState::new()
    }
}
// ============================================================================
// Session Management Commands
// ============================================================================
/// Open a new browser session and return its identifier.
///
/// Defaults applied to missing arguments:
/// - `webdriver_url`: `http://localhost:4444`
/// - `headless`: `true`
/// - `browser_type`: Chrome (also used for any name other than
///   `"firefox"`, `"edge"` or `"safari"`)
/// - window size: only set when both `window_width` and `window_height`
///   are supplied
///
/// # Errors
/// Returns the client's error, rendered as a string, when the session
/// cannot be created.
// @connected
#[tauri::command]
pub async fn browser_create_session(
    state: State<'_, BrowserState>,
    webdriver_url: Option<String>,
    headless: Option<bool>,
    browser_type: Option<String>,
    window_width: Option<u32>,
    window_height: Option<u32>,
) -> Result<BrowserSessionResult, String> {
    // Unrecognised or absent names deliberately fall through to Chrome.
    let kind = match browser_type.as_deref() {
        Some("firefox") => BrowserType::Firefox,
        Some("edge") => BrowserType::Edge,
        Some("safari") => BrowserType::Safari,
        _ => BrowserType::Chrome,
    };
    let endpoint = match webdriver_url {
        Some(url) => url,
        None => "http://localhost:4444".to_string(),
    };
    // A lone width or height is ignored; both are needed to form a size.
    let window_size = match (window_width, window_height) {
        (Some(width), Some(height)) => Some((width, height)),
        _ => None,
    };
    let config = SessionConfig {
        webdriver_url: endpoint,
        browser_type: kind,
        headless: headless.unwrap_or(true),
        window_size,
        ..Default::default()
    };

    let guard = state.client.read().await;
    let created = guard.create_session(config).await;
    created
        .map(|session_id| BrowserSessionResult { session_id })
        .map_err(|err| err.to_string())
}
/// Close the browser session identified by `session_id`.
///
/// # Errors
/// Returns the client's error, rendered as a string, when closing fails.
// @connected
#[tauri::command]
pub async fn browser_close_session(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<(), String> {
    let guard = state.client.read().await;
    guard
        .close_session(&session_id)
        .await
        .map_err(|err| err.to_string())?;
    Ok(())
}
/// Report every session the browser client currently tracks.
///
/// For each session the status is rendered as the lowercased `Debug`
/// output of the client's status value, and both timestamps are formatted
/// with `to_rfc3339()`.
// @connected
#[tauri::command]
pub async fn browser_list_sessions(
    state: State<'_, BrowserState>,
) -> Result<Vec<BrowserSessionInfo>, String> {
    let guard = state.client.read().await;
    let mut infos = Vec::new();
    for session in guard.list_sessions().await {
        let status = format!("{:?}", session.status).to_lowercase();
        let created_at = session.created_at.to_rfc3339();
        let last_activity = session.last_activity.to_rfc3339();
        infos.push(BrowserSessionInfo {
            id: session.id,
            name: session.name,
            current_url: session.current_url,
            title: session.title,
            status,
            created_at,
            last_activity,
        });
    }
    Ok(infos)
}
/// Look up a single session and return its details.
///
/// The status is the lowercased `Debug` output of the client's status
/// value; timestamps are formatted with `to_rfc3339()`.
///
/// # Errors
/// Returns the client's error, rendered as a string, when the lookup fails.
// @connected
#[tauri::command]
pub async fn browser_get_session(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<BrowserSessionInfo, String> {
    let guard = state.client.read().await;
    let lookup = guard.get_session(&session_id).await;
    lookup
        .map(|found| BrowserSessionInfo {
            id: found.id,
            name: found.name,
            current_url: found.current_url,
            title: found.title,
            status: format!("{:?}", found.status).to_lowercase(),
            created_at: found.created_at.to_rfc3339(),
            last_activity: found.last_activity.to_rfc3339(),
        })
        .map_err(|err| err.to_string())
}
// ============================================================================
// Navigation Commands
// ============================================================================
/// Point the session at `url` and return the URL and title reported by
/// the client after navigation.
///
/// # Errors
/// Returns the client's error, rendered as a string, when navigation fails.
// @connected
#[tauri::command]
pub async fn browser_navigate(
    state: State<'_, BrowserState>,
    session_id: String,
    url: String,
) -> Result<BrowserNavigationResult, String> {
    let guard = state.client.read().await;
    let outcome = guard.navigate(&session_id, &url).await;
    outcome
        .map(|page| BrowserNavigationResult {
            url: page.url,
            title: page.title,
        })
        .map_err(|err| err.to_string())
}
/// Ask the client to move the session one step back in its history.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_back(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<(), String> {
    let guard = state.client.read().await;
    guard
        .back(&session_id)
        .await
        .map_err(|err| err.to_string())?;
    Ok(())
}
/// Ask the client to move the session one step forward in its history.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_forward(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<(), String> {
    let guard = state.client.read().await;
    let outcome = guard.forward(&session_id).await;
    outcome.map_err(|err| err.to_string())?;
    Ok(())
}
/// Ask the client to reload the session's current page.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_refresh(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<(), String> {
    let guard = state.client.read().await;
    guard
        .refresh(&session_id)
        .await
        .map_err(|err| err.to_string())?;
    Ok(())
}
/// Return the URL the client reports for the session's current page.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_get_url(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<String, String> {
    let guard = state.client.read().await;
    let current = guard
        .get_current_url(&session_id)
        .await
        .map_err(|err| err.to_string())?;
    Ok(current)
}
/// Return the title the client reports for the session's current page.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_get_title(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<String, String> {
    let guard = state.client.read().await;
    let title = guard
        .get_title(&session_id)
        .await
        .map_err(|err| err.to_string())?;
    Ok(title)
}
// ============================================================================
// Element Interaction Commands
// ============================================================================
/// Locate one element by `selector` and return a serializable snapshot of
/// the fields the client reports for it.
///
/// # Errors
/// Returns the client's error, rendered as a string, when the lookup fails.
// @connected
#[tauri::command]
pub async fn browser_find_element(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
) -> Result<BrowserElementInfo, String> {
    let guard = state.client.read().await;
    let lookup = guard.find_element(&session_id, &selector).await;
    lookup
        .map(|found| BrowserElementInfo {
            selector: found.selector,
            tag_name: found.tag_name,
            text: found.text,
            is_displayed: found.is_displayed,
            is_enabled: found.is_enabled,
            is_selected: found.is_selected,
            location: found
                .location
                .map(|point| BrowserElementLocation { x: point.x, y: point.y }),
            size: found.size.map(|dims| BrowserElementSize {
                width: dims.width,
                height: dims.height,
            }),
        })
        .map_err(|err| err.to_string())
}
/// Locate every element matching `selector` and return one snapshot per
/// match, in the order the client yields them.
///
/// # Errors
/// Returns the client's error, rendered as a string, when the lookup fails.
// @connected
#[tauri::command]
pub async fn browser_find_elements(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
) -> Result<Vec<BrowserElementInfo>, String> {
    let guard = state.client.read().await;
    let matches = guard
        .find_elements(&session_id, &selector)
        .await
        .map_err(|err| err.to_string())?;

    let mut infos = Vec::new();
    for found in matches {
        let location = found
            .location
            .map(|point| BrowserElementLocation { x: point.x, y: point.y });
        let size = found.size.map(|dims| BrowserElementSize {
            width: dims.width,
            height: dims.height,
        });
        infos.push(BrowserElementInfo {
            selector: found.selector,
            tag_name: found.tag_name,
            text: found.text,
            is_displayed: found.is_displayed,
            is_enabled: found.is_enabled,
            is_selected: found.is_selected,
            location,
            size,
        });
    }
    Ok(infos)
}
/// Click the element matching `selector` in the given session.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_click(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
) -> Result<(), String> {
    let guard = state.client.read().await;
    guard
        .click(&session_id, &selector)
        .await
        .map_err(|err| err.to_string())?;
    Ok(())
}
/// Send `text` to the element matching `selector`.
///
/// When `clear_first` is `Some(true)` the client's `clear_and_type` is
/// used; otherwise (including when it is omitted) plain `type_text` is used.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_type(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
    text: String,
    clear_first: Option<bool>,
) -> Result<(), String> {
    let guard = state.client.read().await;
    match clear_first {
        Some(true) => guard
            .clear_and_type(&session_id, &selector, &text)
            .await
            .map_err(|err| err.to_string()),
        Some(false) | None => guard
            .type_text(&session_id, &selector, &text)
            .await
            .map_err(|err| err.to_string()),
    }
}
/// Return the text the client reports for the element matching `selector`.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_get_text(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
) -> Result<String, String> {
    let guard = state.client.read().await;
    let text = guard
        .get_text(&session_id, &selector)
        .await
        .map_err(|err| err.to_string())?;
    Ok(text)
}
/// Read `attribute` from the element matching `selector`.
///
/// The `Option` returned by the client is passed through unchanged.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_get_attribute(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
    attribute: String,
) -> Result<Option<String>, String> {
    let guard = state.client.read().await;
    let value = guard
        .get_attribute(&session_id, &selector, &attribute)
        .await
        .map_err(|err| err.to_string())?;
    Ok(value)
}
/// Wait for an element matching `selector` and return its snapshot.
///
/// `timeout_ms` is forwarded to the client's `wait_for_element`; when it
/// is omitted, `10000` is passed instead.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_wait_for_element(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
    timeout_ms: Option<u64>,
) -> Result<BrowserElementInfo, String> {
    let timeout = timeout_ms.unwrap_or(10_000);
    let guard = state.client.read().await;
    let waited = guard
        .wait_for_element(&session_id, &selector, timeout)
        .await;
    waited
        .map(|found| BrowserElementInfo {
            selector: found.selector,
            tag_name: found.tag_name,
            text: found.text,
            is_displayed: found.is_displayed,
            is_enabled: found.is_enabled,
            is_selected: found.is_selected,
            location: found
                .location
                .map(|point| BrowserElementLocation { x: point.x, y: point.y }),
            size: found.size.map(|dims| BrowserElementSize {
                width: dims.width,
                height: dims.height,
            }),
        })
        .map_err(|err| err.to_string())
}
// ============================================================================
// Advanced Commands
// ============================================================================
/// Run `script` in the session through the client and return the JSON
/// value it produces.
///
/// An omitted `args` is passed to the client as an empty list.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_execute_script(
    state: State<'_, BrowserState>,
    session_id: String,
    script: String,
    args: Option<Vec<serde_json::Value>>,
) -> Result<serde_json::Value, String> {
    let script_args = match args {
        Some(values) => values,
        None => Vec::new(),
    };
    let guard = state.client.read().await;
    let value = guard
        .execute_script(&session_id, &script, script_args)
        .await
        .map_err(|err| err.to_string())?;
    Ok(value)
}
/// Capture a screenshot of the session through the client.
///
/// The `base64` payload and `format` label are copied from the client's
/// result unchanged.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_screenshot(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<BrowserScreenshotResult, String> {
    let guard = state.client.read().await;
    let capture = guard.screenshot(&session_id).await;
    capture
        .map(|shot| BrowserScreenshotResult {
            base64: shot.base64,
            format: shot.format,
        })
        .map_err(|err| err.to_string())
}
/// Capture a screenshot of the element matching `selector`.
///
/// The `base64` payload and `format` label are copied from the client's
/// result unchanged.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_element_screenshot(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
) -> Result<BrowserScreenshotResult, String> {
    let guard = state.client.read().await;
    let capture = guard.element_screenshot(&session_id, &selector).await;
    capture
        .map(|shot| BrowserScreenshotResult {
            base64: shot.base64,
            format: shot.format,
        })
        .map_err(|err| err.to_string())
}
/// Return the page source the client reports for the session.
///
/// # Errors
/// Returns the client's error, rendered as a string.
// @connected
#[tauri::command]
pub async fn browser_get_source(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<String, String> {
    let guard = state.client.read().await;
    let source = guard
        .get_source(&session_id)
        .await
        .map_err(|err| err.to_string())?;
    Ok(source)
}
// ============================================================================
// High-Level Task Commands (for Hands integration)
// ============================================================================
/// Collect the text of every element matched by each entry in `selectors`.
///
/// If `wait_for` is given, the client first waits for that selector
/// (`timeout_ms`, or `10000` when omitted); an error from that wait aborts
/// the whole command.
///
/// Returns a JSON object mapping each selector to an array of the non-empty
/// `text` values of its matches. A selector whose lookup fails is logged
/// with `tracing::warn!` and left out of the object rather than failing
/// the command.
///
/// # Errors
/// Only the optional wait step can produce an error.
// @connected
#[tauri::command]
pub async fn browser_scrape_page(
    state: State<'_, BrowserState>,
    session_id: String,
    selectors: Vec<String>,
    wait_for: Option<String>,
    timeout_ms: Option<u64>,
) -> Result<serde_json::Value, String> {
    let guard = state.client.read().await;

    // Optional readiness gate before any extraction happens.
    if let Some(ready_selector) = wait_for {
        let timeout = timeout_ms.unwrap_or(10_000);
        let waited = guard
            .wait_for_element(&session_id, &ready_selector, timeout)
            .await;
        if let Err(err) = waited {
            return Err(err.to_string());
        }
    }

    // Best-effort extraction: one failing selector must not lose the rest.
    let mut scraped = serde_json::Map::new();
    for css in selectors {
        let lookup = guard.find_elements(&session_id, &css).await;
        let elements = match lookup {
            Ok(found) => found,
            Err(err) => {
                tracing::warn!(
                    selector = %css,
                    error = %err,
                    "browser_scrape_page: find_elements failed, skipping selector"
                );
                continue;
            }
        };

        // Elements without text contribute nothing to the array.
        let mut texts: Vec<String> = Vec::new();
        for element in elements.iter() {
            if let Some(text) = element.text.clone() {
                texts.push(text);
            }
        }
        scraped.insert(css, serde_json::json!(texts));
    }

    Ok(serde_json::Value::Object(scraped))
}
/// Fill a list of form fields and optionally click a submit element.
///
/// Fields are processed in the order given, each through the client's
/// `clear_and_type`. The first failure stops processing and is returned;
/// fields already filled are left as they are. When `submit_selector` is
/// present, that element is clicked after all fields succeeded.
///
/// # Errors
/// Returns the client's error, rendered as a string, from the first
/// failing field or from the final click.
// @connected
#[tauri::command]
pub async fn browser_fill_form(
    state: State<'_, BrowserState>,
    session_id: String,
    fields: Vec<FormFieldData>,
    submit_selector: Option<String>,
) -> Result<(), String> {
    let guard = state.client.read().await;

    for entry in &fields {
        guard
            .clear_and_type(&session_id, &entry.selector, &entry.value)
            .await
            .map_err(|err| err.to_string())?;
    }

    match submit_selector {
        Some(button) => guard
            .click(&session_id, &button)
            .await
            .map_err(|err| err.to_string()),
        None => Ok(()),
    }
}
// ============================================================================
// Response Types
// ============================================================================
/// Payload returned by `browser_create_session`.
#[derive(Debug, Serialize)]
pub struct BrowserSessionResult {
/// Identifier produced by the client's `create_session` call.
pub session_id: String,
}
/// Serializable description of one browser session, as returned by
/// `browser_list_sessions` and `browser_get_session`.
#[derive(Debug, Serialize)]
pub struct BrowserSessionInfo {
/// Session identifier, copied from the client's session record.
pub id: String,
/// Session name, copied from the client's session record.
pub name: String,
/// Current URL as tracked by the client, if it has one.
pub current_url: Option<String>,
/// Page title as tracked by the client, if it has one.
pub title: Option<String>,
/// Lowercased `Debug` rendering of the client's session status value.
pub status: String,
/// Creation timestamp, formatted with `to_rfc3339()`.
pub created_at: String,
/// Last-activity timestamp, formatted with `to_rfc3339()`.
pub last_activity: String,
}
/// Payload returned by `browser_navigate`; both fields are copied from the
/// client's navigation result.
#[derive(Debug, Serialize)]
pub struct BrowserNavigationResult {
/// URL reported by the client after navigating, if any.
pub url: Option<String>,
/// Page title reported by the client after navigating, if any.
pub title: Option<String>,
}
/// Serializable snapshot of a page element, built field-by-field from the
/// client's element value in `browser_find_element`,
/// `browser_find_elements` and `browser_wait_for_element`.
#[derive(Debug, Serialize)]
pub struct BrowserElementInfo {
/// Selector recorded on the client's element value.
pub selector: String,
/// Tag name reported by the client, if any.
pub tag_name: Option<String>,
/// Text reported by the client, if any.
pub text: Option<String>,
/// Copied from the client's `is_displayed` flag.
pub is_displayed: bool,
/// Copied from the client's `is_enabled` flag.
pub is_enabled: bool,
/// Copied from the client's `is_selected` flag.
pub is_selected: bool,
/// Element position, present only when the client supplies one.
/// NOTE(review): may currently always be `None` depending on what the
/// client populates — confirm against the client's element conversion.
pub location: Option<BrowserElementLocation>,
/// Element dimensions, present only when the client supplies them.
/// NOTE(review): same caveat as `location` — confirm.
pub size: Option<BrowserElementSize>,
}
/// Position of an element, copied from the client's location value.
/// NOTE(review): units and origin are not visible here — presumably pixels
/// relative to the page or viewport; confirm against the client.
#[derive(Debug, Serialize)]
pub struct BrowserElementLocation {
/// Horizontal coordinate.
pub x: i32,
/// Vertical coordinate.
pub y: i32,
}
/// Dimensions of an element, copied from the client's size value.
/// NOTE(review): units are not visible here — presumably pixels; confirm.
#[derive(Debug, Serialize)]
pub struct BrowserElementSize {
/// Element width.
pub width: u64,
/// Element height.
pub height: u64,
}
/// Payload returned by `browser_screenshot` and
/// `browser_element_screenshot`; both fields are copied from the client's
/// screenshot result.
#[derive(Debug, Serialize)]
pub struct BrowserScreenshotResult {
/// Image data from the client's `base64` field.
pub base64: String,
/// Format label from the client's `format` field.
/// NOTE(review): the set of possible values is not visible here — confirm.
pub format: String,
}
/// One form field to fill, deserialized from the `fields` argument of
/// `browser_fill_form`.
#[derive(Debug, Deserialize)]
pub struct FormFieldData {
/// Selector of the input element; passed to the client's `clear_and_type`.
pub selector: String,
/// Text to enter into the element; passed to the client's `clear_and_type`.
pub value: String,
}