refactor(types): comprehensive TypeScript type system improvements

Major type system refactoring and error fixes across the codebase:

**Type System Improvements:**
- Extended OpenFangStreamEvent with 'connected' and 'agents_updated' event types
- Added GatewayPong interface for WebSocket pong responses
- Added index signature to MemorySearchOptions for Record compatibility
- Fixed RawApproval interface with hand_name, run_id properties

**Gateway & Protocol Fixes:**
- Fixed performHandshake nonce handling in gateway-client.ts
- Fixed onAgentStream callback type definitions
- Fixed HandRun runId mapping to handle undefined values
- Fixed Approval mapping with proper default values

**Memory System Fixes:**
- Fixed MemoryEntry creation with required properties (lastAccessedAt, accessCount)
- Replaced getByAgent with getAll method in vector-memory.ts
- Fixed MemorySearchOptions type compatibility

**Component Fixes:**
- Fixed ReflectionLog property names (filePath→file, proposedContent→suggestedContent)
- Fixed SkillMarket suggestSkills async call arguments
- Fixed message-virtualization useRef generic type
- Fixed session-persistence messageCount type conversion

**Code Cleanup:**
- Removed unused imports and variables across multiple files
- Consolidated StoredError interface (removed duplicate)
- Deleted obsolete test files (feedbackStore.test.ts, memory-index.test.ts)

**New Features:**
- Added browser automation module (Tauri backend)
- Added Active Learning Panel component
- Added Agent Onboarding Wizard
- Added Memory Graph visualization
- Added Personality Selector
- Added Skill Market store and components

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-03-17 08:05:07 +08:00
parent adfd7024df
commit f4efc823e2
80 changed files with 9496 additions and 1390 deletions

View File

@@ -483,12 +483,23 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
[[package]]
name = "cookie"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e859cd57d0710d9e06c381b550c06e76992472a8c6d527aecd2fc673dcc231fb"
dependencies = [
"time",
"version_check",
]
[[package]]
name = "cookie"
version = "0.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747"
dependencies = [
"percent-encoding",
"time",
"version_check",
]
@@ -719,8 +730,11 @@ dependencies = [
name = "desktop"
version = "0.1.0"
dependencies = [
"base64 0.22.1",
"chrono",
"dirs 5.0.1",
"fantoccini",
"futures",
"regex",
"reqwest 0.11.27",
"serde",
@@ -728,7 +742,9 @@ dependencies = [
"tauri",
"tauri-build",
"tauri-plugin-opener",
"thiserror 2.0.18",
"tokio",
"uuid",
]
[[package]]
@@ -984,6 +1000,30 @@ dependencies = [
"pin-project-lite",
]
[[package]]
name = "fantoccini"
version = "0.21.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3a6a7a9a454c24453f9807c7f12b37e31ae43f3eb41888ae1f79a9a3e3be3f5"
dependencies = [
"base64 0.22.1",
"cookie 0.18.1",
"futures-util",
"http 1.4.0",
"http-body-util",
"hyper 1.8.1",
"hyper-tls 0.6.0",
"hyper-util",
"mime",
"openssl",
"serde",
"serde_json",
"time",
"tokio",
"url",
"webdriver",
]
[[package]]
name = "fastrand"
version = "2.3.0"
@@ -1104,6 +1144,21 @@ dependencies = [
"new_debug_unreachable",
]
[[package]]
name = "futures"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.32"
@@ -1111,6 +1166,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
@@ -1178,6 +1234,7 @@ version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
@@ -1712,6 +1769,22 @@ dependencies = [
"tokio-native-tls",
]
[[package]]
name = "hyper-tls"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
dependencies = [
"bytes",
"http-body-util",
"hyper 1.8.1",
"hyper-util",
"native-tls",
"tokio",
"tokio-native-tls",
"tower-service",
]
[[package]]
name = "hyper-util"
version = "0.1.20"
@@ -3188,7 +3261,7 @@ dependencies = [
"http 0.2.12",
"http-body 0.4.6",
"hyper 0.14.32",
"hyper-tls",
"hyper-tls 0.5.0",
"ipnet",
"js-sys",
"log",
@@ -3962,7 +4035,7 @@ checksum = "da77cc00fb9028caf5b5d4650f75e31f1ef3693459dfca7f7e506d1ecef0ba2d"
dependencies = [
"anyhow",
"bytes",
"cookie",
"cookie 0.18.1",
"dirs 6.0.0",
"dunce",
"embed_plist",
@@ -4113,7 +4186,7 @@ version = "2.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2826d79a3297ed08cd6ea7f412644ef58e32969504bc4fbd8d7dbeabc4445ea2"
dependencies = [
"cookie",
"cookie 0.18.1",
"dpi",
"gtk",
"http 1.4.0",
@@ -4918,6 +4991,26 @@ dependencies = [
"string_cache_codegen 0.6.1",
]
[[package]]
name = "webdriver"
version = "0.50.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "144ab979b12d36d65065635e646549925de229954de2eb3b47459b432a42db71"
dependencies = [
"base64 0.21.7",
"bytes",
"cookie 0.16.2",
"http 0.2.12",
"log",
"serde",
"serde_derive",
"serde_json",
"thiserror 1.0.69",
"time",
"unicode-segmentation",
"url",
]
[[package]]
name = "webkit2gtk"
version = "2.0.2"
@@ -5638,7 +5731,7 @@ checksum = "a24eda84b5d488f99344e54b807138896cee8df0b2d16c793f1f6b80e6d8df1f"
dependencies = [
"base64 0.22.1",
"block2",
"cookie",
"cookie 0.18.1",
"crossbeam-channel",
"dirs 6.0.0",
"dom_query",

View File

@@ -24,7 +24,14 @@ serde = { version = "1", features = ["derive"] }
serde_json = "1"
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.11", features = ["json", "blocking"] }
chrono = "0.4"
chrono = { version = "0.4", features = ["serde"] }
regex = "1"
dirs = "5"
# Browser automation
fantoccini = "0.21"
futures = "0.3"
base64 = "0.22"
thiserror = "2"
uuid = { version = "1", features = ["v4", "serde"] }

View File

@@ -0,0 +1,310 @@
// Browser action definitions for Hands system
use serde::{Deserialize, Serialize};
/// A single low-level browser command.
///
/// Serialized with an internal `type` tag holding the variant name in
/// snake_case; the variant's own fields sit next to that tag.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum BrowserAction {
    /// Open a new browser session; every setting is optional.
    CreateSession {
        webdriver_url: Option<String>,
        headless: Option<bool>,
        browser_type: Option<String>,
        window_size: Option<(u32, u32)>,
    },

    /// Shut down an existing session.
    CloseSession { session_id: String },

    /// Load `url` in the session.
    Navigate { session_id: String, url: String },

    /// Step back in the session history.
    Back { session_id: String },

    /// Step forward in the session history.
    Forward { session_id: String },

    /// Reload the current page.
    Refresh { session_id: String },

    /// Click the element matching `selector`.
    Click { session_id: String, selector: String },

    /// Send `text` to the element matching `selector`, optionally clearing it first.
    Type {
        session_id: String,
        selector: String,
        text: String,
        clear_first: Option<bool>,
    },

    /// Read the text of the element matching `selector`.
    GetText { session_id: String, selector: String },

    /// Read one attribute of the element matching `selector`.
    GetAttribute {
        session_id: String,
        selector: String,
        attribute: String,
    },

    /// Look up one element by `selector`.
    FindElement { session_id: String, selector: String },

    /// Look up every element matching `selector`.
    FindElements { session_id: String, selector: String },

    /// Run a JavaScript snippet with optional arguments.
    ExecuteScript {
        session_id: String,
        script: String,
        args: Option<Vec<serde_json::Value>>,
    },

    /// Capture a screenshot of the page.
    Screenshot { session_id: String },

    /// Capture a screenshot of the element matching `selector`.
    ElementScreenshot { session_id: String, selector: String },

    /// Wait for an element matching `selector`, with an optional timeout in milliseconds.
    WaitForElement {
        session_id: String,
        selector: String,
        timeout_ms: Option<u64>,
    },

    /// Fetch the page source.
    GetSource { session_id: String },

    /// Fetch the current URL.
    GetCurrentUrl { session_id: String },

    /// Fetch the page title.
    GetTitle { session_id: String },

    /// Enumerate all sessions.
    ListSessions,

    /// Fetch information about one session.
    GetSession { session_id: String },
}
/// Outcome of running a browser action.
///
/// Serialized with an internal `type` tag holding the variant name in
/// snake_case.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ActionResult {
    /// A session was created.
    SessionCreated { session_id: String },

    /// A session was closed.
    SessionClosed { session_id: String },

    /// Navigation finished; URL and title are reported when known.
    Navigated {
        url: Option<String>,
        title: Option<String>,
    },

    /// An element was clicked.
    Clicked { selector: String },

    /// Text was typed into an element.
    Typed { selector: String, text: String },

    /// Text was read from an element.
    TextRetrieved { selector: String, text: String },

    /// An attribute was read from an element; `value` may be absent.
    AttributeRetrieved {
        selector: String,
        attribute: String,
        value: Option<String>,
    },

    /// One element was located.
    ElementFound { element: ElementInfo },

    /// A list of elements was located.
    ElementsFound { elements: Vec<ElementInfo> },

    /// A script ran and produced `result`.
    ScriptExecuted { result: serde_json::Value },

    /// A screenshot was captured, carried as base64 text plus a format label.
    ScreenshotTaken { base64: String, format: String },

    /// The page source was read.
    SourceRetrieved { source: String },

    /// The current URL was read.
    UrlRetrieved { url: String },

    /// The page title was read.
    TitleRetrieved { title: String },

    /// All sessions were enumerated.
    SessionsListed { sessions: Vec<SessionInfo> },

    /// Details of one session were read.
    SessionInfo { session: SessionInfo },

    /// The operation finished and has no data to report.
    Completed,

    /// The operation failed.
    Error { message: String, code: String },
}
/// Serializable description of a page element.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ElementInfo {
    /// Selector associated with the element.
    pub selector: String,
    /// Tag name, when known.
    pub tag_name: Option<String>,
    /// Text content, when known.
    pub text: Option<String>,
    /// Display state flag.
    pub is_displayed: bool,
    /// Enabled state flag.
    pub is_enabled: bool,
    /// Selected state flag.
    pub is_selected: bool,
    /// Position, when known.
    pub location: Option<ElementLocation>,
    /// Dimensions, when known.
    pub size: Option<ElementSize>,
}
/// Integer x/y position of an element.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ElementLocation {
    /// Horizontal coordinate.
    pub x: i32,
    /// Vertical coordinate.
    pub y: i32,
}
/// Unsigned width/height of an element.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ElementSize {
    /// Horizontal extent.
    pub width: u64,
    /// Vertical extent.
    pub height: u64,
}
/// Serializable summary of a browser session; all values are carried as text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionInfo {
    /// Session identifier.
    pub id: String,
    /// Session name.
    pub name: String,
    /// URL currently loaded, when known.
    pub current_url: Option<String>,
    /// Title of the current page, when known.
    pub title: Option<String>,
    /// Status label.
    pub status: String,
    /// Creation timestamp as text.
    pub created_at: String,
    /// Last-activity timestamp as text.
    pub last_activity: String,
}
/// A composite browser job intended for Hand integration.
///
/// Serialized with an internal `task` tag holding the variant name in
/// snake_case.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "task", rename_all = "snake_case")]
pub enum BrowserTask {
    /// Scrape content from a page using the given selectors.
    ScrapePage {
        url: String,
        selectors: Vec<String>,
        wait_for: Option<String>,
    },

    /// Fill in a form, with an optional submit selector.
    FillForm {
        url: String,
        fields: Vec<FormField>,
        submit_selector: Option<String>,
    },

    /// Snapshot a page, optionally including a screenshot.
    PageSnapshot {
        url: String,
        include_screenshot: bool,
    },

    /// Navigate to a URL and run an extraction script.
    NavigateAndExtract {
        url: String,
        extraction_script: String,
    },

    /// Scrape items across several pages, with an optional page cap.
    MultiPageScrape {
        start_url: String,
        next_page_selector: String,
        item_selector: String,
        max_pages: Option<u32>,
    },
}
/// One entry of a [`BrowserTask::FillForm`] job.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FormField {
    /// Selector of the field.
    pub selector: String,
    /// Value for the field.
    pub value: String,
    /// Optional field type label.
    pub field_type: Option<String>,
}

View File

@@ -0,0 +1,493 @@
// Browser client using Fantoccini WebDriver
use crate::browser::error::{BrowserError, Result};
use crate::browser::session::{BrowserSession, BrowserType, SessionConfig, SessionManager};
use base64::{engine::general_purpose::STANDARD, Engine};
use fantoccini::elements::Element;
use fantoccini::{Client, ClientBuilder, Locator};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::RwLock;
use uuid::Uuid;
/// Entry point for browser automation.
///
/// Pairs the WebDriver clients (keyed by session id) with the session
/// manager that holds the matching session records.
pub struct BrowserClient {
    /// WebDriver clients keyed by session id, behind a shared async lock.
    connections: Arc<RwLock<HashMap<String, Client>>>,
    /// Keeper of the per-session records.
    session_manager: SessionManager,
}
impl BrowserClient {
/// Builds a client with an empty connection map and a fresh session manager.
pub fn new() -> Self {
    let connections = Arc::new(RwLock::new(HashMap::new()));
    let session_manager = SessionManager::new();
    Self {
        connections,
        session_manager,
    }
}
/// Opens a WebDriver connection for `config` and registers it as a session.
///
/// Returns the freshly generated session id (a v4 UUID as text).
///
/// # Errors
/// Propagates any capability-building error, and returns
/// `BrowserError::ConnectionFailed` when the WebDriver connection fails.
pub async fn create_session(&self, config: SessionConfig) -> Result<String> {
    let id = Uuid::new_v4().to_string();
    let caps = self.build_capabilities(&config)?;

    let connected = ClientBuilder::native()
        .capabilities(caps)
        .connect(&config.webdriver_url)
        .await;
    let client = match connected {
        Ok(client) => client,
        Err(err) => return Err(BrowserError::ConnectionFailed(err.to_string())),
    };

    // The write guard is a temporary and is released at the end of this statement.
    self.connections.write().await.insert(id.clone(), client);

    let record = BrowserSession::new(id.clone(), config);
    self.session_manager.add_session(record).await;

    Ok(id)
}
/// Closes a browser session and forgets it locally.
///
/// The connection and the session record are removed *before* the WebDriver
/// close request is sent. A failed close therefore cannot leave an
/// unremovable session behind, and two concurrent close calls cannot both
/// act on the same connection.
///
/// # Errors
/// * `BrowserError::SessionNotFound` when no connection is stored for
///   `session_id`.
/// * `BrowserError::ConnectionFailed` when the WebDriver close request
///   fails; the local bookkeeping has already been cleaned up by then.
pub async fn close_session(&self, session_id: &str) -> Result<()> {
    // Take ownership of the connection; the write guard is a temporary and
    // is released at the end of this statement.
    let client = self
        .connections
        .write()
        .await
        .remove(session_id)
        .ok_or_else(|| BrowserError::SessionNotFound(session_id.to_string()))?;

    // Drop the record regardless of how the remote close turns out.
    self.session_manager.remove_session(session_id).await;

    if let Err(e) = client.close().await {
        return Err(BrowserError::ConnectionFailed(e.to_string()));
    }
    Ok(())
}
/// Looks up the record for `session_id`.
///
/// # Errors
/// `BrowserError::SessionNotFound` when the session manager has no such record.
pub async fn get_session(&self, session_id: &str) -> Result<BrowserSession> {
    match self.session_manager.get_session(session_id).await {
        Some(session) => Ok(session),
        None => Err(BrowserError::SessionNotFound(session_id.to_string())),
    }
}
/// Returns every session record held by the session manager.
pub async fn list_sessions(&self) -> Vec<BrowserSession> {
    let manager = &self.session_manager;
    manager.list_sessions().await
}
/// Loads `url` in the session and records where the browser ended up.
///
/// The URL and title are read back after navigation on a best-effort basis;
/// either may be `None` if the read fails.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::NavigationFailed` when the navigation command fails.
pub async fn navigate(&self, session_id: &str, url: &str) -> Result<NavigationResult> {
    let client = self.get_client(session_id).await?;

    if client.goto(url).await.is_err() {
        return Err(BrowserError::NavigationFailed {
            url: url.to_string(),
        });
    }

    let landed_url = client.current_url().await.ok().map(|u| u.to_string());
    let page_title = client.title().await.ok();

    // Build the return value first so the originals can move into the closure.
    let outcome = NavigationResult {
        url: landed_url.clone(),
        title: page_title.clone(),
    };

    self.session_manager
        .update_session(session_id, |s| {
            s.update_location(landed_url, page_title);
        })
        .await;

    Ok(outcome)
}
/// Steps back in the session's history, then refreshes the stored URL/title.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::CommandFailed` when the WebDriver command fails.
pub async fn back(&self, session_id: &str) -> Result<()> {
    let client = self.get_client(session_id).await?;
    if let Err(err) = client.back().await {
        return Err(BrowserError::CommandFailed(err.to_string()));
    }
    self.update_session_location(session_id).await;
    Ok(())
}
/// Steps forward in the session's history, then refreshes the stored URL/title.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::CommandFailed` when the WebDriver command fails.
pub async fn forward(&self, session_id: &str) -> Result<()> {
    let client = self.get_client(session_id).await?;
    if let Err(err) = client.forward().await {
        return Err(BrowserError::CommandFailed(err.to_string()));
    }
    self.update_session_location(session_id).await;
    Ok(())
}
/// Reloads the current page of the session.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::CommandFailed` when the WebDriver command fails.
pub async fn refresh(&self, session_id: &str) -> Result<()> {
    let client = self.get_client(session_id).await?;
    if let Err(err) = client.refresh().await {
        return Err(BrowserError::CommandFailed(err.to_string()));
    }
    Ok(())
}
/// Locates the first element matching the CSS `selector` and describes it.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::ElementNotFound` when the lookup fails for any reason.
pub async fn find_element(&self, session_id: &str, selector: &str) -> Result<ElementInfo> {
    let client = self.get_client(session_id).await?;
    let found = match client.find(Locator::Css(selector)).await {
        Ok(element) => element,
        Err(_) => {
            return Err(BrowserError::ElementNotFound {
                selector: selector.to_string(),
            })
        }
    };
    self.element_to_info(&found, selector).await
}
/// Locates every element matching the CSS `selector` and describes each one.
///
/// Elements whose description cannot be built are left out of the result.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::ElementNotFound` when the lookup command itself fails.
pub async fn find_elements(&self, session_id: &str, selector: &str) -> Result<Vec<ElementInfo>> {
    let client = self.get_client(session_id).await?;
    let matches = match client.find_all(Locator::Css(selector)).await {
        Ok(list) => list,
        Err(_) => {
            return Err(BrowserError::ElementNotFound {
                selector: selector.to_string(),
            })
        }
    };

    let mut described = Vec::new();
    for item in &matches {
        match self.element_to_info(item, selector).await {
            Ok(info) => described.push(info),
            Err(_) => continue,
        }
    }
    Ok(described)
}
/// Clicks the first element matching the CSS `selector`.
///
/// The stored URL/title are refreshed afterwards.
///
/// # Errors
/// `BrowserError::SessionNotFound`, `BrowserError::ElementNotFound` when the
/// lookup fails, or `BrowserError::CommandFailed` when the click fails.
pub async fn click(&self, session_id: &str, selector: &str) -> Result<()> {
    let client = self.get_client(session_id).await?;
    let target = match client.find(Locator::Css(selector)).await {
        Ok(element) => element,
        Err(_) => {
            return Err(BrowserError::ElementNotFound {
                selector: selector.to_string(),
            })
        }
    };
    if let Err(err) = target.click().await {
        return Err(BrowserError::CommandFailed(err.to_string()));
    }
    self.update_session_location(session_id).await;
    Ok(())
}
/// Sends `text` as keystrokes to the first element matching `selector`.
///
/// # Errors
/// `BrowserError::SessionNotFound`, `BrowserError::ElementNotFound` when the
/// lookup fails, or `BrowserError::CommandFailed` when sending keys fails.
pub async fn type_text(&self, session_id: &str, selector: &str, text: &str) -> Result<()> {
    let client = self.get_client(session_id).await?;
    let target = match client.find(Locator::Css(selector)).await {
        Ok(element) => element,
        Err(_) => {
            return Err(BrowserError::ElementNotFound {
                selector: selector.to_string(),
            })
        }
    };
    if let Err(err) = target.send_keys(text).await {
        return Err(BrowserError::CommandFailed(err.to_string()));
    }
    Ok(())
}
/// Clears the first element matching `selector`, then sends `text` to it.
///
/// # Errors
/// `BrowserError::SessionNotFound`, `BrowserError::ElementNotFound` when the
/// lookup fails, or `BrowserError::CommandFailed` when either the clear or
/// the send-keys step fails.
pub async fn clear_and_type(&self, session_id: &str, selector: &str, text: &str) -> Result<()> {
    let client = self.get_client(session_id).await?;
    let target = match client.find(Locator::Css(selector)).await {
        Ok(element) => element,
        Err(_) => {
            return Err(BrowserError::ElementNotFound {
                selector: selector.to_string(),
            })
        }
    };
    if let Err(err) = target.clear().await {
        return Err(BrowserError::CommandFailed(err.to_string()));
    }
    if let Err(err) = target.send_keys(text).await {
        return Err(BrowserError::CommandFailed(err.to_string()));
    }
    Ok(())
}
/// Reads the text of the first element matching `selector`.
///
/// # Errors
/// `BrowserError::SessionNotFound`, `BrowserError::ElementNotFound` when the
/// lookup fails, or `BrowserError::CommandFailed` when reading the text fails.
pub async fn get_text(&self, session_id: &str, selector: &str) -> Result<String> {
    let client = self.get_client(session_id).await?;
    let target = match client.find(Locator::Css(selector)).await {
        Ok(element) => element,
        Err(_) => {
            return Err(BrowserError::ElementNotFound {
                selector: selector.to_string(),
            })
        }
    };
    match target.text().await {
        Ok(text) => Ok(text),
        Err(err) => Err(BrowserError::CommandFailed(err.to_string())),
    }
}
/// Reads `attribute` from the first element matching `selector`.
///
/// Returns whatever the driver reports, which may be `None`.
///
/// # Errors
/// `BrowserError::SessionNotFound`, `BrowserError::ElementNotFound` when the
/// lookup fails, or `BrowserError::CommandFailed` when the read fails.
pub async fn get_attribute(
    &self,
    session_id: &str,
    selector: &str,
    attribute: &str,
) -> Result<Option<String>> {
    let client = self.get_client(session_id).await?;
    let target = match client.find(Locator::Css(selector)).await {
        Ok(element) => element,
        Err(_) => {
            return Err(BrowserError::ElementNotFound {
                selector: selector.to_string(),
            })
        }
    };
    match target.attr(attribute).await {
        Ok(value) => Ok(value),
        Err(err) => Err(BrowserError::CommandFailed(err.to_string())),
    }
}
/// Runs `script` in the session with `args` and returns its JSON result.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::ScriptError` carrying the driver's error text.
pub async fn execute_script(
    &self,
    session_id: &str,
    script: &str,
    args: Vec<serde_json::Value>,
) -> Result<serde_json::Value> {
    let client = self.get_client(session_id).await?;
    match client.execute(script, args).await {
        Ok(value) => Ok(value),
        Err(err) => Err(BrowserError::ScriptError {
            message: err.to_string(),
        }),
    }
}
/// Captures a screenshot of the session.
///
/// The result carries the raw bytes, their standard-alphabet base64
/// encoding, and the format label `"png"`.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::ScreenshotFailed` when the capture fails.
pub async fn screenshot(&self, session_id: &str) -> Result<ScreenshotResult> {
    let client = self.get_client(session_id).await?;
    let bytes = match client.screenshot().await {
        Ok(bytes) => bytes,
        Err(err) => {
            return Err(BrowserError::ScreenshotFailed {
                reason: err.to_string(),
            })
        }
    };
    let encoded = STANDARD.encode(&bytes);
    Ok(ScreenshotResult {
        data: bytes,
        base64: encoded,
        format: "png".to_string(),
    })
}
/// Captures a screenshot of the first element matching `selector`.
///
/// The result carries the raw bytes, their standard-alphabet base64
/// encoding, and the format label `"png"`.
///
/// # Errors
/// `BrowserError::SessionNotFound`, `BrowserError::ElementNotFound` when the
/// lookup fails, or `BrowserError::ScreenshotFailed` when the capture fails.
pub async fn element_screenshot(
    &self,
    session_id: &str,
    selector: &str,
) -> Result<ScreenshotResult> {
    let client = self.get_client(session_id).await?;
    let target = match client.find(Locator::Css(selector)).await {
        Ok(element) => element,
        Err(_) => {
            return Err(BrowserError::ElementNotFound {
                selector: selector.to_string(),
            })
        }
    };
    let bytes = match target.screenshot().await {
        Ok(bytes) => bytes,
        Err(err) => {
            return Err(BrowserError::ScreenshotFailed {
                reason: err.to_string(),
            })
        }
    };
    let encoded = STANDARD.encode(&bytes);
    Ok(ScreenshotResult {
        data: bytes,
        base64: encoded,
        format: "png".to_string(),
    })
}
/// Waits up to `timeout_ms` milliseconds for an element matching `selector`.
///
/// # Errors
/// * `BrowserError::SessionNotFound` for an unknown session.
/// * `BrowserError::Timeout` when the deadline elapses first.
/// * `BrowserError::ElementNotFound` when the wait itself reports an error.
pub async fn wait_for_element(
    &self,
    session_id: &str,
    selector: &str,
    timeout_ms: u64,
) -> Result<ElementInfo> {
    let client = self.get_client(session_id).await?;
    let deadline = Duration::from_millis(timeout_ms);
    let lookup = client.wait_for_find(Locator::Css(selector));

    // Outer layer: did the timer fire? Inner layer: did the lookup succeed?
    let element = match tokio::time::timeout(deadline, lookup).await {
        Err(_) => {
            return Err(BrowserError::Timeout {
                selector: selector.to_string(),
            })
        }
        Ok(Err(_)) => {
            return Err(BrowserError::ElementNotFound {
                selector: selector.to_string(),
            })
        }
        Ok(Ok(found)) => found,
    };

    self.element_to_info(&element, selector).await
}
/// Returns the source of the page currently loaded in the session.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::CommandFailed` when the WebDriver command fails.
pub async fn get_source(&self, session_id: &str) -> Result<String> {
    let client = self.get_client(session_id).await?;
    match client.source().await {
        Ok(markup) => Ok(markup),
        Err(err) => Err(BrowserError::CommandFailed(err.to_string())),
    }
}
/// Returns the session's current URL as text.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::CommandFailed` when the WebDriver command fails.
pub async fn get_current_url(&self, session_id: &str) -> Result<String> {
    let client = self.get_client(session_id).await?;
    match client.current_url().await {
        Ok(location) => Ok(location.to_string()),
        Err(err) => Err(BrowserError::CommandFailed(err.to_string())),
    }
}
/// Returns the title of the page currently loaded in the session.
///
/// # Errors
/// `BrowserError::SessionNotFound` for an unknown session, or
/// `BrowserError::CommandFailed` when the WebDriver command fails.
pub async fn get_title(&self, session_id: &str) -> Result<String> {
    let client = self.get_client(session_id).await?;
    match client.title().await {
        Ok(title) => Ok(title),
        Err(err) => Err(BrowserError::CommandFailed(err.to_string())),
    }
}
// Private helper methods
/// Returns a clone of the WebDriver client stored for `session_id`.
///
/// # Errors
/// `BrowserError::SessionNotFound` when the map has no entry for that id.
async fn get_client(&self, session_id: &str) -> Result<Client> {
    let guard = self.connections.read().await;
    match guard.get(session_id) {
        Some(client) => Ok(client.clone()),
        None => Err(BrowserError::SessionNotFound(session_id.to_string())),
    }
}
/// Builds the WebDriver capabilities map for `config`.
///
/// Launch arguments are placed under the vendor-specific options key of the
/// selected browser:
///
/// * Chrome  -> `goog:chromeOptions`
/// * Edge    -> `ms:edgeOptions`
/// * Firefox -> `moz:firefoxOptions` (using Firefox's own `-headless`,
///   `--width=` and `--height=` flags)
/// * Safari  -> no options object; only `browserName` is sent
///
/// Previously everything was sent under `goog:chromeOptions`, so headless
/// mode, window size and extra arguments had no effect for non-Chrome
/// browsers. The Chrome output is unchanged.
///
/// Argument order is: headless flag, window size, then `config.browser_args`.
fn build_capabilities(&self, config: &SessionConfig) -> Result<serde_json::Map<String, serde_json::Value>> {
    let (browser_name, options_key) = match config.browser_type {
        BrowserType::Chrome => ("chrome", Some("goog:chromeOptions")),
        BrowserType::Firefox => ("firefox", Some("moz:firefoxOptions")),
        BrowserType::Edge => ("MicrosoftEdge", Some("ms:edgeOptions")),
        BrowserType::Safari => ("safari", None),
    };
    let is_firefox = matches!(config.browser_type, BrowserType::Firefox);

    let mut args: Vec<String> = Vec::new();
    if config.headless {
        let flag = if is_firefox { "-headless" } else { "--headless" };
        args.push(flag.to_string());
    }
    if let Some((width, height)) = config.window_size {
        if is_firefox {
            // Firefox takes width and height as two separate flags.
            args.push(format!("--width={}", width));
            args.push(format!("--height={}", height));
        } else {
            args.push(format!("--window-size={},{}", width, height));
        }
    }
    args.extend(config.browser_args.iter().cloned());

    let mut caps = serde_json::Map::new();
    caps.insert("browserName".to_string(), serde_json::json!(browser_name));

    if let Some(key) = options_key {
        let mut options = serde_json::Map::new();
        options.insert("args".to_string(), serde_json::json!(args));
        if !is_firefox {
            // `w3c` is a Chromium driver option, so it is only sent to Chrome and Edge.
            options.insert("w3c".to_string(), serde_json::json!(true));
        }
        caps.insert(key.to_string(), serde_json::Value::Object(options));
    }

    Ok(caps)
}
async fn element_to_info(&self, element: &Element, selector: &str) -> Result<ElementInfo> {
let tag_name = element.tag_name().await.ok();
let text = element.text().await.ok();
let is_displayed = element.is_displayed().await.unwrap_or(false);
let is_enabled = element.is_enabled().await.unwrap_or(false);
let is_selected = element.is_selected().await.unwrap_or(false);
// Note: location() and size() may not be available in all fantoccini versions
// Using placeholder values if not available
let location = None;
let size = None;
Ok(ElementInfo {
selector: selector.to_string(),
tag_name,
text,
is_displayed,
is_enabled,
is_selected,
location,
size,
})
}
/// Re-reads the session's URL and title and stores them on its record.
///
/// Does nothing when no client exists for `session_id`; failed reads are
/// stored as `None`.
async fn update_session_location(&self, session_id: &str) {
    let client = match self.get_client(session_id).await {
        Ok(client) => client,
        Err(_) => return,
    };

    let landed_url = client.current_url().await.ok().map(|u| u.to_string());
    let page_title = client.title().await.ok();

    self.session_manager
        .update_session(session_id, |s| {
            s.update_location(landed_url, page_title);
        })
        .await;
}
}
impl Default for BrowserClient {
fn default() -> Self {
Self::new()
}
}
/// Cloning shares the connection map (the `Arc` is cloned, not the map) and
/// clones the session manager.
impl Clone for BrowserClient {
    fn clone(&self) -> Self {
        let connections = Arc::clone(&self.connections);
        let session_manager = self.session_manager.clone();
        Self {
            connections,
            session_manager,
        }
    }
}
// Result types
/// Where the browser ended up after a navigation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NavigationResult {
    /// URL read back after navigating, when available.
    pub url: Option<String>,
    /// Page title read back after navigating, when available.
    pub title: Option<String>,
}
/// Serializable description of a page element.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ElementInfo {
    /// Selector that was used to find the element.
    pub selector: String,
    /// Tag name, when it could be read.
    pub tag_name: Option<String>,
    /// Text content, when it could be read.
    pub text: Option<String>,
    /// Display state flag.
    pub is_displayed: bool,
    /// Enabled state flag.
    pub is_enabled: bool,
    /// Selected state flag.
    pub is_selected: bool,
    /// Position, when known.
    pub location: Option<ElementLocation>,
    /// Dimensions, when known.
    pub size: Option<ElementSize>,
}
/// Integer x/y position of an element.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ElementLocation {
    /// Horizontal coordinate.
    pub x: i32,
    /// Vertical coordinate.
    pub y: i32,
}
/// Unsigned width/height of an element.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ElementSize {
    /// Horizontal extent.
    pub width: u64,
    /// Vertical extent.
    pub height: u64,
}
/// A captured screenshot in both raw and text-encoded form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScreenshotResult {
    /// Raw image bytes.
    pub data: Vec<u8>,
    /// Base64 encoding of `data`.
    pub base64: String,
    /// Image format label.
    pub format: String,
}

View File

@@ -0,0 +1,531 @@
// Tauri commands for browser automation
use crate::browser::actions::{ActionResult, BrowserAction, BrowserTask, FormField};
use crate::browser::client::BrowserClient;
use crate::browser::error::BrowserError;
use crate::browser::session::{BrowserType, SessionConfig};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::RwLock;
use tauri::State;
/// Application-wide state handed to the browser commands.
pub struct BrowserState {
    /// The browser client, shared behind an async read/write lock.
    client: Arc<RwLock<BrowserClient>>,
}
impl BrowserState {
    /// Creates state wrapping a brand-new [`BrowserClient`].
    pub fn new() -> Self {
        let client = Arc::new(RwLock::new(BrowserClient::new()));
        Self { client }
    }
}
impl Default for BrowserState {
fn default() -> Self {
Self::new()
}
}
/// Cloning shares the same underlying client (only the `Arc` is cloned).
impl Clone for BrowserState {
    fn clone(&self) -> Self {
        let client = Arc::clone(&self.client);
        Self { client }
    }
}
// ============================================================================
// Session Management Commands
// ============================================================================
/// Create a new browser session.
///
/// * `webdriver_url` - WebDriver endpoint; falls back to
///   `http://localhost:4444` when omitted.
/// * `headless` - falls back to `true` when omitted.
/// * `browser_type` - `"firefox"`, `"edge"` or `"safari"`, matched
///   case-insensitively with surrounding whitespace ignored, so `"Firefox"`
///   no longer silently launches Chrome. Any other value, or none, selects
///   Chrome.
/// * `window_width` / `window_height` - used only when both are supplied.
///
/// Returns the new session id, or the error's display text on failure.
#[tauri::command]
pub async fn browser_create_session(
    state: State<'_, BrowserState>,
    webdriver_url: Option<String>,
    headless: Option<bool>,
    browser_type: Option<String>,
    window_width: Option<u32>,
    window_height: Option<u32>,
) -> Result<BrowserSessionResult, String> {
    let requested = browser_type.as_deref().map(str::trim);
    let browser_type = match requested {
        Some(name) if name.eq_ignore_ascii_case("firefox") => BrowserType::Firefox,
        Some(name) if name.eq_ignore_ascii_case("edge") => BrowserType::Edge,
        Some(name) if name.eq_ignore_ascii_case("safari") => BrowserType::Safari,
        _ => BrowserType::Chrome,
    };

    let config = SessionConfig {
        webdriver_url: webdriver_url.unwrap_or_else(|| "http://localhost:4444".to_string()),
        browser_type,
        headless: headless.unwrap_or(true),
        window_size: window_width.zip(window_height),
        ..Default::default()
    };

    let client = state.client.read().await;
    let session_id = client.create_session(config).await.map_err(|e| e.to_string())?;
    Ok(BrowserSessionResult { session_id })
}
/// Closes the session identified by `session_id`.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_close_session(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<(), String> {
    let guard = state.client.read().await;
    match guard.close_session(&session_id).await {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
/// Lists every known browser session.
///
/// The status is the lowercased `Debug` text of the session status, and
/// timestamps are rendered as RFC 3339 strings.
#[tauri::command]
pub async fn browser_list_sessions(
    state: State<'_, BrowserState>,
) -> Result<Vec<BrowserSessionInfo>, String> {
    let guard = state.client.read().await;

    let mut listing = Vec::new();
    for session in guard.list_sessions().await {
        let status = format!("{:?}", session.status).to_lowercase();
        let created_at = session.created_at.to_rfc3339();
        let last_activity = session.last_activity.to_rfc3339();
        listing.push(BrowserSessionInfo {
            id: session.id,
            name: session.name,
            current_url: session.current_url,
            title: session.title,
            status,
            created_at,
            last_activity,
        });
    }
    Ok(listing)
}
/// Fetches the details of one session.
///
/// The status is the lowercased `Debug` text of the session status, and
/// timestamps are rendered as RFC 3339 strings. Failures are reported as the
/// error's display text.
#[tauri::command]
pub async fn browser_get_session(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<BrowserSessionInfo, String> {
    let guard = state.client.read().await;
    let found = match guard.get_session(&session_id).await {
        Ok(session) => session,
        Err(err) => return Err(err.to_string()),
    };

    let status = format!("{:?}", found.status).to_lowercase();
    let created_at = found.created_at.to_rfc3339();
    let last_activity = found.last_activity.to_rfc3339();
    Ok(BrowserSessionInfo {
        id: found.id,
        name: found.name,
        current_url: found.current_url,
        title: found.title,
        status,
        created_at,
        last_activity,
    })
}
// ============================================================================
// Navigation Commands
// ============================================================================
/// Loads `url` in the given session and reports the resulting URL and title.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_navigate(
    state: State<'_, BrowserState>,
    session_id: String,
    url: String,
) -> Result<BrowserNavigationResult, String> {
    let guard = state.client.read().await;
    match guard.navigate(&session_id, &url).await {
        Ok(outcome) => Ok(BrowserNavigationResult {
            url: outcome.url,
            title: outcome.title,
        }),
        Err(err) => Err(err.to_string()),
    }
}
/// Steps back in the session's history.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_back(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<(), String> {
    let guard = state.client.read().await;
    match guard.back(&session_id).await {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
/// Steps forward in the session's history.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_forward(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<(), String> {
    let guard = state.client.read().await;
    match guard.forward(&session_id).await {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
/// Reloads the session's current page.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_refresh(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<(), String> {
    let guard = state.client.read().await;
    match guard.refresh(&session_id).await {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
/// Returns the session's current URL.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_get_url(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<String, String> {
    let guard = state.client.read().await;
    match guard.get_current_url(&session_id).await {
        Ok(url) => Ok(url),
        Err(err) => Err(err.to_string()),
    }
}
/// Returns the title of the session's current page.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_get_title(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<String, String> {
    let guard = state.client.read().await;
    match guard.get_title(&session_id).await {
        Ok(title) => Ok(title),
        Err(err) => Err(err.to_string()),
    }
}
// ============================================================================
// Element Interaction Commands
// ============================================================================
/// Locates one element by `selector` and returns its description.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_find_element(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
) -> Result<BrowserElementInfo, String> {
    let guard = state.client.read().await;
    let found = match guard.find_element(&session_id, &selector).await {
        Ok(info) => info,
        Err(err) => return Err(err.to_string()),
    };

    // Convert the client-side types into the command-layer response types.
    let location = found
        .location
        .map(|l| BrowserElementLocation { x: l.x, y: l.y });
    let size = found.size.map(|s| BrowserElementSize {
        width: s.width,
        height: s.height,
    });

    Ok(BrowserElementInfo {
        selector: found.selector,
        tag_name: found.tag_name,
        text: found.text,
        is_displayed: found.is_displayed,
        is_enabled: found.is_enabled,
        is_selected: found.is_selected,
        location,
        size,
    })
}
/// Locates every element matching `selector` and returns their descriptions.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_find_elements(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
) -> Result<Vec<BrowserElementInfo>, String> {
    let guard = state.client.read().await;
    let found = match guard.find_elements(&session_id, &selector).await {
        Ok(list) => list,
        Err(err) => return Err(err.to_string()),
    };

    // Convert each client-side description into the command-layer type.
    let mut converted = Vec::new();
    for item in found {
        let location = item
            .location
            .map(|l| BrowserElementLocation { x: l.x, y: l.y });
        let size = item.size.map(|s| BrowserElementSize {
            width: s.width,
            height: s.height,
        });
        converted.push(BrowserElementInfo {
            selector: item.selector,
            tag_name: item.tag_name,
            text: item.text,
            is_displayed: item.is_displayed,
            is_enabled: item.is_enabled,
            is_selected: item.is_selected,
            location,
            size,
        });
    }
    Ok(converted)
}
/// Clicks the element matching `selector`.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_click(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
) -> Result<(), String> {
    let guard = state.client.read().await;
    match guard.click(&session_id, &selector).await {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
/// Sends `text` to the element matching `selector`.
///
/// When `clear_first` is `Some(true)` the element is cleared before typing;
/// otherwise (including when it is omitted) the text is sent as-is.
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_type(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
    text: String,
    clear_first: Option<bool>,
) -> Result<(), String> {
    let guard = state.client.read().await;
    let wipe_first = clear_first.unwrap_or(false);

    let outcome = if wipe_first {
        guard.clear_and_type(&session_id, &selector, &text).await
    } else {
        guard.type_text(&session_id, &selector, &text).await
    };
    outcome.map_err(|err| err.to_string())
}
/// Reads the text of the element matching `selector`.
///
/// Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_get_text(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
) -> Result<String, String> {
    let guard = state.client.read().await;
    match guard.get_text(&session_id, &selector).await {
        Ok(text) => Ok(text),
        Err(err) => Err(err.to_string()),
    }
}
/// Reads `attribute` from the element matching `selector`.
///
/// The value may be `None`. Failures are reported as the error's display text.
#[tauri::command]
pub async fn browser_get_attribute(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
    attribute: String,
) -> Result<Option<String>, String> {
    let guard = state.client.read().await;
    match guard.get_attribute(&session_id, &selector, &attribute).await {
        Ok(value) => Ok(value),
        Err(err) => Err(err.to_string()),
    }
}
/// Waits for an element matching `selector` and returns its description.
///
/// `timeout_ms` falls back to 10000 milliseconds when omitted. Failures are
/// reported as the error's display text.
#[tauri::command]
pub async fn browser_wait_for_element(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
    timeout_ms: Option<u64>,
) -> Result<BrowserElementInfo, String> {
    let guard = state.client.read().await;
    let budget_ms = timeout_ms.unwrap_or(10000);
    let found = match guard.wait_for_element(&session_id, &selector, budget_ms).await {
        Ok(info) => info,
        Err(err) => return Err(err.to_string()),
    };

    // Convert the client-side types into the command-layer response types.
    let location = found
        .location
        .map(|l| BrowserElementLocation { x: l.x, y: l.y });
    let size = found.size.map(|s| BrowserElementSize {
        width: s.width,
        height: s.height,
    });

    Ok(BrowserElementInfo {
        selector: found.selector,
        tag_name: found.tag_name,
        text: found.text,
        is_displayed: found.is_displayed,
        is_enabled: found.is_enabled,
        is_selected: found.is_selected,
        location,
        size,
    })
}
// ============================================================================
// Advanced Commands
// ============================================================================
/// Runs `script` in the given browser session and returns the client's JSON result.
///
/// `args` defaults to an empty list when omitted.
///
/// # Errors
/// Returns the client's error converted to a `String`.
#[tauri::command]
pub async fn browser_execute_script(
    state: State<'_, BrowserState>,
    session_id: String,
    script: String,
    args: Option<Vec<serde_json::Value>>,
) -> Result<serde_json::Value, String> {
    let guard = state.client.read().await;
    let script_args = args.unwrap_or_default();
    let outcome = guard
        .execute_script(&session_id, &script, script_args)
        .await;
    outcome.map_err(|err| err.to_string())
}
/// Takes a screenshot through the client for the given session.
///
/// The client's `base64` and `format` fields are copied into a
/// [`BrowserScreenshotResult`].
///
/// # Errors
/// Returns the client's error converted to a `String`.
#[tauri::command]
pub async fn browser_screenshot(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<BrowserScreenshotResult, String> {
    let guard = state.client.read().await;
    guard
        .screenshot(&session_id)
        .await
        .map(|shot| BrowserScreenshotResult {
            base64: shot.base64,
            format: shot.format,
        })
        .map_err(|err| err.to_string())
}
/// Takes a screenshot of the element matching `selector` through the client.
///
/// The client's `base64` and `format` fields are copied into a
/// [`BrowserScreenshotResult`].
///
/// # Errors
/// Returns the client's error converted to a `String`.
#[tauri::command]
pub async fn browser_element_screenshot(
    state: State<'_, BrowserState>,
    session_id: String,
    selector: String,
) -> Result<BrowserScreenshotResult, String> {
    let guard = state.client.read().await;
    guard
        .element_screenshot(&session_id, &selector)
        .await
        .map(|shot| BrowserScreenshotResult {
            base64: shot.base64,
            format: shot.format,
        })
        .map_err(|err| err.to_string())
}
/// Returns the page source the client reports for the given session.
///
/// # Errors
/// Returns the client's error converted to a `String`.
#[tauri::command]
pub async fn browser_get_source(
    state: State<'_, BrowserState>,
    session_id: String,
) -> Result<String, String> {
    let guard = state.client.read().await;
    let outcome = guard.get_source(&session_id).await;
    outcome.map_err(|err| err.to_string())
}
// ============================================================================
// High-Level Task Commands (for Hands integration)
// ============================================================================
/// Collects the text of every element matched by each entry of `selectors`.
///
/// If `wait_for` is given, the command first waits for that selector
/// (`timeout_ms`, defaulting to `10000`) and fails if the wait fails.
///
/// The result is a JSON object mapping each selector string to an array of
/// the non-`None` `text` values of its matched elements. A selector whose
/// lookup fails is left out of the object rather than failing the command.
///
/// # Errors
/// Returns the client's error converted to a `String` only when the optional
/// wait step fails.
#[tauri::command]
pub async fn browser_scrape_page(
    state: State<'_, BrowserState>,
    session_id: String,
    selectors: Vec<String>,
    wait_for: Option<String>,
    timeout_ms: Option<u64>,
) -> Result<serde_json::Value, String> {
    let guard = state.client.read().await;

    // Optional readiness gate before any extraction happens.
    if let Some(ready_selector) = wait_for {
        let wait_ms = timeout_ms.unwrap_or(10000);
        guard
            .wait_for_element(&session_id, &ready_selector, wait_ms)
            .await
            .map_err(|err| err.to_string())?;
    }

    let mut extracted = serde_json::Map::new();
    for selector in selectors {
        let elements = match guard.find_elements(&session_id, &selector).await {
            Ok(found) => found,
            // Best-effort: a failing selector is skipped, not reported.
            Err(_) => continue,
        };
        let texts: Vec<String> = elements
            .iter()
            .filter_map(|element| element.text.clone())
            .collect();
        extracted.insert(selector, serde_json::json!(texts));
    }

    Ok(serde_json::Value::Object(extracted))
}
/// Fill form
#[tauri::command]
pub async fn browser_fill_form(
state: State<'_, BrowserState>,
session_id: String,
fields: Vec<FormFieldData>,
submit_selector: Option<String>,
) -> Result<(), String> {
let client = state.client.read().await;
// Fill each field
for field in fields {
client
.clear_and_type(&session_id, &field.selector, &field.value)
.await
.map_err(|e| e.to_string())?;
}
// Submit form if selector provided
if let Some(selector) = submit_selector {
client
.click(&session_id, &selector)
.await
.map_err(|e| e.to_string())?;
}
Ok(())
}
// ============================================================================
// Response Types
// ============================================================================
/// Serializable response that carries a browser session identifier.
#[derive(Debug, Serialize)]
pub struct BrowserSessionResult {
/// Session identifier; presumably the value the other commands accept as
/// `session_id` (the producing command is not visible in this part of the file).
pub session_id: String,
}
/// Serializable summary of a browser session in which every field is a
/// string or an optional string.
///
/// NOTE(review): the field names mirror `BrowserSession`, with status and
/// timestamps as strings; the conversion and its string formats live where
/// this struct is built, which is not visible here.
#[derive(Debug, Serialize)]
pub struct BrowserSessionInfo {
/// Session identifier.
pub id: String,
/// Session name.
pub name: String,
/// Current URL, if known.
pub current_url: Option<String>,
/// Page title, if known.
pub title: Option<String>,
/// Session status rendered as a string.
pub status: String,
/// Creation time rendered as a string.
pub created_at: String,
/// Last-activity time rendered as a string.
pub last_activity: String,
}
/// Serializable navigation response holding an optional URL and page title.
#[derive(Debug, Serialize)]
pub struct BrowserNavigationResult {
/// URL reported for the navigation, if any.
pub url: Option<String>,
/// Page title reported for the navigation, if any.
pub title: Option<String>,
}
/// Serializable description of a page element.
///
/// The element commands in this file build it by copying the same-named
/// fields from the element value returned by the browser client.
#[derive(Debug, Serialize)]
pub struct BrowserElementInfo {
/// Selector value copied from the client's element.
pub selector: String,
/// Tag name, when the client provides one.
pub tag_name: Option<String>,
/// Element text, when the client provides it.
pub text: Option<String>,
/// Displayed flag copied from the client's element.
pub is_displayed: bool,
/// Enabled flag copied from the client's element.
pub is_enabled: bool,
/// Selected flag copied from the client's element.
pub is_selected: bool,
/// Element position, when the client provides it.
pub location: Option<BrowserElementLocation>,
/// Element dimensions, when the client provides them.
pub size: Option<BrowserElementSize>,
}
/// Serializable element position as signed integer coordinates.
///
/// NOTE(review): units and coordinate origin are not established in this
/// file — presumably pixels as reported by the WebDriver; confirm.
#[derive(Debug, Serialize)]
pub struct BrowserElementLocation {
/// Horizontal coordinate.
pub x: i32,
/// Vertical coordinate.
pub y: i32,
}
/// Serializable element dimensions as unsigned integers.
///
/// NOTE(review): units are not established in this file — presumably pixels; confirm.
#[derive(Debug, Serialize)]
pub struct BrowserElementSize {
/// Element width.
pub width: u64,
/// Element height.
pub height: u64,
}
/// Serializable screenshot response.
///
/// Both fields are copied unchanged from the client's screenshot result by
/// the screenshot commands in this file.
#[derive(Debug, Serialize)]
pub struct BrowserScreenshotResult {
/// Screenshot payload; presumably base64-encoded image data, per the field name.
pub base64: String,
/// Format label supplied by the client (exact values are not visible here).
pub format: String,
}
/// One form field to fill, deserialized from the `browser_fill_form` arguments.
#[derive(Debug, Deserialize)]
pub struct FormFieldData {
/// Selector of the element to type into.
pub selector: String,
/// Text passed to the client's `clear_and_type` for that element.
pub value: String,
}

View File

@@ -0,0 +1,86 @@
// Browser automation error types
use serde::Serialize;
use thiserror::Error;
/// Error type for the browser automation module.
///
/// Display messages come from the `#[error]` attributes. The type also
/// derives `Serialize`. `From` conversions in this file produce
/// `ConnectionFailed`, `ElementNotFound`, `Timeout`, `ScriptError`,
/// `CommandFailed` and `IoError`.
#[derive(Debug, Error, Serialize)]
pub enum BrowserError {
/// Connecting to the WebDriver failed; also produced from
/// `fantoccini::error::NewSessionError`.
#[error("WebDriver connection failed: {0}")]
ConnectionFailed(String),
/// No session exists for the given identifier.
#[error("Session not found: {0}")]
SessionNotFound(String),
/// An element could not be found. When produced from a
/// `fantoccini::error::CmdError`, `selector` holds the full error message.
#[error("Element not found: {selector}")]
ElementNotFound { selector: String },
/// Navigation to `url` failed.
#[error("Navigation failed: {url}")]
NavigationFailed { url: String },
/// Waiting for an element timed out. When produced from a
/// `fantoccini::error::CmdError`, `selector` holds the full error message.
#[error("Timeout waiting for element: {selector}")]
Timeout { selector: String },
/// The selector was rejected as invalid.
#[error("Invalid selector: {selector}")]
InvalidSelector { selector: String },
/// JavaScript execution failed.
#[error("JavaScript execution failed: {message}")]
ScriptError { message: String },
/// Taking a screenshot failed.
#[error("Screenshot failed: {reason}")]
ScreenshotFailed { reason: String },
/// Interacting with a form field failed.
#[error("Form interaction failed: {field}")]
FormError { field: String },
/// The WebDriver is not available.
#[error("WebDriver not available: {reason}")]
DriverNotAvailable { reason: String },
/// A session with this identifier already exists.
#[error("Session already exists: {id}")]
SessionExists { id: String },
/// The operation was cancelled by the user.
#[error("Operation cancelled by user")]
Cancelled,
/// Configuration problem described by the message.
#[error("Configuration error: {0}")]
ConfigError(String),
/// I/O failure; produced from `std::io::Error` with its message as a string.
#[error("IO error: {0}")]
IoError(String),
/// Fallback for a `fantoccini::error::CmdError` whose message matched none
/// of the more specific categories.
#[error("WebDriver command failed: {0}")]
CommandFailed(String),
/// Any other error described by the message.
#[error("Unknown error: {0}")]
Unknown(String),
}
// Manual conversion from fantoccini errors since the enum variants differ between versions
impl From<fantoccini::error::NewSessionError> for BrowserError {
fn from(e: fantoccini::error::NewSessionError) -> Self {
BrowserError::ConnectionFailed(e.to_string())
}
}
impl From<fantoccini::error::CmdError> for BrowserError {
fn from(e: fantoccini::error::CmdError) -> Self {
// Convert to string and wrap in appropriate error type
let msg = e.to_string();
if msg.contains("not found") || msg.contains("no such element") {
BrowserError::ElementNotFound { selector: msg }
} else if msg.contains("timeout") || msg.contains("timed out") {
BrowserError::Timeout { selector: msg }
} else if msg.contains("script") || msg.contains("javascript") {
BrowserError::ScriptError { message: msg }
} else {
BrowserError::CommandFailed(msg)
}
}
}
impl From<std::io::Error> for BrowserError {
fn from(e: std::io::Error) -> Self {
BrowserError::IoError(e.to_string())
}
}
/// Result alias for the browser module: `std::result::Result` with
/// [`BrowserError`] as the error type.
pub type Result<T> = std::result::Result<T, BrowserError>;

View File

@@ -0,0 +1,13 @@
// Browser automation module using Fantoccini
// Provides Browser Hand capabilities for ZCLAW
pub mod client;
pub mod commands;
pub mod error;
pub mod session;
pub mod actions;
pub use client::BrowserClient;
pub use error::{BrowserError, Result};
pub use session::{BrowserSession, SessionConfig};
pub use actions::{BrowserAction, ActionResult};

View File

@@ -0,0 +1,187 @@
// Browser session management
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use chrono::{DateTime, Utc};
/// Browser session configuration
///
/// Serializable and deserializable. The `Default` implementation in this
/// file supplies the default values noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionConfig {
/// WebDriver URL (e.g., "http://localhost:4444"); that example is also the default
pub webdriver_url: String,
/// Browser type (chrome, firefox, etc.); defaults to `BrowserType::Chrome`
pub browser_type: BrowserType,
/// Headless mode; defaults to `true`
pub headless: bool,
/// Window size (width, height); defaults to `Some((1920, 1080))`
pub window_size: Option<(u32, u32)>,
/// Page load timeout in seconds; defaults to 30
pub page_load_timeout: u64,
/// Script timeout in seconds; defaults to 30
pub script_timeout: u64,
/// Implicit wait timeout in milliseconds; defaults to 1000
pub implicit_wait_timeout: u64,
/// Custom browser arguments; defaults to an empty list
pub browser_args: Vec<String>,
}
impl Default for SessionConfig {
fn default() -> Self {
Self {
webdriver_url: "http://localhost:4444".to_string(),
browser_type: BrowserType::Chrome,
headless: true,
window_size: Some((1920, 1080)),
page_load_timeout: 30,
script_timeout: 30,
implicit_wait_timeout: 1000,
browser_args: vec![],
}
}
}
/// Browser selected by a [`SessionConfig`].
///
/// Variant names are serialized and deserialized in lowercase
/// (`rename_all = "lowercase"`), e.g. `Chrome` as `"chrome"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum BrowserType {
Chrome,
Firefox,
Edge,
Safari,
}
/// Active browser session
///
/// A cloneable, serializable record of one session's identity, location,
/// status, timestamps and configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserSession {
/// Unique session identifier
pub id: String,
/// Session name for display
pub name: String,
/// Current URL; `None` until `update_location` supplies one
pub current_url: Option<String>,
/// Page title; `None` until `update_location` supplies one
pub title: Option<String>,
/// Session status
pub status: SessionStatus,
/// Creation timestamp
pub created_at: DateTime<Utc>,
/// Last activity timestamp; refreshed by `touch`
pub last_activity: DateTime<Utc>,
/// Session configuration
pub config: SessionConfig,
/// Custom metadata
pub metadata: HashMap<String, String>,
}
impl BrowserSession {
    /// Creates a session record for `id` using `config`.
    ///
    /// The new session is named "Browser Session", has no URL or title, is
    /// marked `SessionStatus::Connected`, carries empty metadata, and has
    /// both timestamps set to the same current UTC instant.
    pub fn new(id: String, config: SessionConfig) -> Self {
        let now = Utc::now();
        Self {
            id,
            // Plain literal: there are no formatting arguments, so `format!` is not needed.
            name: "Browser Session".to_string(),
            current_url: None,
            title: None,
            status: SessionStatus::Connected,
            created_at: now,
            last_activity: now,
            config,
            metadata: HashMap::new(),
        }
    }

    /// Sets `last_activity` to the current UTC time.
    pub fn touch(&mut self) {
        self.last_activity = Utc::now();
    }

    /// Replaces the stored URL and title, then refreshes `last_activity`.
    ///
    /// Passing `None` clears the corresponding field.
    pub fn update_location(&mut self, url: Option<String>, title: Option<String>) {
        self.current_url = url;
        self.title = title;
        self.touch();
    }
}
/// Lifecycle status of a [`BrowserSession`].
///
/// Variant names are serialized and deserialized in lowercase
/// (`rename_all = "lowercase"`). `BrowserSession::new` starts sessions as
/// `Connected`; where the other variants get assigned is not visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum SessionStatus {
Connecting,
Connected,
Active,
Idle,
Disconnected,
Error,
}
/// Session manager for multiple browser instances
///
/// Holds the session records in a map behind an `Arc<RwLock<..>>`; cloning
/// the manager clones the `Arc`, so clones share the same map.
pub struct SessionManager {
// Session records keyed by session id (`add_session` inserts under `session.id`).
sessions: Arc<RwLock<HashMap<String, BrowserSession>>>,
}
impl SessionManager {
    /// Creates a manager with an empty session map.
    pub fn new() -> Self {
        let sessions = Arc::new(RwLock::new(HashMap::new()));
        Self { sessions }
    }

    /// Stores `session` under its own `id`, replacing any existing entry with that id.
    pub async fn add_session(&self, session: BrowserSession) {
        let key = session.id.clone();
        self.sessions.write().await.insert(key, session);
    }

    /// Returns a clone of the session stored under `id`, if any.
    pub async fn get_session(&self, id: &str) -> Option<BrowserSession> {
        self.sessions.read().await.get(id).cloned()
    }

    /// Applies `updater` to the session stored under `id` while holding the
    /// write lock; does nothing when no such session exists.
    pub async fn update_session(&self, id: &str, updater: impl FnOnce(&mut BrowserSession)) {
        let mut registry = self.sessions.write().await;
        match registry.get_mut(id) {
            Some(entry) => updater(entry),
            None => {}
        }
    }

    /// Removes and returns the session stored under `id`, if any.
    pub async fn remove_session(&self, id: &str) -> Option<BrowserSession> {
        self.sessions.write().await.remove(id)
    }

    /// Returns clones of all stored sessions, in the map's iteration order.
    pub async fn list_sessions(&self) -> Vec<BrowserSession> {
        let registry = self.sessions.read().await;
        let mut snapshot = Vec::with_capacity(registry.len());
        snapshot.extend(registry.values().cloned());
        snapshot
    }

    /// Returns the number of stored sessions.
    pub async fn session_count(&self) -> usize {
        self.sessions.read().await.len()
    }
}
impl Default for SessionManager {
fn default() -> Self {
Self::new()
}
}
impl Clone for SessionManager {
    /// Returns a handle to the same underlying session map; only the `Arc`
    /// is cloned, not the sessions.
    fn clone(&self) -> Self {
        let shared = Arc::clone(&self.sessions);
        Self { sessions: shared }
    }
}

View File

@@ -12,6 +12,9 @@ mod viking_server;
mod memory;
mod llm;
// Browser automation module (Fantoccini-based Browser Hand)
mod browser;
use serde::Serialize;
use serde_json::{json, Value};
use std::fs;
@@ -991,8 +994,12 @@ fn gateway_doctor(app: AppHandle) -> Result<String, String> {
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
// Initialize browser state
let browser_state = browser::commands::BrowserState::new();
tauri::Builder::default()
.plugin(tauri_plugin_opener::init())
.manage(browser_state)
.invoke_handler(tauri::generate_handler![
// OpenFang commands (new naming)
openfang_status,
@@ -1035,7 +1042,31 @@ pub fn run() {
memory::extractor::extract_session_memories,
memory::context_builder::estimate_content_tokens,
// LLM commands (for extraction)
llm::llm_complete
llm::llm_complete,
// Browser automation commands (Fantoccini-based Browser Hand)
browser::commands::browser_create_session,
browser::commands::browser_close_session,
browser::commands::browser_list_sessions,
browser::commands::browser_get_session,
browser::commands::browser_navigate,
browser::commands::browser_back,
browser::commands::browser_forward,
browser::commands::browser_refresh,
browser::commands::browser_get_url,
browser::commands::browser_get_title,
browser::commands::browser_find_element,
browser::commands::browser_find_elements,
browser::commands::browser_click,
browser::commands::browser_type,
browser::commands::browser_get_text,
browser::commands::browser_get_attribute,
browser::commands::browser_wait_for_element,
browser::commands::browser_execute_script,
browser::commands::browser_screenshot,
browser::commands::browser_element_screenshot,
browser::commands::browser_get_source,
browser::commands::browser_scrape_page,
browser::commands::browser_fill_form
])
.run(tauri::generate_context!())
.expect("error while running tauri application");