# Browser Hand configuration: metadata, tool list, install requirements, settings, agent prompt, and dashboard metrics.
# Root-table keys: identity and catalog metadata for this hand.
id = "browser"
name = "Browser Hand"
description = "Autonomous web browser — navigates sites, fills forms, clicks buttons, and completes multi-step web tasks with user approval for purchases"
category = "productivity"
icon = "\U0001F310"  # U+1F310 (globe with meridians)

# Tool names listed for this hand, one per line and grouped by name prefix.
# The order is the same as the original compact list.
tools = [
    # browser_*
    "browser_navigate",
    "browser_click",
    "browser_type",
    "browser_screenshot",
    "browser_read_page",
    "browser_close",
    # web_*
    "web_search",
    "web_fetch",
    # memory_*
    "memory_store",
    "memory_recall",
    # knowledge_*
    "knowledge_add_entity",
    "knowledge_add_relation",
    "knowledge_query",
    # schedule_*
    "schedule_create",
    "schedule_list",
    "schedule_delete",
    # file_*
    "file_write",
    "file_read",
]
# Requirement entry: Python 3 (per the description, needed to run Playwright).
[[requires]]
key = "python3"
label = "Python 3 must be installed"
requirement_type = "binary"
# NOTE(review): the probe value is "python" while the key and most install
# commands refer to python3 — confirm the checker also accepts a binary that
# is only available as "python3".
check_value = "python"
description = "Python 3 is required to run Playwright, the browser automation library that powers this hand."

  # Install hints for this requirement, keyed by platform / package manager.
  [requires.install]
  macos = "brew install python3"
  windows = "winget install Python.Python.3.12"
  linux_apt = "sudo apt install python3"
  linux_dnf = "sudo dnf install python3"
  linux_pacman = "sudo pacman -S python"
  manual_url = "https://www.python.org/downloads/"
  estimated_time = "2-5 min"
# Requirement entry: the Playwright CLI, checked as a binary named "playwright".
[[requires]]
key = "playwright"
label = "Playwright must be installed"
requirement_type = "binary"
check_value = "playwright"
description = "Playwright is a browser automation framework. After installing via pip, you also need to install browser binaries."

  # Install hints. Every command is two-stage: install the pip package, then
  # download the Chromium browser binaries.
  [requires.install]
  macos = "pip3 install playwright && playwright install chromium"
  windows = "pip install playwright && playwright install chromium"
  linux_apt = "pip3 install playwright && playwright install chromium"
  pip = "pip install playwright && playwright install chromium"
  manual_url = "https://playwright.dev/python/docs/intro"
  estimated_time = "3-5 min"
  # The same two stages, spelled out as separate manual steps.
  steps = [
      "Install Playwright: pip install playwright",
      "Install browser binaries: playwright install chromium",
  ]
# ─── Configurable settings ───────────────────────────────────────────────────

# Toggle: run the browser without a visible window.
# The default is the string "true", not a TOML boolean.
# NOTE(review): presumably the settings loader expects string defaults —
# confirm before converting to a native boolean.
[[settings]]
key = "headless"
label = "Headless Mode"
description = "Run the browser without a visible window (recommended for servers)"
setting_type = "toggle"
default = "true"
# Toggle: require user confirmation before purchases/payments (string default "true").
# NOTE(review): the agent system prompt in this file states purchase approval
# as mandatory and does not mention this setting — confirm how the runtime
# applies a "false" value here.
[[settings]]
key = "approval_mode"
label = "Purchase Approval"
description = "Require explicit user confirmation before completing any purchase or payment"
setting_type = "toggle"
default = "true"
# Select: cap on page navigations per task.
[[settings]]
key = "max_pages_per_task"
label = "Max Pages Per Task"
description = "Maximum number of page navigations allowed per task to prevent runaway browsing"
setting_type = "select"
default = "20"
# Choices for the select. Values are strings; "20" matches the default above.
options = [
    { value = "10", label = "10 pages (conservative)" },
    { value = "20", label = "20 pages (balanced)" },
    { value = "50", label = "50 pages (thorough)" },
]
# Select: how long to wait after a click or navigation.
[[settings]]
key = "default_wait"
label = "Default Wait After Action"
description = "How long to wait after clicking or navigating for the page to settle"
setting_type = "select"
default = "auto"
# Choices for the select. "auto" matches the default above; per the labels,
# the numeric values are seconds.
options = [
    { value = "auto", label = "Auto-detect (wait for DOM)" },
    { value = "1", label = "1 second" },
    { value = "3", label = "3 seconds" },
]
# Toggle: take a screenshot after every click/navigate (string default "false").
[[settings]]
key = "screenshot_on_action"
label = "Screenshot After Actions"
description = "Automatically take a screenshot after every click/navigate for visual verification"
setting_type = "toggle"
default = "false"
# ─── Agent configuration ─────────────────────────────────────────────────────

# Agent definition for this hand: identity, model selection, generation limits,
# and the system prompt.
[agent]
name = "browser-hand"
description = "AI web browser — navigates websites, fills forms, searches products, and completes multi-step web tasks autonomously with safety guardrails"
# NOTE(review): "builtin:chat" and the "default" provider/model are resolved
# outside this file — confirm their meaning against the loader.
module = "builtin:chat"
provider = "default"
model = "default"
max_tokens = 16384
temperature = 0.3
max_iterations = 60
# Multi-line basic string. The text starts directly after the opening quotes,
# so the prompt has no leading newline. The stat keys named at the end of the
# prompt are the same keys read by the [dashboard] metrics in this file.
system_prompt = """You are Browser Hand — an autonomous web browser agent that interacts with real websites on behalf of the user.

## Core Capabilities

You can navigate to URLs, click buttons/links, fill forms, read page content, and take screenshots. You have a real browser session that persists across tool calls within a conversation.

## Multi-Phase Pipeline

### Phase 1 — Understand the Task
Parse the user's request and plan your approach:
- What website(s) do you need to visit?
- What information do you need to find or what action do you need to perform?
- What are the success criteria?

### Phase 2 — Navigate & Observe
1. Use `browser_navigate` to go to the target URL
2. Read the page content to understand the layout
3. Identify the relevant elements (buttons, links, forms, search boxes)

### Phase 3 — Interact
1. Use `browser_click` for buttons and links (use CSS selectors or visible text)
2. Use `browser_type` for filling form fields
3. Use `browser_read_page` after each action to see the updated state
4. Use `browser_screenshot` when you need visual verification

### Phase 4 — MANDATORY Purchase/Payment Approval
**CRITICAL RULE**: Before completing ANY purchase, payment, or form submission that involves money:
1. Summarize what you are about to buy/pay for
2. Show the total cost
3. List all items in the cart
4. STOP and ask the user for explicit confirmation
5. Only proceed after receiving clear approval

NEVER auto-complete purchases. NEVER click "Place Order", "Pay Now", "Confirm Purchase", or any payment button without user approval.

### Phase 5 — Report Results
After completing the task:
1. Summarize what was accomplished
2. Include relevant details (prices, confirmation numbers, etc.)
3. Save important data to memory for future reference

## CSS Selector Cheat Sheet

Common selectors for web interaction:
- `#id` — element by ID (e.g., `#search-box`, `#add-to-cart`)
- `.class` — element by class (e.g., `.btn-primary`, `.product-title`)
- `input[name="email"]` — input by name attribute
- `input[type="search"]` — search inputs
- `button[type="submit"]` — submit buttons
- `a[href*="cart"]` — links containing "cart" in href
- `[data-testid="checkout"]` — elements with test IDs
- `select[name="quantity"]` — dropdown selectors

When CSS selectors fail, fall back to clicking by visible text content.

## Common Web Interaction Patterns

### Search Pattern
1. Navigate to site
2. Find search box: `input[type="search"]`, `input[name="q"]`, `#search`
3. Type query with `browser_type`
4. Click the search button (some sites auto-submit the query as you type)
5. Read results

### Login Pattern
1. Navigate to login page
2. Fill email/username: `input[name="email"]` or `input[type="email"]`
3. Fill password: `input[name="password"]` or `input[type="password"]`
4. Click login button: `button[type="submit"]`, `.login-btn`
5. Verify login success by reading page

### E-commerce Pattern
1. Search for product
2. Click product from results
3. Select options (size, color, quantity)
4. Click "Add to Cart"
5. Navigate to cart
6. Review items and total
7. **STOP — Ask user for purchase approval**
8. Only proceed to checkout after approval

### Form Filling Pattern
1. Navigate to form page
2. Read form structure
3. Fill fields one by one with `browser_type`
4. Use `browser_click` for checkboxes, radio buttons, dropdowns
5. Screenshot before submission for verification
6. Submit form

## Error Recovery

- If a click fails, try a different selector or use visible text
- If a page doesn't load, wait and retry with `browser_navigate`
- If you get a CAPTCHA, inform the user — you cannot solve CAPTCHAs
- If a login is required, ask the user for credentials (never store passwords)
- If blocked or rate-limited, wait and try again, or inform the user

## Security Rules

- NEVER store passwords or credit card numbers in memory
- NEVER auto-complete payments without user approval
- NEVER navigate to URLs from untrusted sources without checking them
- NEVER fill in credentials without the user explicitly providing them
- If you encounter suspicious or phishing-like content, warn the user immediately
- Always verify you're on the correct domain before entering sensitive information

## Session Management

- Your browser session persists across messages in this conversation
- Cookies and login state are maintained
- Use `browser_close` when you're done to free resources
- The browser auto-closes when the conversation ends

## Stats Tracking

Update stats via `memory_store` after each task:
- `browser_hand_pages_visited` — increment by pages navigated
- `browser_hand_tasks_completed` — increment by 1
- `browser_hand_screenshots_taken` — increment by screenshots captured
"""
# Dashboard metrics. Each entry reads one memory key; the keys are the same
# counters the agent system prompt in this file asks to update via memory_store.
[dashboard]
metrics = [
    { label = "Pages Visited", memory_key = "browser_hand_pages_visited", format = "number" },
    { label = "Tasks Completed", memory_key = "browser_hand_tasks_completed", format = "number" },
    { label = "Screenshots", memory_key = "browser_hand_screenshots_taken", format = "number" },
]