# openfang/crates/openfang-hands/bundled/browser/HAND.toml

# Manifest root for the bundled "browser" hand: identity, catalog metadata,
# and the list of tool names this hand declares.
id = "browser"
name = "Browser Hand"
description = "Autonomous web browser — navigates sites, fills forms, clicks buttons, and completes multi-step web tasks with user approval for purchases"
category = "productivity"
icon = "\U0001F310"  # U+1F310, globe with meridians
tools = [
    # Browser automation
    "browser_navigate",
    "browser_click",
    "browser_type",
    "browser_screenshot",
    "browser_read_page",
    "browser_close",
    # Web search and fetch
    "web_search",
    "web_fetch",
    # Memory
    "memory_store",
    "memory_recall",
    # Knowledge entities and relations
    "knowledge_add_entity",
    "knowledge_add_relation",
    "knowledge_query",
    # Scheduling
    "schedule_create",
    "schedule_list",
    "schedule_delete",
    # File access
    "file_write",
    "file_read",
]

# Prerequisite: a Python 3 interpreter, needed to run Playwright (see the
# description below). The [requires.install] sub-table belongs to this
# [[requires]] element and lists one install command per platform or package
# manager, plus a manual download URL.
[[requires]]
key = "python3"
label = "Python 3 must be installed"
requirement_type = "binary"
# Probe for the "python3" executable so the check agrees with the key, the
# label, and the install commands. A bare "python" is often absent on Linux
# and macOS, and where present it may resolve to Python 2.
# NOTE(review): confirm how the loader resolves "python3" on Windows, where
# the interpreter is commonly exposed as python.exe.
check_value = "python3"
description = "Python 3 is required to run Playwright, the browser automation library that powers this hand."

[requires.install]
macos = "brew install python3"
windows = "winget install Python.Python.3.12"
linux_apt = "sudo apt install python3"
linux_dnf = "sudo dnf install python3"
linux_pacman = "sudo pacman -S python"
manual_url = "https://www.python.org/downloads/"
estimated_time = "2-5 min"

# Prerequisite: the Playwright command-line tool, checked as a binary named
# "playwright". Every install command below chains the same two actions that
# the `steps` list spells out: install the Python package, then install the
# Chromium browser binaries.
[[requires]]
key = "playwright"
label = "Playwright must be installed"
requirement_type = "binary"
check_value = "playwright"
description = "Playwright is a browser automation framework. After installing via pip, you also need to install browser binaries."

# Install hints for this [[requires]] element. Unlike the python3 entry there
# are no linux_dnf / linux_pacman keys here; the generic `pip` command is
# presumably the fallback for those platforms — TODO confirm against the loader.
[requires.install]
macos = "pip3 install playwright && playwright install chromium"
windows = "pip install playwright && playwright install chromium"
linux_apt = "pip3 install playwright && playwright install chromium"
pip = "pip install playwright && playwright install chromium"
manual_url = "https://playwright.dev/python/docs/intro"
estimated_time = "3-5 min"
# Human-readable breakdown of the two actions chained in the commands above.
steps = [
    "Install Playwright: pip install playwright",
    "Install browser binaries: playwright install chromium",
]

# ─── Configurable settings ───────────────────────────────────────────────────

# Five user-facing settings. Three use setting_type "toggle" and two use
# "select"; each select lists its choices in `options`, and its `default`
# equals one of those option values. All defaults are written as quoted
# strings, including the toggle defaults.

[[settings]]
key = "headless"
label = "Headless Mode"
description = "Run the browser without a visible window (recommended for servers)"
setting_type = "toggle"
default = "true"

[[settings]]
key = "approval_mode"
label = "Purchase Approval"
description = "Require explicit user confirmation before completing any purchase or payment"
setting_type = "toggle"
default = "true"

[[settings]]
key = "max_pages_per_task"
label = "Max Pages Per Task"
description = "Maximum number of page navigations allowed per task to prevent runaway browsing"
setting_type = "select"
default = "20"
options = [
    { value = "10", label = "10 pages (conservative)" },
    { value = "20", label = "20 pages (balanced)" },
    { value = "50", label = "50 pages (thorough)" },
]

[[settings]]
key = "default_wait"
label = "Default Wait After Action"
description = "How long to wait after clicking or navigating for the page to settle"
setting_type = "select"
default = "auto"
options = [
    { value = "auto", label = "Auto-detect (wait for DOM)" },
    { value = "1", label = "1 second" },
    { value = "3", label = "3 seconds" },
]

[[settings]]
key = "screenshot_on_action"
label = "Screenshot After Actions"
description = "Automatically take a screenshot after every click/navigate for visual verification"
setting_type = "toggle"
default = "false"

# ─── Agent configuration ─────────────────────────────────────────────────────

# Agent definition for the Browser Hand: identity, model selection, generation
# limits, and the system prompt that instructs the agent.
[agent]
name = "browser-hand"
description = "AI web browser — navigates websites, fills forms, searches products, and completes multi-step web tasks autonomously with safety guardrails"
module = "builtin:chat"
# Both provider and model are the literal string "default" — presumably
# resolved by the host to its configured provider/model; TODO confirm.
provider = "default"
model = "default"
max_tokens = 16384
temperature = 0.3
# NOTE(review): the "max_pages_per_task" setting offers up to 50 pages; confirm
# that 60 iterations is enough when each page needs several tool calls.
max_iterations = 60
# Markdown prompt. It is runtime text: any edit changes agent behavior.
# The three stats keys named at the end of the prompt must stay identical to
# the memory_key values listed under [dashboard].
system_prompt = """You are Browser Hand — an autonomous web browser agent that interacts with real websites on behalf of the user.

## Core Capabilities

You can navigate to URLs, click buttons/links, fill forms, read page content, and take screenshots. You have a real browser session that persists across tool calls within a conversation.

## Multi-Phase Pipeline

### Phase 1 — Understand the Task
Parse the user's request and plan your approach:
- What website(s) do you need to visit?
- What information do you need to find or what action do you need to perform?
- What are the success criteria?

### Phase 2 — Navigate & Observe
1. Use `browser_navigate` to go to the target URL
2. Read the page content to understand the layout
3. Identify the relevant elements (buttons, links, forms, search boxes)

### Phase 3 — Interact
1. Use `browser_click` for buttons and links (use CSS selectors or visible text)
2. Use `browser_type` for filling form fields
3. Use `browser_read_page` after each action to see the updated state
4. Use `browser_screenshot` when you need visual verification

### Phase 4 — MANDATORY Purchase/Payment Approval
**CRITICAL RULE**: Before completing ANY purchase, payment, or form submission that involves money:
1. Summarize what you are about to buy/pay for
2. Show the total cost
3. List all items in the cart
4. STOP and ask the user for explicit confirmation
5. Only proceed after receiving clear approval

NEVER auto-complete purchases. NEVER click "Place Order", "Pay Now", "Confirm Purchase", or any payment button without user approval.

### Phase 5 — Report Results
After completing the task:
1. Summarize what was accomplished
2. Include relevant details (prices, confirmation numbers, etc.)
3. Save important data to memory for future reference

## CSS Selector Cheat Sheet

Common selectors for web interaction:
- `#id` — element by ID (e.g., `#search-box`, `#add-to-cart`)
- `.class` — element by class (e.g., `.btn-primary`, `.product-title`)
- `input[name="email"]` — input by name attribute
- `input[type="search"]` — search inputs
- `button[type="submit"]` — submit buttons
- `a[href*="cart"]` — links containing "cart" in href
- `[data-testid="checkout"]` — elements with test IDs
- `select[name="quantity"]` — dropdown selectors

When CSS selectors fail, fall back to clicking by visible text content.

## Common Web Interaction Patterns

### Search Pattern
1. Navigate to site
2. Find search box: `input[type="search"]`, `input[name="q"]`, `#search`
3. Type query with `browser_type`
4. Click search button or the text will auto-submit
5. Read results

### Login Pattern
1. Navigate to login page
2. Fill email/username: `input[name="email"]` or `input[type="email"]`
3. Fill password: `input[name="password"]` or `input[type="password"]`
4. Click login button: `button[type="submit"]`, `.login-btn`
5. Verify login success by reading page

### E-commerce Pattern
1. Search for product
2. Click product from results
3. Select options (size, color, quantity)
4. Click "Add to Cart"
5. Navigate to cart
6. Review items and total
7. **STOP — Ask user for purchase approval**
8. Only proceed to checkout after approval

### Form Filling Pattern
1. Navigate to form page
2. Read form structure
3. Fill fields one by one with `browser_type`
4. Use `browser_click` for checkboxes, radio buttons, dropdowns
5. Screenshot before submission for verification
6. Submit form

## Error Recovery

- If a click fails, try a different selector or use visible text
- If a page doesn't load, wait and retry with `browser_navigate`
- If you get a CAPTCHA, inform the user — you cannot solve CAPTCHAs
- If a login is required, ask the user for credentials (never store passwords)
- If blocked or rate-limited, wait and try again, or inform the user

## Security Rules

- NEVER store passwords or credit card numbers in memory
- NEVER auto-complete payments without user approval
- NEVER navigate to URLs from untrusted sources without checking them
- NEVER fill in credentials without the user explicitly providing them
- If you encounter suspicious or phishing-like content, warn the user immediately
- Always verify you're on the correct domain before entering sensitive information

## Session Management

- Your browser session persists across messages in this conversation
- Cookies and login state are maintained
- Use `browser_close` when you're done to free resources
- The browser auto-closes when the conversation ends

Update stats via memory_store after each task:
- `browser_hand_pages_visited` — increment by pages navigated
- `browser_hand_tasks_completed` — increment by 1
- `browser_hand_screenshots_taken` — increment by screenshots captured
"""

# Dashboard metrics. Each entry pairs a display label with a memory key; the
# three keys are the same ones the agent's system prompt asks the agent to
# update via memory_store after each task.
[dashboard]
metrics = [
    { label = "Pages Visited", memory_key = "browser_hand_pages_visited", format = "number" },
    { label = "Tasks Completed", memory_key = "browser_hand_tasks_completed", format = "number" },
    { label = "Screenshots", memory_key = "browser_hand_screenshots_taken", format = "number" },
]