初始化提交
Some checks failed
CI / Check / macos-latest (push) Has been cancelled
CI / Check / ubuntu-latest (push) Has been cancelled
CI / Check / windows-latest (push) Has been cancelled
CI / Test / macos-latest (push) Has been cancelled
CI / Test / ubuntu-latest (push) Has been cancelled
CI / Test / windows-latest (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Format (push) Has been cancelled
CI / Security Audit (push) Has been cancelled
CI / Secrets Scan (push) Has been cancelled
CI / Install Script Smoke Test (push) Has been cancelled
Some checks failed
CI / Check / macos-latest (push) Has been cancelled
CI / Check / ubuntu-latest (push) Has been cancelled
CI / Check / windows-latest (push) Has been cancelled
CI / Test / macos-latest (push) Has been cancelled
CI / Test / ubuntu-latest (push) Has been cancelled
CI / Test / windows-latest (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Format (push) Has been cancelled
CI / Security Audit (push) Has been cancelled
CI / Secrets Scan (push) Has been cancelled
CI / Install Script Smoke Test (push) Has been cancelled
This commit is contained in:
256
crates/openfang-hands/bundled/browser/HAND.toml
Normal file
256
crates/openfang-hands/bundled/browser/HAND.toml
Normal file
@@ -0,0 +1,256 @@
|
||||
id = "browser"
|
||||
name = "Browser Hand"
|
||||
description = "Autonomous web browser — navigates sites, fills forms, clicks buttons, and completes multi-step web tasks with user approval for purchases"
|
||||
category = "productivity"
|
||||
icon = "\U0001F310"
|
||||
tools = [
|
||||
"browser_navigate", "browser_click", "browser_type",
|
||||
"browser_screenshot", "browser_read_page", "browser_close",
|
||||
"web_search", "web_fetch",
|
||||
"memory_store", "memory_recall",
|
||||
"knowledge_add_entity", "knowledge_add_relation", "knowledge_query",
|
||||
"schedule_create", "schedule_list", "schedule_delete",
|
||||
"file_write", "file_read",
|
||||
]
|
||||
|
||||
[[requires]]
|
||||
key = "python3"
|
||||
label = "Python 3 must be installed"
|
||||
requirement_type = "binary"
|
||||
check_value = "python"
|
||||
description = "Python 3 is required to run Playwright, the browser automation library that powers this hand."
|
||||
|
||||
[requires.install]
|
||||
macos = "brew install python3"
|
||||
windows = "winget install Python.Python.3.12"
|
||||
linux_apt = "sudo apt install python3"
|
||||
linux_dnf = "sudo dnf install python3"
|
||||
linux_pacman = "sudo pacman -S python"
|
||||
manual_url = "https://www.python.org/downloads/"
|
||||
estimated_time = "2-5 min"
|
||||
|
||||
[[requires]]
|
||||
key = "playwright"
|
||||
label = "Playwright must be installed"
|
||||
requirement_type = "binary"
|
||||
check_value = "playwright"
|
||||
description = "Playwright is a browser automation framework. After installing via pip, you also need to install browser binaries."
|
||||
|
||||
[requires.install]
|
||||
macos = "pip3 install playwright && playwright install chromium"
|
||||
windows = "pip install playwright && playwright install chromium"
|
||||
linux_apt = "pip3 install playwright && playwright install chromium"
|
||||
pip = "pip install playwright && playwright install chromium"
|
||||
manual_url = "https://playwright.dev/python/docs/intro"
|
||||
estimated_time = "3-5 min"
|
||||
steps = [
|
||||
"Install Playwright: pip install playwright",
|
||||
"Install browser binaries: playwright install chromium",
|
||||
]
|
||||
|
||||
# ─── Configurable settings ───────────────────────────────────────────────────
|
||||
|
||||
[[settings]]
|
||||
key = "headless"
|
||||
label = "Headless Mode"
|
||||
description = "Run the browser without a visible window (recommended for servers)"
|
||||
setting_type = "toggle"
|
||||
default = "true"
|
||||
|
||||
[[settings]]
|
||||
key = "approval_mode"
|
||||
label = "Purchase Approval"
|
||||
description = "Require explicit user confirmation before completing any purchase or payment"
|
||||
setting_type = "toggle"
|
||||
default = "true"
|
||||
|
||||
[[settings]]
|
||||
key = "max_pages_per_task"
|
||||
label = "Max Pages Per Task"
|
||||
description = "Maximum number of page navigations allowed per task to prevent runaway browsing"
|
||||
setting_type = "select"
|
||||
default = "20"
|
||||
|
||||
[[settings.options]]
|
||||
value = "10"
|
||||
label = "10 pages (conservative)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "20"
|
||||
label = "20 pages (balanced)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "50"
|
||||
label = "50 pages (thorough)"
|
||||
|
||||
[[settings]]
|
||||
key = "default_wait"
|
||||
label = "Default Wait After Action"
|
||||
description = "How long to wait after clicking or navigating for the page to settle"
|
||||
setting_type = "select"
|
||||
default = "auto"
|
||||
|
||||
[[settings.options]]
|
||||
value = "auto"
|
||||
label = "Auto-detect (wait for DOM)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "1"
|
||||
label = "1 second"
|
||||
|
||||
[[settings.options]]
|
||||
value = "3"
|
||||
label = "3 seconds"
|
||||
|
||||
[[settings]]
|
||||
key = "screenshot_on_action"
|
||||
label = "Screenshot After Actions"
|
||||
description = "Automatically take a screenshot after every click/navigate for visual verification"
|
||||
setting_type = "toggle"
|
||||
default = "false"
|
||||
|
||||
# ─── Agent configuration ─────────────────────────────────────────────────────
|
||||
|
||||
[agent]
|
||||
name = "browser-hand"
|
||||
description = "AI web browser — navigates websites, fills forms, searches products, and completes multi-step web tasks autonomously with safety guardrails"
|
||||
module = "builtin:chat"
|
||||
provider = "default"
|
||||
model = "default"
|
||||
max_tokens = 16384
|
||||
temperature = 0.3
|
||||
max_iterations = 60
|
||||
system_prompt = """You are Browser Hand — an autonomous web browser agent that interacts with real websites on behalf of the user.
|
||||
|
||||
## Core Capabilities
|
||||
|
||||
You can navigate to URLs, click buttons/links, fill forms, read page content, and take screenshots. You have a real browser session that persists across tool calls within a conversation.
|
||||
|
||||
## Multi-Phase Pipeline
|
||||
|
||||
### Phase 1 — Understand the Task
|
||||
Parse the user's request and plan your approach:
|
||||
- What website(s) do you need to visit?
|
||||
- What information do you need to find or what action do you need to perform?
|
||||
- What are the success criteria?
|
||||
|
||||
### Phase 2 — Navigate & Observe
|
||||
1. Use `browser_navigate` to go to the target URL
|
||||
2. Read the page content to understand the layout
|
||||
3. Identify the relevant elements (buttons, links, forms, search boxes)
|
||||
|
||||
### Phase 3 — Interact
|
||||
1. Use `browser_click` for buttons and links (use CSS selectors or visible text)
|
||||
2. Use `browser_type` for filling form fields
|
||||
3. Use `browser_read_page` after each action to see the updated state
|
||||
4. Use `browser_screenshot` when you need visual verification
|
||||
|
||||
### Phase 4 — MANDATORY Purchase/Payment Approval
|
||||
**CRITICAL RULE**: Before completing ANY purchase, payment, or form submission that involves money:
|
||||
1. Summarize what you are about to buy/pay for
|
||||
2. Show the total cost
|
||||
3. List all items in the cart
|
||||
4. STOP and ask the user for explicit confirmation
|
||||
5. Only proceed after receiving clear approval
|
||||
|
||||
NEVER auto-complete purchases. NEVER click "Place Order", "Pay Now", "Confirm Purchase", or any payment button without user approval.
|
||||
|
||||
### Phase 5 — Report Results
|
||||
After completing the task:
|
||||
1. Summarize what was accomplished
|
||||
2. Include relevant details (prices, confirmation numbers, etc.)
|
||||
3. Save important data to memory for future reference
|
||||
|
||||
## CSS Selector Cheat Sheet
|
||||
|
||||
Common selectors for web interaction:
|
||||
- `#id` — element by ID (e.g., `#search-box`, `#add-to-cart`)
|
||||
- `.class` — element by class (e.g., `.btn-primary`, `.product-title`)
|
||||
- `input[name="email"]` — input by name attribute
|
||||
- `input[type="search"]` — search inputs
|
||||
- `button[type="submit"]` — submit buttons
|
||||
- `a[href*="cart"]` — links containing "cart" in href
|
||||
- `[data-testid="checkout"]` — elements with test IDs
|
||||
- `select[name="quantity"]` — dropdown selectors
|
||||
|
||||
When CSS selectors fail, fall back to clicking by visible text content.
|
||||
|
||||
## Common Web Interaction Patterns
|
||||
|
||||
### Search Pattern
|
||||
1. Navigate to site
|
||||
2. Find search box: `input[type="search"]`, `input[name="q"]`, `#search`
|
||||
3. Type query with `browser_type`
|
||||
4. Click search button or the text will auto-submit
|
||||
5. Read results
|
||||
|
||||
### Login Pattern
|
||||
1. Navigate to login page
|
||||
2. Fill email/username: `input[name="email"]` or `input[type="email"]`
|
||||
3. Fill password: `input[name="password"]` or `input[type="password"]`
|
||||
4. Click login button: `button[type="submit"]`, `.login-btn`
|
||||
5. Verify login success by reading page
|
||||
|
||||
### E-commerce Pattern
|
||||
1. Search for product
|
||||
2. Click product from results
|
||||
3. Select options (size, color, quantity)
|
||||
4. Click "Add to Cart"
|
||||
5. Navigate to cart
|
||||
6. Review items and total
|
||||
7. **STOP — Ask user for purchase approval**
|
||||
8. Only proceed to checkout after approval
|
||||
|
||||
### Form Filling Pattern
|
||||
1. Navigate to form page
|
||||
2. Read form structure
|
||||
3. Fill fields one by one with `browser_type`
|
||||
4. Use `browser_click` for checkboxes, radio buttons, dropdowns
|
||||
5. Screenshot before submission for verification
|
||||
6. Submit form
|
||||
|
||||
## Error Recovery
|
||||
|
||||
- If a click fails, try a different selector or use visible text
|
||||
- If a page doesn't load, wait and retry with `browser_navigate`
|
||||
- If you get a CAPTCHA, inform the user — you cannot solve CAPTCHAs
|
||||
- If a login is required, ask the user for credentials (never store passwords)
|
||||
- If blocked or rate-limited, wait and try again, or inform the user
|
||||
|
||||
## Security Rules
|
||||
|
||||
- NEVER store passwords or credit card numbers in memory
|
||||
- NEVER auto-complete payments without user approval
|
||||
- NEVER navigate to URLs from untrusted sources without checking them
|
||||
- NEVER fill in credentials without the user explicitly providing them
|
||||
- If you encounter suspicious or phishing-like content, warn the user immediately
|
||||
- Always verify you're on the correct domain before entering sensitive information
|
||||
|
||||
## Session Management
|
||||
|
||||
- Your browser session persists across messages in this conversation
|
||||
- Cookies and login state are maintained
|
||||
- Use `browser_close` when you're done to free resources
|
||||
- The browser auto-closes when the conversation ends
|
||||
|
||||
Update stats via memory_store after each task:
|
||||
- `browser_hand_pages_visited` — increment by pages navigated
|
||||
- `browser_hand_tasks_completed` — increment by 1
|
||||
- `browser_hand_screenshots_taken` — increment by screenshots captured
|
||||
"""
|
||||
|
||||
[dashboard]
|
||||
[[dashboard.metrics]]
|
||||
label = "Pages Visited"
|
||||
memory_key = "browser_hand_pages_visited"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Tasks Completed"
|
||||
memory_key = "browser_hand_tasks_completed"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Screenshots"
|
||||
memory_key = "browser_hand_screenshots_taken"
|
||||
format = "number"
|
||||
124
crates/openfang-hands/bundled/browser/SKILL.md
Normal file
124
crates/openfang-hands/bundled/browser/SKILL.md
Normal file
@@ -0,0 +1,124 @@
|
||||
---
|
||||
name: browser-automation
|
||||
version: "1.0.0"
|
||||
description: Playwright-based browser automation patterns for autonomous web interaction
|
||||
author: OpenFang
|
||||
tags: [browser, automation, playwright, web, scraping]
|
||||
tools: [browser_navigate, browser_click, browser_type, browser_screenshot, browser_read_page, browser_close]
|
||||
runtime: prompt_only
|
||||
---
|
||||
|
||||
# Browser Automation Skill
|
||||
|
||||
## Playwright CSS Selector Reference
|
||||
|
||||
### Basic Selectors
|
||||
| Selector | Description | Example |
|
||||
|----------|-------------|---------|
|
||||
| `#id` | By ID | `#checkout-btn` |
|
||||
| `.class` | By class | `.add-to-cart` |
|
||||
| `tag` | By element | `button`, `input` |
|
||||
| `[attr=val]` | By attribute | `[data-testid="submit"]` |
|
||||
| `tag.class` | Combined | `button.primary` |
|
||||
|
||||
### Form Selectors
|
||||
| Selector | Use Case |
|
||||
|----------|----------|
|
||||
| `input[type="email"]` | Email fields |
|
||||
| `input[type="password"]` | Password fields |
|
||||
| `input[type="search"]` | Search boxes |
|
||||
| `input[name="q"]` | Google/search query |
|
||||
| `textarea` | Multi-line text areas |
|
||||
| `select[name="country"]` | Dropdown menus |
|
||||
| `input[type="checkbox"]` | Checkboxes |
|
||||
| `input[type="radio"]` | Radio buttons |
|
||||
| `button[type="submit"]` | Submit buttons |
|
||||
|
||||
### Navigation Selectors
|
||||
| Selector | Use Case |
|
||||
|----------|----------|
|
||||
| `a[href*="cart"]` | Cart links |
|
||||
| `a[href*="checkout"]` | Checkout links |
|
||||
| `a[href*="login"]` | Login links |
|
||||
| `nav a` | Navigation menu links |
|
||||
| `.breadcrumb a` | Breadcrumb links |
|
||||
| `[role="navigation"] a` | ARIA nav links |
|
||||
|
||||
### E-commerce Selectors
|
||||
| Selector | Use Case |
|
||||
|----------|----------|
|
||||
| `.product-price`, `[data-price]` | Product prices |
|
||||
| `.add-to-cart`, `#add-to-cart` | Add to cart buttons |
|
||||
| `.cart-total`, `.order-total` | Cart total |
|
||||
| `.quantity`, `input[name="quantity"]` | Quantity selectors |
|
||||
| `.checkout-btn`, `#checkout` | Checkout buttons |
|
||||
|
||||
## Common Workflows
|
||||
|
||||
### Product Search & Purchase
|
||||
```
|
||||
1. browser_navigate → store homepage
|
||||
2. browser_type → search box with product name
|
||||
3. browser_click → search button or press Enter
|
||||
4. browser_read_page → scan results
|
||||
5. browser_click → desired product
|
||||
6. browser_read_page → verify product details & price
|
||||
7. browser_click → "Add to Cart"
|
||||
8. browser_navigate → cart page
|
||||
9. browser_read_page → verify cart contents & total
|
||||
10. STOP → Report to user, wait for approval
|
||||
11. browser_click → "Proceed to Checkout" (only after approval)
|
||||
```
|
||||
|
||||
### Account Login
|
||||
```
|
||||
1. browser_navigate → login page
|
||||
2. browser_type → email/username field
|
||||
3. browser_type → password field
|
||||
4. browser_click → login/submit button
|
||||
5. browser_read_page → verify successful login
|
||||
```
|
||||
|
||||
### Form Submission
|
||||
```
|
||||
1. browser_navigate → form page
|
||||
2. browser_read_page → understand form structure
|
||||
3. browser_type → fill each field sequentially
|
||||
4. browser_click → checkboxes/radio buttons as needed
|
||||
5. browser_screenshot → visual verification before submit
|
||||
6. browser_click → submit button
|
||||
7. browser_read_page → verify confirmation
|
||||
```
|
||||
|
||||
### Price Comparison
|
||||
```
|
||||
1. For each store:
|
||||
a. browser_navigate → store URL
|
||||
b. browser_type → search query
|
||||
c. browser_read_page → extract prices
|
||||
d. memory_store → save price data
|
||||
2. memory_recall → compare all prices
|
||||
3. Report findings to user
|
||||
```
|
||||
|
||||
## Error Recovery Strategies
|
||||
|
||||
| Error | Recovery |
|
||||
|-------|----------|
|
||||
| Element not found | Try alternative selector, use visible text, scroll page |
|
||||
| Page timeout | Retry navigation, check URL |
|
||||
| Login required | Inform user, ask for credentials |
|
||||
| CAPTCHA | Cannot solve — inform user |
|
||||
| Pop-up/modal | Click dismiss/close button first |
|
||||
| Cookie consent | Click "Accept" or dismiss banner |
|
||||
| Rate limited | Wait 30s, retry |
|
||||
| Wrong page | Use browser_read_page to verify, navigate back |
|
||||
|
||||
## Security Checklist
|
||||
|
||||
- Verify domain before entering credentials
|
||||
- Never store passwords in memory_store
|
||||
- Check for HTTPS before submitting sensitive data
|
||||
- Report suspicious redirects to user
|
||||
- Never auto-approve financial transactions
|
||||
- Warn about phishing indicators (misspelled domains, unusual URLs)
|
||||
590
crates/openfang-hands/bundled/clip/HAND.toml
Normal file
590
crates/openfang-hands/bundled/clip/HAND.toml
Normal file
@@ -0,0 +1,590 @@
|
||||
id = "clip"
|
||||
name = "Clip Hand"
|
||||
description = "Turns long-form video into viral short clips with captions and thumbnails"
|
||||
category = "content"
|
||||
icon = "\U0001F3AC"
|
||||
tools = ["shell_exec", "file_read", "file_write", "file_list", "web_fetch", "memory_store", "memory_recall"]
|
||||
|
||||
[[requires]]
|
||||
key = "ffmpeg"
|
||||
label = "FFmpeg must be installed"
|
||||
requirement_type = "binary"
|
||||
check_value = "ffmpeg"
|
||||
description = "FFmpeg is the core video processing engine used to extract clips, burn captions, crop to vertical, and generate thumbnails."
|
||||
|
||||
[requires.install]
|
||||
macos = "brew install ffmpeg"
|
||||
windows = "winget install Gyan.FFmpeg"
|
||||
linux_apt = "sudo apt install ffmpeg"
|
||||
linux_dnf = "sudo dnf install ffmpeg-free"
|
||||
linux_pacman = "sudo pacman -S ffmpeg"
|
||||
manual_url = "https://ffmpeg.org/download.html"
|
||||
estimated_time = "2-5 min"
|
||||
|
||||
[[requires]]
|
||||
key = "ffprobe"
|
||||
label = "FFprobe must be installed (ships with FFmpeg)"
|
||||
requirement_type = "binary"
|
||||
check_value = "ffprobe"
|
||||
description = "FFprobe analyzes video metadata (duration, resolution, codecs). It ships bundled with FFmpeg — if FFmpeg is installed, ffprobe is too."
|
||||
|
||||
[requires.install]
|
||||
macos = "brew install ffmpeg"
|
||||
windows = "winget install Gyan.FFmpeg"
|
||||
linux_apt = "sudo apt install ffmpeg"
|
||||
linux_dnf = "sudo dnf install ffmpeg-free"
|
||||
linux_pacman = "sudo pacman -S ffmpeg"
|
||||
manual_url = "https://ffmpeg.org/download.html"
|
||||
estimated_time = "Bundled with FFmpeg"
|
||||
|
||||
[[requires]]
|
||||
key = "yt-dlp"
|
||||
label = "yt-dlp must be installed"
|
||||
requirement_type = "binary"
|
||||
check_value = "yt-dlp"
|
||||
description = "yt-dlp downloads videos from YouTube, Vimeo, Twitter, and 1000+ other sites. It also grabs existing subtitles to skip transcription."
|
||||
|
||||
[requires.install]
|
||||
macos = "brew install yt-dlp"
|
||||
windows = "winget install yt-dlp.yt-dlp"
|
||||
linux_apt = "sudo apt install yt-dlp"
|
||||
linux_dnf = "sudo dnf install yt-dlp"
|
||||
linux_pacman = "sudo pacman -S yt-dlp"
|
||||
pip = "pip install yt-dlp"
|
||||
manual_url = "https://github.com/yt-dlp/yt-dlp#installation"
|
||||
estimated_time = "1-2 min"
|
||||
|
||||
# ─── Configurable settings ───────────────────────────────────────────────────
|
||||
|
||||
[[settings]]
|
||||
key = "stt_provider"
|
||||
label = "Speech-to-Text Provider"
|
||||
description = "How audio is transcribed to text for captions and clip selection"
|
||||
setting_type = "select"
|
||||
default = "auto"
|
||||
|
||||
[[settings.options]]
|
||||
value = "auto"
|
||||
label = "Auto-detect (best available)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "whisper_local"
|
||||
label = "Local Whisper"
|
||||
binary = "whisper"
|
||||
|
||||
[[settings.options]]
|
||||
value = "groq_whisper"
|
||||
label = "Groq Whisper API (fast, free tier)"
|
||||
provider_env = "GROQ_API_KEY"
|
||||
|
||||
[[settings.options]]
|
||||
value = "openai_whisper"
|
||||
label = "OpenAI Whisper API"
|
||||
provider_env = "OPENAI_API_KEY"
|
||||
|
||||
[[settings.options]]
|
||||
value = "deepgram"
|
||||
label = "Deepgram Nova-2"
|
||||
provider_env = "DEEPGRAM_API_KEY"
|
||||
|
||||
[[settings]]
|
||||
key = "tts_provider"
|
||||
label = "Text-to-Speech Provider"
|
||||
description = "Optional voice-over or narration generation for clips"
|
||||
setting_type = "select"
|
||||
default = "none"
|
||||
|
||||
[[settings.options]]
|
||||
value = "none"
|
||||
label = "Disabled (captions only)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "edge_tts"
|
||||
label = "Edge TTS (free)"
|
||||
binary = "edge-tts"
|
||||
|
||||
[[settings.options]]
|
||||
value = "openai_tts"
|
||||
label = "OpenAI TTS"
|
||||
provider_env = "OPENAI_API_KEY"
|
||||
|
||||
[[settings.options]]
|
||||
value = "elevenlabs"
|
||||
label = "ElevenLabs"
|
||||
provider_env = "ELEVENLABS_API_KEY"
|
||||
|
||||
# ─── Publishing settings ────────────────────────────────────────────────────
|
||||
|
||||
[[settings]]
|
||||
key = "publish_target"
|
||||
label = "Publish Clips To"
|
||||
description = "Where to send finished clips after processing. Leave as 'Local only' to skip publishing."
|
||||
setting_type = "select"
|
||||
default = "local_only"
|
||||
|
||||
[[settings.options]]
|
||||
value = "local_only"
|
||||
label = "Local only (no publishing)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "telegram"
|
||||
label = "Telegram channel"
|
||||
|
||||
[[settings.options]]
|
||||
value = "whatsapp"
|
||||
label = "WhatsApp contact/group"
|
||||
|
||||
[[settings.options]]
|
||||
value = "both"
|
||||
label = "Telegram + WhatsApp"
|
||||
|
||||
[[settings]]
|
||||
key = "telegram_bot_token"
|
||||
label = "Telegram Bot Token"
|
||||
description = "From @BotFather on Telegram (e.g. 123456:ABC-DEF...). Bot must be admin in the target channel."
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
[[settings]]
|
||||
key = "telegram_chat_id"
|
||||
label = "Telegram Chat ID"
|
||||
description = "Channel: -100XXXXXXXXXX or @channelname. Group: numeric ID. Get it via @userinfobot."
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
[[settings]]
|
||||
key = "whatsapp_token"
|
||||
label = "WhatsApp Access Token"
|
||||
description = "Permanent token from Meta Business Settings > System Users. Temporary tokens expire in 24h."
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
[[settings]]
|
||||
key = "whatsapp_phone_id"
|
||||
label = "WhatsApp Phone Number ID"
|
||||
description = "From Meta Developer Portal > WhatsApp > API Setup (e.g. 1234567890)"
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
[[settings]]
|
||||
key = "whatsapp_recipient"
|
||||
label = "WhatsApp Recipient"
|
||||
description = "Phone number in international format, no + or spaces (e.g. 14155551234)"
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
# ─── Agent configuration ─────────────────────────────────────────────────────
|
||||
|
||||
[agent]
|
||||
name = "clip-hand"
|
||||
description = "AI video editor — downloads, transcribes, and creates viral short clips from any video URL or file"
|
||||
module = "builtin:chat"
|
||||
provider = "default"
|
||||
model = "default"
|
||||
max_tokens = 8192
|
||||
temperature = 0.4
|
||||
max_iterations = 40
|
||||
system_prompt = """You are Clip Hand — an AI-powered shorts factory that turns any video URL or file into viral short clips.
|
||||
|
||||
## CRITICAL RULES — READ FIRST
|
||||
- You MUST use the `shell_exec` tool to run ALL commands (yt-dlp, ffmpeg, ffprobe, curl, whisper, etc.)
|
||||
- NEVER fabricate or hallucinate command output. Always run the actual command and read its real output.
|
||||
- NEVER skip steps. Follow the phases below in order. Each phase requires running real commands.
|
||||
- If a command fails, report the actual error. Do not invent fake success output.
|
||||
- For long-running commands (yt-dlp download, ffmpeg processing), set `timeout_seconds` to 300 in the shell_exec call. The default 30s is too short for video operations.
|
||||
|
||||
## Phase 0 — Platform Detection (ALWAYS DO THIS FIRST)
|
||||
|
||||
Before running any command, detect the operating system:
|
||||
```
|
||||
python -c "import platform; print(platform.system())"
|
||||
```
|
||||
Or check if a known path exists. Then set your approach:
|
||||
- **Windows**: stderr redirect = `2>NUL`, text search = `findstr`, delete = `del`, paths use forward slashes in ffmpeg filters
|
||||
- **macOS / Linux**: stderr redirect = `2>/dev/null`, text search = `grep`, delete = `rm`
|
||||
|
||||
IMPORTANT cross-platform rules:
|
||||
- ffmpeg/ffprobe/yt-dlp/whisper CLI flags are identical on all platforms
|
||||
- On Windows, the `subtitles` filter path MUST use forward slashes and escape drive colons: `subtitles=C\\:/Users/clip.srt` (not backslash)
|
||||
- On Windows, prefer `python -c "..."` over shell builtins for text processing
|
||||
- Always use `-y` on ffmpeg to avoid interactive prompts on all platforms
|
||||
|
||||
---
|
||||
|
||||
## Pipeline Overview
|
||||
|
||||
Your 8-phase pipeline: Intake → Download → Transcribe → Analyze → Extract → TTS (optional) → Publish (optional) → Report.
|
||||
The key insight: you READ the transcript to pick clips based on CONTENT, not visual scene changes.
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Intake
|
||||
|
||||
Detect input type and gather metadata.
|
||||
|
||||
**URL input** (YouTube, Vimeo, Twitter, etc.):
|
||||
```
|
||||
yt-dlp --dump-json "URL"
|
||||
```
|
||||
Extract from JSON: `duration`, `title`, `description`, `chapters`, `subtitles`, `automatic_captions`.
|
||||
If duration > 7200 seconds (2 hours), warn the user and ask which segment to focus on.
|
||||
|
||||
**Local file input**:
|
||||
```
|
||||
ffprobe -v quiet -print_format json -show_format -show_streams "file.mp4"
|
||||
```
|
||||
Extract: duration, resolution, codec info.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Download
|
||||
|
||||
**For URLs** — download video + attempt to grab existing subtitles:
|
||||
```
|
||||
yt-dlp -f "bv[height<=1080]+ba/b[height<=1080]" --restrict-filenames --no-playlist -o "source.%(ext)s" "URL"
|
||||
```
|
||||
Then try to grab existing auto-subs (YouTube often has these — saves transcription time):
|
||||
```
|
||||
yt-dlp --write-auto-subs --sub-lang en --sub-format json3 --skip-download --restrict-filenames -o "source" "URL"
|
||||
```
|
||||
If `source.en.json3` exists after the second command, you have YouTube auto-subs — skip whisper entirely.
|
||||
|
||||
**For local files** — just verify the file exists and is playable:
|
||||
```
|
||||
ffprobe -v error "file.mp4"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Transcribe
|
||||
|
||||
Check the **User Configuration** section (if present) for the chosen STT provider. Use the specified provider; if set to "auto" or absent, try each path in priority order.
|
||||
|
||||
### Path A: YouTube auto-subs exist (source.en.json3)
|
||||
Parse the json3 file directly. The format is:
|
||||
```json
|
||||
{"events": [{"tStartMs": 1230, "dDurationMs": 500, "segs": [{"utf8": "hello ", "tOffsetMs": 0}, {"utf8": "world", "tOffsetMs": 200}]}]}
|
||||
```
|
||||
Extract word-level timing: `word_start = (tStartMs + tOffsetMs) / 1000.0` seconds.
|
||||
Write a clean transcript with timestamps to `transcript.json`.
|
||||
|
||||
### Path B: Groq Whisper API (stt_provider = groq_whisper)
|
||||
Extract audio then call the Groq API:
|
||||
```
|
||||
ffmpeg -i source.mp4 -vn -ar 16000 -ac 1 -y audio.wav
|
||||
curl -s -X POST "https://api.groq.com/openai/v1/audio/transcriptions" \
|
||||
-H "Authorization: Bearer $GROQ_API_KEY" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "file=@audio.wav" -F "model=whisper-large-v3" \
|
||||
-F "response_format=verbose_json" -F "timestamp_granularities[]=word" \
|
||||
-o transcript_raw.json
|
||||
```
|
||||
Parse the response `words` array for word-level timing.
|
||||
|
||||
### Path C: OpenAI Whisper API (stt_provider = openai_whisper)
|
||||
```
|
||||
ffmpeg -i source.mp4 -vn -ar 16000 -ac 1 -y audio.wav
|
||||
curl -s -X POST "https://api.openai.com/v1/audio/transcriptions" \
|
||||
-H "Authorization: Bearer $OPENAI_API_KEY" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "file=@audio.wav" -F "model=whisper-1" \
|
||||
-F "response_format=verbose_json" -F "timestamp_granularities[]=word" \
|
||||
-o transcript_raw.json
|
||||
```
|
||||
|
||||
### Path D: Deepgram Nova-2 (stt_provider = deepgram)
|
||||
```
|
||||
ffmpeg -i source.mp4 -vn -ar 16000 -ac 1 -y audio.wav
|
||||
curl -s -X POST "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true&utterances=true&punctuate=true" \
|
||||
-H "Authorization: Token $DEEPGRAM_API_KEY" \
|
||||
-H "Content-Type: audio/wav" \
|
||||
--data-binary @audio.wav -o transcript_raw.json
|
||||
```
|
||||
Parse `results.channels[0].alternatives[0].words` for word-level timing.
|
||||
|
||||
### Path E: Local Whisper (stt_provider = whisper_local or auto fallback)
|
||||
```
|
||||
ffmpeg -i source.mp4 -vn -ar 16000 -ac 1 -y audio.wav
|
||||
whisper audio.wav --model small --output_format json --word_timestamps true --language en
|
||||
```
|
||||
This produces `audio.json` with segments containing word-level timing.
|
||||
If `whisper` is not found, try `whisper-ctranslate2` (same flags, 4x faster).
|
||||
|
||||
### Path F: No subtitles, no STT (fallback)
|
||||
Fall back to ffmpeg scene detection + silence detection.
|
||||
|
||||
Scene detection — run ffmpeg and look for `pts_time:` values in the output:
|
||||
```
|
||||
ffmpeg -i source.mp4 -filter:v "select='gt(scene,0.3)',showinfo" -f null - 2>&1
|
||||
```
|
||||
On macOS/Linux, pipe through `grep showinfo`. On Windows, pipe through `findstr showinfo`.
|
||||
|
||||
Silence detection — look for `silence_start` and `silence_end` in output:
|
||||
```
|
||||
ffmpeg -i source.mp4 -af "silencedetect=noise=-30dB:d=1.5" -f null - 2>&1
|
||||
```
|
||||
In this mode, you pick clips by visual scene changes and silence gaps. Skip Phase 4's transcript analysis.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Analyze & Pick Segments
|
||||
|
||||
THIS IS YOUR CORE VALUE. Read the full transcript and identify 3-5 segments worth clipping.
|
||||
|
||||
**What makes a viral clip:**
|
||||
- **Hook in the first 3 seconds** — a surprising claim, question, or emotional statement
|
||||
- **Self-contained story or insight** — makes sense without the full video
|
||||
- **Emotional peaks** — laughter, surprise, anger, vulnerability
|
||||
- **Controversial or contrarian takes** — things people want to share or argue about
|
||||
- **Insight density** — high ratio of interesting ideas per second
|
||||
- **Clean ending** — ends on a punchline, conclusion, or dramatic pause
|
||||
|
||||
**Segment selection rules:**
|
||||
- Each clip should be 30-90 seconds (sweet spot for shorts)
|
||||
- Start clips mid-sentence if the hook is stronger that way ("...and that's when I realized")
|
||||
- End on a strong beat — don't trail off
|
||||
- Avoid segments that require heavy visual context (charts, demos) unless the audio is compelling
|
||||
- Spread clips across the video — don't cluster them all in one section
|
||||
|
||||
**For each selected segment, note:**
|
||||
1. Exact start timestamp (seconds)
|
||||
2. Exact end timestamp (seconds)
|
||||
3. Suggested title (compelling, <60 chars)
|
||||
4. One-sentence virality reasoning
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Extract & Process
|
||||
|
||||
For each selected segment (N = 1, 2, 3, ...):
|
||||
|
||||
### Step 1: Extract the clip
|
||||
```
|
||||
ffmpeg -ss <start> -to <end> -i source.mp4 -c:v libx264 -c:a aac -preset fast -crf 23 -movflags +faststart -y clip_N.mp4
|
||||
```
|
||||
|
||||
### Step 2: Crop to vertical (9:16)
|
||||
```
|
||||
ffmpeg -i clip_N.mp4 -vf "crop=ih*9/16:ih:(iw-ih*9/16)/2:0,scale=1080:1920" -c:a copy -y clip_N_vert.mp4
|
||||
```
|
||||
If the source is already vertical or close to it, use scale+pad instead:
|
||||
```
|
||||
ffmpeg -i clip_N.mp4 -vf "scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2:black" -c:a copy -y clip_N_vert.mp4
|
||||
```
|
||||
|
||||
### Step 3: Generate SRT captions from transcript
|
||||
Build an SRT file (`clip_N.srt`) from the word-level timestamps in your transcript.
|
||||
Use file_write to create it — do NOT rely on shell echo/redirection.
|
||||
Group words into subtitle lines of ~8-12 words (roughly 2-3 seconds each).
|
||||
Adjust timestamps to be relative to the clip start time.
|
||||
|
||||
SRT format:
|
||||
```
|
||||
1
|
||||
00:00:00,000 --> 00:00:02,500
|
||||
First line of caption text
|
||||
|
||||
2
|
||||
00:00:02,500 --> 00:00:05,100
|
||||
Second line of caption text
|
||||
```
|
||||
|
||||
### Step 4: Burn captions onto the clip
|
||||
IMPORTANT: On Windows, the subtitles filter path must use forward slashes and escape colons.
|
||||
If the SRT is in the current directory, just use the filename directly:
|
||||
```
|
||||
ffmpeg -i clip_N_vert.mp4 -vf "subtitles=clip_N.srt:force_style='FontSize=22,FontName=Arial,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,Outline=2,Alignment=2,MarginV=40'" -c:a copy -y clip_N_final.mp4
|
||||
```
|
||||
If using an absolute path on Windows, escape it: `subtitles=C\\:/Users/me/clip_N.srt`
|
||||
|
||||
### Step 4b: TTS voice-over (if tts_provider is set and not "none")
|
||||
Check the **User Configuration** for tts_provider. If a TTS provider is configured:
|
||||
|
||||
**edge_tts**:
|
||||
```
|
||||
edge-tts --text "Caption text for clip N" --voice en-US-AriaNeural --write-media tts_N.mp3
|
||||
ffmpeg -i clip_N_final.mp4 -i tts_N.mp3 -filter_complex "[0:a]volume=0.3[orig];[1:a]volume=1.0[tts];[orig][tts]amix=inputs=2:duration=first[out]" -map 0:v -map "[out]" -c:v copy -c:a aac -y clip_N_voiced.mp4
|
||||
```
|
||||
|
||||
**openai_tts**:
|
||||
```
|
||||
curl -s -X POST "https://api.openai.com/v1/audio/speech" \
|
||||
-H "Authorization: Bearer $OPENAI_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"tts-1","input":"Caption text for clip N","voice":"alloy"}' \
|
||||
--output tts_N.mp3
|
||||
ffmpeg -i clip_N_final.mp4 -i tts_N.mp3 -filter_complex "[0:a]volume=0.3[orig];[1:a]volume=1.0[tts];[orig][tts]amix=inputs=2:duration=first[out]" -map 0:v -map "[out]" -c:v copy -c:a aac -y clip_N_voiced.mp4
|
||||
```
|
||||
|
||||
**elevenlabs**:
|
||||
```
|
||||
curl -s -X POST "https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM" \
|
||||
-H "xi-api-key: $ELEVENLABS_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"text":"Caption text for clip N","model_id":"eleven_monolingual_v1"}' \
|
||||
--output tts_N.mp3
|
||||
ffmpeg -i clip_N_final.mp4 -i tts_N.mp3 -filter_complex "[0:a]volume=0.3[orig];[1:a]volume=1.0[tts];[orig][tts]amix=inputs=2:duration=first[out]" -map 0:v -map "[out]" -c:v copy -c:a aac -y clip_N_voiced.mp4
|
||||
```
|
||||
|
||||
If TTS was generated, rename `clip_N_voiced.mp4` to `clip_N_final.mp4` (replace).
|
||||
|
||||
### Step 5: Generate thumbnail
|
||||
```
|
||||
ffmpeg -i clip_N.mp4 -ss 2 -frames:v 1 -q:v 2 -y thumb_N.jpg
|
||||
```
|
||||
|
||||
### Cleanup
|
||||
Remove intermediate files (clip_N.mp4, clip_N_vert.mp4, tts_N.mp3) — keep only clip_N_final.mp4, clip_N.srt, and thumb_N.jpg.
|
||||
Use `del clip_N.mp4 clip_N_vert.mp4` on Windows, `rm clip_N.mp4 clip_N_vert.mp4` on macOS/Linux.
|
||||
|
||||
---
|
||||
|
||||
## Phase 6 — Publish (Optional)
|
||||
|
||||
After all clips are processed and before the final report, check if publishing is configured.
|
||||
|
||||
### Step 1: Check settings
|
||||
Look at the `Publish Clips To` setting from User Configuration:
|
||||
- If `local_only`, absent, or empty → skip this phase entirely
|
||||
- If `telegram` → publish to Telegram only
|
||||
- If `whatsapp` → publish to WhatsApp only
|
||||
- If `both` → publish to both platforms
|
||||
|
||||
### Step 2: Validate credentials
|
||||
**Telegram** requires both:
|
||||
- `Telegram Bot Token` (non-empty)
|
||||
- `Telegram Chat ID` (non-empty)
|
||||
|
||||
**WhatsApp** requires all three:
|
||||
- `WhatsApp Access Token` (non-empty)
|
||||
- `WhatsApp Phone Number ID` (non-empty)
|
||||
- `WhatsApp Recipient` (non-empty)
|
||||
|
||||
If any required credential is missing, print a warning and skip that platform. Never fail the job over missing credentials.
|
||||
|
||||
### Step 3: Publish to Telegram
|
||||
For each `clip_N_final.mp4`:
|
||||
```
|
||||
curl -s -X POST "https://api.telegram.org/bot<TELEGRAM_BOT_TOKEN>/sendVideo" \
|
||||
-F "chat_id=<TELEGRAM_CHAT_ID>" \
|
||||
-F "video=@clip_N_final.mp4" \
|
||||
-F "caption=<clip title>" \
|
||||
-F "parse_mode=HTML" \
|
||||
-F "supports_streaming=true"
|
||||
```
|
||||
Check the response for `"ok": true`. If the response contains `"error_code": 413` or mentions file too large, re-encode:
|
||||
```
|
||||
ffmpeg -i clip_N_final.mp4 -fs 49M -c:v libx264 -crf 28 -preset fast -c:a aac -y clip_N_tg.mp4
|
||||
```
|
||||
Then retry with the smaller file.
|
||||
|
||||
### Step 4: Publish to WhatsApp
|
||||
WhatsApp Cloud API requires a two-step flow:
|
||||
|
||||
**Step 4a — Upload media:**
|
||||
```
|
||||
curl -s -X POST "https://graph.facebook.com/v21.0/<WHATSAPP_PHONE_ID>/media" \
|
||||
-H "Authorization: Bearer <WHATSAPP_TOKEN>" \
|
||||
-F "file=@clip_N_final.mp4" \
|
||||
-F "type=video/mp4" \
|
||||
-F "messaging_product=whatsapp"
|
||||
```
|
||||
Extract `id` from the response JSON.
|
||||
|
||||
If the file is over 16MB, re-encode first:
|
||||
```
|
||||
ffmpeg -i clip_N_final.mp4 -fs 15M -c:v libx264 -crf 30 -preset fast -c:a aac -y clip_N_wa.mp4
|
||||
```
|
||||
Then upload the smaller file.
|
||||
|
||||
**Step 4b — Send message:**
|
||||
```
|
||||
curl -s -X POST "https://graph.facebook.com/v21.0/<WHATSAPP_PHONE_ID>/messages" \
|
||||
-H "Authorization: Bearer <WHATSAPP_TOKEN>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"messaging_product":"whatsapp","to":"<WHATSAPP_RECIPIENT>","type":"video","video":{"id":"<MEDIA_ID>","caption":"<clip title>"}}'
|
||||
```
|
||||
|
||||
### Step 5: Rate limiting
|
||||
If publishing more than 3 clips, add a 1-second delay between sends:
|
||||
```
|
||||
sleep 1
|
||||
```
|
||||
|
||||
### Step 6: Publishing summary
|
||||
Build a summary table:
|
||||
|
||||
| # | Platform | Status | Details |
|
||||
|---|----------|--------|---------|
|
||||
| 1 | Telegram | Sent | message_id: 1234 |
|
||||
| 1 | WhatsApp | Sent | message_id: wamid.xxx |
|
||||
| 2 | Telegram | Failed | Re-encoded and retried |
|
||||
|
||||
Track counts of successful Telegram and WhatsApp publishes for the report phase.
|
||||
|
||||
IMPORTANT: Never expose API tokens in the summary or report. Mask any token references as `***`.
|
||||
|
||||
---
|
||||
|
||||
## Phase 7 — Report
|
||||
|
||||
After all clips are produced, report:
|
||||
|
||||
| # | Title | File | Duration | Size |
|
||||
|---|-------|------|----------|------|
|
||||
| 1 | "..." | clip_1_final.mp4 | 45s | 12MB |
|
||||
| 2 | "..." | clip_2_final.mp4 | 38s | 9MB |
|
||||
|
||||
Include file paths and thumbnail paths.
|
||||
|
||||
Update stats via memory_store:
|
||||
- `clip_hand_jobs_completed` — increment by 1
|
||||
- `clip_hand_clips_generated` — increment by number of clips made
|
||||
- `clip_hand_total_duration_secs` — increment by total clip duration
|
||||
- `clip_hand_clips_published_telegram` — increment by number of clips successfully sent to Telegram (0 if not configured)
|
||||
- `clip_hand_clips_published_whatsapp` — increment by number of clips successfully sent to WhatsApp (0 if not configured)
|
||||
|
||||
---
|
||||
|
||||
## Guidelines
|
||||
|
||||
- ALWAYS run Phase 0 (platform detection) first — adapt all commands to the detected OS
|
||||
- Always verify tools are available before starting (ffmpeg, ffprobe, yt-dlp)
|
||||
- Create output files in the same directory as the source (or current directory for URLs)
|
||||
- If the user specifies a number of clips, respect it; otherwise produce 3-5
|
||||
- If the user provides specific timestamps, skip Phase 4 and use those
|
||||
- If download or transcription fails, explain what went wrong and offer alternatives
|
||||
- Use `-y` flag on all ffmpeg commands to overwrite without prompting
|
||||
- For very long videos (>1hr), process in chunks to avoid memory issues
|
||||
- Use file_write tool for creating SRT/text files — never rely on shell echo/heredoc which varies by OS
|
||||
- All ffmpeg filter paths must use forward slashes, even on Windows
|
||||
- Never expose API tokens (Telegram, WhatsApp) in reports or summaries — always mask as `***`
|
||||
- Publishing errors are non-fatal — if a platform fails, log the error and continue with remaining clips/platforms
|
||||
- Respect rate limits: add 1-second delay between sends when publishing more than 3 clips
|
||||
"""
|
||||
|
||||
[dashboard]
|
||||
[[dashboard.metrics]]
|
||||
label = "Jobs Completed"
|
||||
memory_key = "clip_hand_jobs_completed"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Clips Generated"
|
||||
memory_key = "clip_hand_clips_generated"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Total Duration"
|
||||
memory_key = "clip_hand_total_duration_secs"
|
||||
format = "duration"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Published to Telegram"
|
||||
memory_key = "clip_hand_clips_published_telegram"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Published to WhatsApp"
|
||||
memory_key = "clip_hand_clips_published_whatsapp"
|
||||
format = "number"
|
||||
474
crates/openfang-hands/bundled/clip/SKILL.md
Normal file
474
crates/openfang-hands/bundled/clip/SKILL.md
Normal file
@@ -0,0 +1,474 @@
|
||||
---
|
||||
name: clip-hand-skill
|
||||
version: "2.0.0"
|
||||
description: "Expert knowledge for AI video clipping — yt-dlp downloading, whisper transcription, SRT generation, and ffmpeg processing"
|
||||
runtime: prompt_only
|
||||
---
|
||||
|
||||
# Video Clipping Expert Knowledge
|
||||
|
||||
## Cross-Platform Notes
|
||||
|
||||
All tools (ffmpeg, ffprobe, yt-dlp, whisper) use **identical CLI flags** on Windows, macOS, and Linux. The differences are only in shell syntax:
|
||||
|
||||
| Feature | macOS / Linux | Windows (cmd.exe) |
|
||||
|---------|---------------|-------------------|
|
||||
| Suppress stderr | `2>/dev/null` | `2>NUL` |
|
||||
| Filter output | `\| grep pattern` | `\| findstr pattern` |
|
||||
| Delete files | `rm file1 file2` | `del file1 file2` |
|
||||
| Null output device | `-f null -` | `-f null -` (same) |
|
||||
| ffmpeg subtitle paths | `subtitles=clip.srt` | `subtitles=clip.srt` (relative OK, absolute needs `C\\:/path`) |
|
||||
|
||||
IMPORTANT: ffmpeg filter paths (`-vf "subtitles=..."`) always need forward slashes. On Windows with absolute paths, escape the colon: `subtitles=C\\:/Users/me/clip.srt`
|
||||
|
||||
Prefer using `file_write` tool for creating SRT/text files instead of shell echo/heredoc.
|
||||
|
||||
---
|
||||
|
||||
## yt-dlp Reference
|
||||
|
||||
### Download with Format Selection
|
||||
```
|
||||
# Best video up to 1080p + best audio, merged
|
||||
yt-dlp -f "bv[height<=1080]+ba/b[height<=1080]" --restrict-filenames -o "source.%(ext)s" "URL"
|
||||
|
||||
# 720p max (smaller, faster)
|
||||
yt-dlp -f "bv[height<=720]+ba/b[height<=720]" --restrict-filenames -o "source.%(ext)s" "URL"
|
||||
|
||||
# Audio only (for transcription-only workflows)
|
||||
yt-dlp -x --audio-format wav --restrict-filenames -o "audio.%(ext)s" "URL"
|
||||
```
|
||||
|
||||
### Metadata Inspection
|
||||
```
|
||||
# Get full metadata as JSON (duration, title, chapters, available subs)
|
||||
yt-dlp --dump-json "URL"
|
||||
|
||||
# Key fields: duration, title, description, chapters, subtitles, automatic_captions
|
||||
```
|
||||
|
||||
### YouTube Auto-Subtitles
|
||||
```
|
||||
# Download auto-generated subtitles in json3 format (word-level timing)
|
||||
yt-dlp --write-auto-subs --sub-lang en --sub-format json3 --skip-download --restrict-filenames -o "source" "URL"
|
||||
|
||||
# Download manual subtitles if available
|
||||
yt-dlp --write-subs --sub-lang en --sub-format srt --skip-download --restrict-filenames -o "source" "URL"
|
||||
|
||||
# List available subtitle languages
|
||||
yt-dlp --list-subs "URL"
|
||||
```
|
||||
|
||||
### Useful Flags
|
||||
- `--restrict-filenames` — safe ASCII filenames (no spaces/special chars) — important on all platforms
|
||||
- `--no-playlist` — download single video even if URL is in a playlist
|
||||
- `-o "template.%(ext)s"` — output template (%(ext)s auto-detects format)
|
||||
- `--cookies-from-browser chrome` — use browser cookies for age-restricted content
|
||||
- `--extract-audio` / `-x` — extract audio only
|
||||
- `--audio-format wav` — convert audio to wav (for whisper)
|
||||
|
||||
---
|
||||
|
||||
## Whisper Transcription Reference
|
||||
|
||||
### Audio Extraction for Whisper
|
||||
```
|
||||
# Extract mono 16kHz WAV (whisper's preferred input format)
|
||||
ffmpeg -i source.mp4 -vn -ar 16000 -ac 1 -y audio.wav
|
||||
```
|
||||
|
||||
### Basic Transcription
|
||||
```
|
||||
# Standard transcription with word-level timestamps
|
||||
whisper audio.wav --model small --output_format json --word_timestamps true --language en
|
||||
|
||||
# Faster alternative (same flags, 4x speed)
|
||||
whisper-ctranslate2 audio.wav --model small --output_format json --word_timestamps true --language en
|
||||
```
|
||||
|
||||
### Model Sizes
|
||||
| Model | VRAM | Speed | Quality | Use When |
|
||||
|-------|------|-------|---------|----------|
|
||||
| tiny | ~1GB | Fastest | Rough | Quick previews, testing pipeline |
|
||||
| base | ~1GB | Fast | OK | Short clips, clear speech |
|
||||
| small | ~2GB | Good | Good | **Default — best balance** |
|
||||
| medium | ~5GB | Slow | Better | Important content, accented speech |
|
||||
| large-v3 | ~10GB | Slowest | Best | Final production, multiple languages |
|
||||
|
||||
Note: On macOS Apple Silicon, consider `mlx-whisper` as a faster native alternative.
|
||||
|
||||
### JSON Output Structure
|
||||
```json
|
||||
{
|
||||
"text": "full transcript text...",
|
||||
"segments": [
|
||||
{
|
||||
"id": 0,
|
||||
"start": 0.0,
|
||||
"end": 4.52,
|
||||
"text": " Hello everyone, welcome back.",
|
||||
"words": [
|
||||
{"word": " Hello", "start": 0.0, "end": 0.32, "probability": 0.95},
|
||||
{"word": " everyone,", "start": 0.32, "end": 0.78, "probability": 0.91},
|
||||
{"word": " welcome", "start": 0.78, "end": 1.14, "probability": 0.98},
|
||||
{"word": " back.", "start": 1.14, "end": 1.52, "probability": 0.97}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
- `segments[].words[]` gives word-level timing when `--word_timestamps true`
|
||||
- `probability` indicates confidence (< 0.5 = likely wrong)
|
||||
|
||||
---
|
||||
|
||||
## YouTube json3 Subtitle Parsing
|
||||
|
||||
### Format Structure
|
||||
```json
|
||||
{
|
||||
"events": [
|
||||
{
|
||||
"tStartMs": 1230,
|
||||
"dDurationMs": 5000,
|
||||
"segs": [
|
||||
{"utf8": "hello ", "tOffsetMs": 0},
|
||||
{"utf8": "world ", "tOffsetMs": 200},
|
||||
{"utf8": "how ", "tOffsetMs": 450},
|
||||
{"utf8": "are you", "tOffsetMs": 700}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Extracting Word Timing
|
||||
For each event and each segment within it:
|
||||
- `word_start_ms = event.tStartMs + seg.tOffsetMs`
|
||||
- `word_start_secs = word_start_ms / 1000.0`
|
||||
- `word_text = seg.utf8.trim()`
|
||||
|
||||
Events without `segs` are line breaks or formatting — skip them.
|
||||
Events with `segs` containing only `"\n"` are newlines — skip them.
|
||||
|
||||
---
|
||||
|
||||
## SRT Generation from Transcript
|
||||
|
||||
### SRT Format
|
||||
```
|
||||
1
|
||||
00:00:00,000 --> 00:00:02,500
|
||||
First line of caption text
|
||||
|
||||
2
|
||||
00:00:02,500 --> 00:00:05,100
|
||||
Second line of caption text
|
||||
```
|
||||
|
||||
### Rules for Building Good SRT
|
||||
- Group words into subtitle lines of ~8-12 words (2-3 seconds per line)
|
||||
- Break at natural pause points (periods, commas, clause boundaries)
|
||||
- Keep lines under 42 characters for readability on mobile
|
||||
- Adjust timestamps relative to clip start (subtract clip start time from all timestamps)
|
||||
- Timestamp format: `HH:MM:SS,mmm` (comma separator, not dot)
|
||||
- Each entry: index line, timestamp line, text line(s), blank line
|
||||
- Use `file_write` tool to create the SRT file — works identically on all platforms
|
||||
|
||||
### Styled Captions with ASS Format
|
||||
For animated/styled captions, use ASS subtitle format instead of SRT:
|
||||
```
|
||||
ffmpeg -i clip.mp4 -vf "subtitles=clip.ass:force_style='FontSize=22,FontName=Arial,Bold=1,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,Outline=2,Shadow=1,Alignment=2,MarginV=40'" -c:a copy output.mp4
|
||||
```
|
||||
|
||||
Key ASS style properties:
|
||||
- `PrimaryColour=&H00FFFFFF` — white text (AABBGGRR format)
|
||||
- `OutlineColour=&H00000000` — black outline
|
||||
- `Outline=2` — outline thickness
|
||||
- `Alignment=2` — bottom center
|
||||
- `MarginV=40` — margin from bottom edge
|
||||
- `FontSize=22` — good size for 1080x1920 vertical
|
||||
|
||||
---
|
||||
|
||||
## FFmpeg Video Processing
|
||||
|
||||
### Scene Detection
|
||||
```
|
||||
ffmpeg -i input.mp4 -filter:v "select='gt(scene,0.3)',showinfo" -f null - 2>&1
|
||||
```
|
||||
- Threshold 0.1 = very sensitive, 0.5 = only major cuts
|
||||
- Parse `pts_time:` from showinfo output for timestamps
|
||||
- On macOS/Linux pipe through `grep showinfo`, on Windows pipe through `findstr showinfo`
|
||||
|
||||
### Silence Detection
|
||||
```
|
||||
ffmpeg -i input.mp4 -af "silencedetect=noise=-30dB:d=1.5" -f null - 2>&1
|
||||
```
|
||||
- `d=1.5` = minimum 1.5 seconds of silence
|
||||
- Look for `silence_start` and `silence_end` in output
|
||||
|
||||
### Clip Extraction
|
||||
```
|
||||
# Re-encoded (accurate cuts)
|
||||
ffmpeg -ss 00:01:30 -to 00:02:15 -i input.mp4 -c:v libx264 -c:a aac -preset fast -crf 23 -movflags +faststart -y clip.mp4
|
||||
|
||||
# Lossless copy (fast but may have keyframe alignment issues)
|
||||
ffmpeg -ss 00:01:30 -to 00:02:15 -i input.mp4 -c copy -y clip.mp4
|
||||
```
|
||||
- `-ss` before `-i` = fast seek (recommended for extraction)
|
||||
- `-to` = end timestamp, `-t` = duration
|
||||
|
||||
### Vertical Video (9:16 for Shorts/Reels/TikTok)
|
||||
```
|
||||
# Center crop (when source is 16:9)
|
||||
ffmpeg -i input.mp4 -vf "crop=ih*9/16:ih:(iw-ih*9/16)/2:0,scale=1080:1920" -c:a copy output.mp4
|
||||
|
||||
# Scale with letterbox padding (preserves full frame)
|
||||
ffmpeg -i input.mp4 -vf "scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2:black" -c:a copy output.mp4
|
||||
```
|
||||
|
||||
### Caption Burn-in
|
||||
```
|
||||
# SRT subtitles with styling (use relative path or forward-slash absolute path)
|
||||
ffmpeg -i input.mp4 -vf "subtitles=subs.srt:force_style='FontSize=22,FontName=Arial,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,Outline=2,Alignment=2,MarginV=40'" -c:a copy output.mp4
|
||||
|
||||
# Simple text overlay
|
||||
ffmpeg -i input.mp4 -vf "drawtext=text='Caption':fontsize=48:fontcolor=white:borderw=3:bordercolor=black:x=(w-text_w)/2:y=h-th-40" output.mp4
|
||||
```
|
||||
Windows path escaping: `subtitles=C\\:/Users/me/subs.srt` (double-backslash before colon)
|
||||
|
||||
### Thumbnail Generation
|
||||
```
|
||||
# At specific time (2 seconds in)
|
||||
ffmpeg -i input.mp4 -ss 2 -frames:v 1 -q:v 2 -y thumb.jpg
|
||||
|
||||
# Best keyframe
|
||||
ffmpeg -i input.mp4 -vf "select='eq(pict_type,I)',scale=1280:720" -frames:v 1 thumb.jpg
|
||||
|
||||
# Contact sheet
|
||||
ffmpeg -i input.mp4 -vf "fps=1/10,scale=320:-1,tile=4x4" contact.jpg
|
||||
```
|
||||
|
||||
### Video Analysis
|
||||
```
|
||||
# Full metadata (JSON)
|
||||
ffprobe -v quiet -print_format json -show_format -show_streams input.mp4
|
||||
|
||||
# Duration only
|
||||
ffprobe -v error -show_entries format=duration -of csv=p=0 input.mp4
|
||||
|
||||
# Resolution
|
||||
ffprobe -v error -select_streams v:0 -show_entries stream=width,height -of csv=p=0 input.mp4
|
||||
```
|
||||
|
||||
## API-Based STT Reference
|
||||
|
||||
### Groq Whisper API
|
||||
Fastest cloud STT — uses whisper-large-v3 on Groq hardware. Free tier available.
|
||||
```
|
||||
curl -s -X POST "https://api.groq.com/openai/v1/audio/transcriptions" \
|
||||
-H "Authorization: Bearer $GROQ_API_KEY" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "file=@audio.wav" \
|
||||
-F "model=whisper-large-v3" \
|
||||
-F "response_format=verbose_json" \
|
||||
-F "timestamp_granularities[]=word" \
|
||||
-o transcript_raw.json
|
||||
```
|
||||
Response: `{"text": "...", "words": [{"word": "hello", "start": 0.0, "end": 0.32}]}`
|
||||
- Max file size: 25MB. For longer audio, split with ffmpeg first.
|
||||
- `timestamp_granularities[]=word` is required for word-level timing.
|
||||
|
||||
### OpenAI Whisper API
|
||||
```
|
||||
curl -s -X POST "https://api.openai.com/v1/audio/transcriptions" \
|
||||
-H "Authorization: Bearer $OPENAI_API_KEY" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "file=@audio.wav" \
|
||||
-F "model=whisper-1" \
|
||||
-F "response_format=verbose_json" \
|
||||
-F "timestamp_granularities[]=word" \
|
||||
-o transcript_raw.json
|
||||
```
|
||||
Response format same as Groq. Max 25MB.
|
||||
|
||||
### Deepgram Nova-2
|
||||
```
|
||||
curl -s -X POST "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true&utterances=true&punctuate=true" \
|
||||
-H "Authorization: Token $DEEPGRAM_API_KEY" \
|
||||
-H "Content-Type: audio/wav" \
|
||||
--data-binary @audio.wav \
|
||||
-o transcript_raw.json
|
||||
```
|
||||
Response: `{"results": {"channels": [{"alternatives": [{"words": [{"word": "hello", "start": 0.0, "end": 0.32, "confidence": 0.99}]}]}]}}`
|
||||
- Supports streaming, but for clips use batch mode.
|
||||
- `smart_format=true` adds punctuation and casing.
|
||||
|
||||
---
|
||||
|
||||
## TTS Reference
|
||||
|
||||
### Edge TTS (free, no API key needed)
|
||||
```
|
||||
# List available voices
|
||||
edge-tts --list-voices
|
||||
|
||||
# Generate speech
|
||||
edge-tts --text "Your caption text here" --voice en-US-AriaNeural --write-media tts_output.mp3
|
||||
|
||||
# Other good voices: en-US-GuyNeural, en-GB-SoniaNeural, en-AU-NatashaNeural
|
||||
```
|
||||
Install: `pip install edge-tts`
|
||||
|
||||
### OpenAI TTS
|
||||
```
|
||||
curl -s -X POST "https://api.openai.com/v1/audio/speech" \
|
||||
-H "Authorization: Bearer $OPENAI_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"tts-1","input":"Your text here","voice":"alloy"}' \
|
||||
--output tts_output.mp3
|
||||
```
|
||||
Voices: `alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer`
|
||||
Models: `tts-1` (fast), `tts-1-hd` (quality)
|
||||
|
||||
### ElevenLabs
|
||||
```
|
||||
curl -s -X POST "https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM" \
|
||||
-H "xi-api-key: $ELEVENLABS_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"text":"Your text here","model_id":"eleven_monolingual_v1"}' \
|
||||
--output tts_output.mp3
|
||||
```
|
||||
Voice ID `21m00Tcm4TlvDq8ikWAM` = Rachel (default). List voices: `GET /v1/voices`
|
||||
|
||||
### Audio Merging (TTS + Original)
|
||||
```
|
||||
# Mix TTS over original audio (original at 30% volume, TTS at 100%)
|
||||
ffmpeg -i clip.mp4 -i tts.mp3 \
|
||||
-filter_complex "[0:a]volume=0.3[orig];[1:a]volume=1.0[tts];[orig][tts]amix=inputs=2:duration=first[out]" \
|
||||
-map 0:v -map "[out]" -c:v copy -c:a aac -y clip_voiced.mp4
|
||||
|
||||
# Replace audio entirely (no original audio)
|
||||
ffmpeg -i clip.mp4 -i tts.mp3 -map 0:v -map 1:a -c:v copy -c:a aac -shortest -y clip_voiced.mp4
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quality & Performance Tips
|
||||
|
||||
- Use `-preset ultrafast` for quick previews, `-preset slow` for final output
|
||||
- Use `-crf 23` for good quality (18=high, 28=low, lower=bigger files)
|
||||
- Add `-movflags +faststart` for web-friendly MP4
|
||||
- Use `-threads 0` to auto-detect CPU cores
|
||||
- Always use `-y` to overwrite without asking
|
||||
|
||||
---
|
||||
|
||||
## Telegram Bot API Reference
|
||||
|
||||
### sendVideo — Upload and send a video to a chat/channel
|
||||
```
|
||||
curl -s -X POST "https://api.telegram.org/bot<BOT_TOKEN>/sendVideo" \
|
||||
-F "chat_id=<CHAT_ID>" \
|
||||
-F "video=@clip_N_final.mp4" \
|
||||
-F "caption=Clip title here" \
|
||||
-F "parse_mode=HTML" \
|
||||
-F "supports_streaming=true"
|
||||
```
|
||||
|
||||
### Parameters
|
||||
| Parameter | Required | Description |
|
||||
|-----------|----------|-------------|
|
||||
| `chat_id` | Yes | Channel (`-100XXXXXXXXXX` or `@channelname`), group, or user numeric ID |
|
||||
| `video` | Yes | `@filepath` for upload (max 50MB) or a Telegram `file_id` for re-send |
|
||||
| `caption` | No | Text caption, up to 1024 characters |
|
||||
| `parse_mode` | No | `HTML` or `MarkdownV2` for styled captions |
|
||||
| `supports_streaming` | No | `true` enables progressive playback |
|
||||
|
||||
### Success Response
|
||||
```json
|
||||
{"ok": true, "result": {"message_id": 1234, "video": {"file_id": "BAACAgI...", "file_size": 5242880}}}
|
||||
```
|
||||
|
||||
### Error Response
|
||||
```json
|
||||
{"ok": false, "error_code": 400, "description": "Bad Request: chat not found"}
|
||||
```
|
||||
|
||||
### Common Errors
|
||||
| Error Code | Description | Fix |
|
||||
|------------|-------------|-----|
|
||||
| 400 | Chat not found | Verify chat_id; bot must be added to the channel/group |
|
||||
| 401 | Unauthorized | Bot token is invalid or revoked — regenerate via @BotFather |
|
||||
| 413 | Request entity too large | File exceeds 50MB — re-encode: `ffmpeg -i input.mp4 -fs 49M -c:v libx264 -crf 28 -preset fast -c:a aac -y output.mp4` |
|
||||
| 429 | Too many requests | Rate limited — wait the `retry_after` seconds from the response |
|
||||
|
||||
### File Size Limit
|
||||
Telegram allows up to **50MB** for video uploads via Bot API. If a clip exceeds this:
|
||||
```
|
||||
ffmpeg -i clip_N_final.mp4 -fs 49M -c:v libx264 -crf 28 -preset fast -c:a aac -movflags +faststart -y clip_N_tg.mp4
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## WhatsApp Business Cloud API Reference
|
||||
|
||||
### Two-Step Flow: Upload Media → Send Message
|
||||
|
||||
WhatsApp Cloud API requires uploading the video first to get a `media_id`, then sending a message referencing that ID.
|
||||
|
||||
### Step 1 — Upload Media
|
||||
```
|
||||
curl -s -X POST "https://graph.facebook.com/v21.0/<PHONE_NUMBER_ID>/media" \
|
||||
-H "Authorization: Bearer <ACCESS_TOKEN>" \
|
||||
-F "file=@clip_N_final.mp4" \
|
||||
-F "type=video/mp4" \
|
||||
-F "messaging_product=whatsapp"
|
||||
```
|
||||
|
||||
Success response:
|
||||
```json
|
||||
{"id": "1234567890"}
|
||||
```
|
||||
|
||||
### Step 2 — Send Video Message
|
||||
```
|
||||
curl -s -X POST "https://graph.facebook.com/v21.0/<PHONE_NUMBER_ID>/messages" \
|
||||
-H "Authorization: Bearer <ACCESS_TOKEN>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"messaging_product": "whatsapp",
|
||||
"to": "<RECIPIENT_PHONE>",
|
||||
"type": "video",
|
||||
"video": {
|
||||
"id": "<MEDIA_ID>",
|
||||
"caption": "Clip title here"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
Success response:
|
||||
```json
|
||||
{"messaging_product": "whatsapp", "contacts": [{"wa_id": "14155551234"}], "messages": [{"id": "wamid.HBgL..."}]}
|
||||
```
|
||||
|
||||
### File Size Limit
|
||||
WhatsApp allows up to **16MB** for video uploads. If a clip exceeds this:
|
||||
```
|
||||
ffmpeg -i clip_N_final.mp4 -fs 15M -c:v libx264 -crf 30 -preset fast -c:a aac -movflags +faststart -y clip_N_wa.mp4
|
||||
```
|
||||
|
||||
### 24-Hour Messaging Window
|
||||
WhatsApp requires the recipient to have messaged you within the last 24 hours (for non-template messages). If you get a "template required" error, either:
|
||||
- Ask the recipient to send any message to the business number first
|
||||
- Use a pre-approved message template instead of a free-form video message
|
||||
|
||||
### Common Errors
|
||||
| Error Code | Description | Fix |
|
||||
|------------|-------------|-----|
|
||||
| 100 | Invalid parameter | Check phone_number_id and recipient format (no + prefix, no spaces) |
|
||||
| 190 | Invalid/expired access token | Regenerate token in Meta Business Settings; temporary tokens expire in 24h |
|
||||
| 131030 | Recipient not in allowed list | In test mode, add recipient to allowed numbers in Meta Developer Portal |
|
||||
| 131047 | Re-engagement message / template required | Recipient hasn't messaged within 24h — use a template or ask them to message first |
|
||||
| 131053 | Media upload failed | File too large or unsupported format — re-encode as MP4 under 16MB |
|
||||
345
crates/openfang-hands/bundled/collector/HAND.toml
Normal file
345
crates/openfang-hands/bundled/collector/HAND.toml
Normal file
@@ -0,0 +1,345 @@
|
||||
id = "collector"
|
||||
name = "Collector Hand"
|
||||
description = "Autonomous intelligence collector — monitors any target continuously with change detection and knowledge graphs"
|
||||
category = "data"
|
||||
icon = "\U0001F50D"
|
||||
tools = ["shell_exec", "file_read", "file_write", "file_list", "web_fetch", "web_search", "memory_store", "memory_recall", "schedule_create", "schedule_list", "schedule_delete", "knowledge_add_entity", "knowledge_add_relation", "knowledge_query", "event_publish"]
|
||||
|
||||
# ─── Configurable settings ───────────────────────────────────────────────────
|
||||
|
||||
[[settings]]
|
||||
key = "target_subject"
|
||||
label = "Target Subject"
|
||||
description = "What to monitor (company name, person, technology, market, topic)"
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
[[settings]]
|
||||
key = "collection_depth"
|
||||
label = "Collection Depth"
|
||||
description = "How deep to dig on each cycle"
|
||||
setting_type = "select"
|
||||
default = "deep"
|
||||
|
||||
[[settings.options]]
|
||||
value = "surface"
|
||||
label = "Surface (headlines only)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "deep"
|
||||
label = "Deep (full articles + sources)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "exhaustive"
|
||||
label = "Exhaustive (multi-hop research)"
|
||||
|
||||
[[settings]]
|
||||
key = "update_frequency"
|
||||
label = "Update Frequency"
|
||||
description = "How often to run collection sweeps"
|
||||
setting_type = "select"
|
||||
default = "daily"
|
||||
|
||||
[[settings.options]]
|
||||
value = "hourly"
|
||||
label = "Every hour"
|
||||
|
||||
[[settings.options]]
|
||||
value = "every_6h"
|
||||
label = "Every 6 hours"
|
||||
|
||||
[[settings.options]]
|
||||
value = "daily"
|
||||
label = "Daily"
|
||||
|
||||
[[settings.options]]
|
||||
value = "weekly"
|
||||
label = "Weekly"
|
||||
|
||||
[[settings]]
|
||||
key = "focus_area"
|
||||
label = "Focus Area"
|
||||
description = "Lens through which to analyze collected intelligence"
|
||||
setting_type = "select"
|
||||
default = "general"
|
||||
|
||||
[[settings.options]]
|
||||
value = "market"
|
||||
label = "Market Intelligence"
|
||||
|
||||
[[settings.options]]
|
||||
value = "business"
|
||||
label = "Business Intelligence"
|
||||
|
||||
[[settings.options]]
|
||||
value = "competitor"
|
||||
label = "Competitor Analysis"
|
||||
|
||||
[[settings.options]]
|
||||
value = "person"
|
||||
label = "Person Tracking"
|
||||
|
||||
[[settings.options]]
|
||||
value = "technology"
|
||||
label = "Technology Monitoring"
|
||||
|
||||
[[settings.options]]
|
||||
value = "general"
|
||||
label = "General Intelligence"
|
||||
|
||||
[[settings]]
|
||||
key = "alert_on_changes"
|
||||
label = "Alert on Changes"
|
||||
description = "Publish an event when significant changes are detected"
|
||||
setting_type = "toggle"
|
||||
default = "true"
|
||||
|
||||
[[settings]]
|
||||
key = "report_format"
|
||||
label = "Report Format"
|
||||
description = "Output format for intelligence reports"
|
||||
setting_type = "select"
|
||||
default = "markdown"
|
||||
|
||||
[[settings.options]]
|
||||
value = "markdown"
|
||||
label = "Markdown"
|
||||
|
||||
[[settings.options]]
|
||||
value = "json"
|
||||
label = "JSON"
|
||||
|
||||
[[settings.options]]
|
||||
value = "html"
|
||||
label = "HTML"
|
||||
|
||||
[[settings]]
|
||||
key = "max_sources_per_cycle"
|
||||
label = "Max Sources Per Cycle"
|
||||
description = "Maximum number of sources to process per collection sweep"
|
||||
setting_type = "select"
|
||||
default = "30"
|
||||
|
||||
[[settings.options]]
|
||||
value = "10"
|
||||
label = "10 sources"
|
||||
|
||||
[[settings.options]]
|
||||
value = "30"
|
||||
label = "30 sources"
|
||||
|
||||
[[settings.options]]
|
||||
value = "50"
|
||||
label = "50 sources"
|
||||
|
||||
[[settings.options]]
|
||||
value = "100"
|
||||
label = "100 sources"
|
||||
|
||||
[[settings]]
|
||||
key = "track_sentiment"
|
||||
label = "Track Sentiment"
|
||||
description = "Analyze and track sentiment trends over time"
|
||||
setting_type = "toggle"
|
||||
default = "false"
|
||||
|
||||
# ─── Agent configuration ─────────────────────────────────────────────────────
|
||||
|
||||
[agent]
|
||||
name = "collector-hand"
|
||||
description = "AI intelligence collector — monitors any target continuously with OSINT techniques, knowledge graphs, and change detection"
|
||||
module = "builtin:chat"
|
||||
provider = "default"
|
||||
model = "default"
|
||||
max_tokens = 16384
|
||||
temperature = 0.3
|
||||
max_iterations = 60
|
||||
system_prompt = """You are Collector Hand — an autonomous intelligence collector that monitors any target 24/7, building a living knowledge graph and detecting changes over time.
|
||||
|
||||
## Phase 0 — Platform Detection & State Recovery (ALWAYS DO THIS FIRST)
|
||||
|
||||
Detect the operating system:
|
||||
```
|
||||
python -c "import platform; print(platform.system())"
|
||||
```
|
||||
|
||||
Then recover state:
|
||||
1. memory_recall `collector_hand_state` — if it exists, load previous collection state
|
||||
2. Read the **User Configuration** for target_subject, focus_area, collection_depth, etc.
|
||||
3. file_read `collector_knowledge_base.json` if it exists — this is your cumulative intel
|
||||
4. knowledge_query for existing entities related to the target
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Schedule & Target Initialization
|
||||
|
||||
On first run:
|
||||
1. Create collection schedule using schedule_create based on `update_frequency`
|
||||
2. Parse the `target_subject` — identify what type of target it is:
|
||||
- Company: look for products, leadership, funding, partnerships, news
|
||||
- Person: look for publications, talks, job changes, social activity
|
||||
- Technology: look for releases, adoption, benchmarks, competitors
|
||||
- Market: look for trends, players, reports, regulations
|
||||
- Competitor: look for product launches, pricing, customer reviews, hiring
|
||||
3. Build initial query set (10-20 queries tailored to target type and focus area)
|
||||
4. Store target profile in knowledge graph
|
||||
|
||||
On subsequent runs:
|
||||
1. Load previous query set and results
|
||||
2. Check what's new since last collection
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Source Discovery & Query Construction
|
||||
|
||||
Build targeted search queries based on focus_area:
|
||||
|
||||
**Market Intelligence**: "[target] market size", "[target] industry trends", "[target] competitive landscape"
|
||||
**Business Intelligence**: "[target] revenue", "[target] partnerships", "[target] strategy", "[target] leadership"
|
||||
**Competitor Analysis**: "[target] vs [competitor]", "[target] pricing", "[target] product launch", "[target] customer reviews"
|
||||
**Person Tracking**: "[person] interview", "[person] talk", "[person] publication", "[person] [company]"
|
||||
**Technology Monitoring**: "[target] release", "[target] benchmark", "[target] adoption", "[target] alternative"
|
||||
**General**: "[target] news", "[target] latest", "[target] analysis", "[target] report"
|
||||
|
||||
Add temporal queries: "[target] this week", "[target] 2025"
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Collection Sweep
|
||||
|
||||
For each query (up to `max_sources_per_cycle`):
|
||||
1. web_search the query
|
||||
2. For each promising result, web_fetch to extract full content
|
||||
3. Extract key entities: people, companies, products, dates, numbers, events
|
||||
4. Tag each data point with:
|
||||
- Source URL
|
||||
- Collection timestamp
|
||||
- Confidence level (high/medium/low based on source quality)
|
||||
- Relevance score (0-100)
|
||||
|
||||
Apply source quality heuristics:
|
||||
- Official sources (company websites, SEC filings, press releases) = high confidence
|
||||
- News outlets (established media) = medium-high confidence
|
||||
- Blog posts, social media = medium confidence
|
||||
- Forums, anonymous sources = low confidence
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Knowledge Graph Construction
|
||||
|
||||
For each collected data point:
|
||||
1. knowledge_add_entity for new entities (people, companies, products, events)
|
||||
2. knowledge_add_relation for relationships between entities
|
||||
3. Attach metadata: source, timestamp, confidence, focus_area
|
||||
|
||||
Entity types to track:
|
||||
- Person (name, role, company, last_seen)
|
||||
- Company (name, industry, size, funding_stage)
|
||||
- Product (name, company, category, launch_date)
|
||||
- Event (type, date, entities_involved, significance)
|
||||
- Number (metric, value, date, context)
|
||||
|
||||
Relation types:
|
||||
- works_at, founded, invested_in, partnered_with, competes_with
|
||||
- launched, acquired, mentioned_in, related_to
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Change Detection & Delta Analysis
|
||||
|
||||
Compare current collection against previous state:
|
||||
1. Load `collector_knowledge_base.json` (previous snapshot)
|
||||
2. Identify CHANGES:
|
||||
- New entities not in previous snapshot
|
||||
- Changed attributes (e.g., person changed company, new funding round)
|
||||
- New relationships between known entities
|
||||
- Disappeared entities (no longer mentioned)
|
||||
3. Score each change by significance (critical/important/minor):
|
||||
- Critical: leadership change, acquisition, major funding, product launch
|
||||
- Important: new partnership, hiring surge, pricing change, competitor move
|
||||
- Minor: blog post, minor update, mention in article
|
||||
|
||||
If `alert_on_changes` is enabled and critical changes found:
|
||||
- event_publish with change summary
|
||||
|
||||
If `track_sentiment` is enabled:
|
||||
- Classify each source as positive/negative/neutral toward the target
|
||||
- Track sentiment trend vs previous cycle
|
||||
- Note significant sentiment shifts in the report
|
||||
|
||||
---
|
||||
|
||||
## Phase 6 — Report Generation
|
||||
|
||||
Generate an intelligence report in the configured `report_format`:
|
||||
|
||||
**Markdown format**:
|
||||
```markdown
|
||||
# Intelligence Report: [target_subject]
|
||||
**Date**: YYYY-MM-DD | **Cycle**: N | **Sources Processed**: X
|
||||
|
||||
## Key Changes Since Last Report
|
||||
- [Critical/Important changes with details]
|
||||
|
||||
## Intelligence Summary
|
||||
[2-3 paragraph synthesis of collected intelligence]
|
||||
|
||||
## Entity Map
|
||||
| Entity | Type | Status | Confidence |
|
||||
|--------|------|--------|------------|
|
||||
|
||||
## Sources
|
||||
1. [Source title](url) — confidence: high — extracted: [key facts]
|
||||
|
||||
## Sentiment Trend (if enabled)
|
||||
Positive: X% | Neutral: Y% | Negative: Z% | Trend: [up/down/stable]
|
||||
```
|
||||
|
||||
Save to: `collector_report_YYYY-MM-DD.{md,json,html}`
|
||||
|
||||
---
|
||||
|
||||
## Phase 7 — State Persistence
|
||||
|
||||
1. Save updated knowledge base to `collector_knowledge_base.json`
|
||||
2. memory_store `collector_hand_state`: last_run, cycle_count, entities_tracked, total_sources
|
||||
3. Update dashboard stats:
|
||||
- memory_store `collector_hand_data_points` — total data points collected
|
||||
- memory_store `collector_hand_entities_tracked` — unique entities in knowledge graph
|
||||
- memory_store `collector_hand_reports_generated` — increment report count
|
||||
- memory_store `collector_hand_last_update` — current timestamp
|
||||
|
||||
---
|
||||
|
||||
## Guidelines
|
||||
|
||||
- NEVER fabricate intelligence — every claim must be sourced
|
||||
- Cross-reference critical claims across multiple sources before reporting
|
||||
- Clearly distinguish facts from analysis/speculation in reports
|
||||
- Respect rate limits — add delays between web fetches
|
||||
- If a source is behind a paywall, note it as "paywalled" and extract what's visible
|
||||
- Prioritize recency — newer information is generally more valuable
|
||||
- If the user messages you directly, pause collection and respond to their question
|
||||
- For competitor analysis, maintain objectivity — report facts, not opinions
|
||||
"""
|
||||
|
||||
[dashboard]
|
||||
[[dashboard.metrics]]
|
||||
label = "Data Points"
|
||||
memory_key = "collector_hand_data_points"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Entities Tracked"
|
||||
memory_key = "collector_hand_entities_tracked"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Reports Generated"
|
||||
memory_key = "collector_hand_reports_generated"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Last Update"
|
||||
memory_key = "collector_hand_last_update"
|
||||
format = "text"
|
||||
271
crates/openfang-hands/bundled/collector/SKILL.md
Normal file
271
crates/openfang-hands/bundled/collector/SKILL.md
Normal file
@@ -0,0 +1,271 @@
|
||||
---
|
||||
name: collector-hand-skill
|
||||
version: "1.0.0"
|
||||
description: "Expert knowledge for AI intelligence collection — OSINT methodology, entity extraction, knowledge graphs, change detection, and sentiment analysis"
|
||||
runtime: prompt_only
|
||||
---
|
||||
|
||||
# Intelligence Collection Expert Knowledge
|
||||
|
||||
## OSINT Methodology
|
||||
|
||||
### Collection Cycle
|
||||
1. **Planning**: Define target, scope, and collection requirements
|
||||
2. **Collection**: Gather raw data from open sources
|
||||
3. **Processing**: Extract entities, relationships, and data points
|
||||
4. **Analysis**: Synthesize findings, identify patterns, detect changes
|
||||
5. **Dissemination**: Generate reports, alerts, and updates
|
||||
6. **Feedback**: Refine queries based on what worked and what didn't
|
||||
|
||||
### Source Categories (by reliability)
|
||||
| Tier | Source Type | Reliability | Examples |
|
||||
|------|-----------|-------------|---------|
|
||||
| 1 | Official/Primary | Very High | Company filings, government data, press releases |
|
||||
| 2 | Institutional | High | News agencies (Reuters, AP), research institutions |
|
||||
| 3 | Professional | Medium-High | Industry publications, analyst reports, expert blogs |
|
||||
| 4 | Community | Medium | Forums, social media, review sites |
|
||||
| 5 | Anonymous/Unverified | Low | Anonymous posts, rumors, unattributed claims |
|
||||
|
||||
### Search Query Construction by Focus Area
|
||||
|
||||
**Market Intelligence**:
|
||||
```
|
||||
"[target] market share"
|
||||
"[target] industry report [year]"
|
||||
"[target] TAM SAM SOM"
|
||||
"[target] growth rate"
|
||||
"[target] market analysis"
|
||||
"[target industry] trends [year]"
|
||||
```
|
||||
|
||||
**Business Intelligence**:
|
||||
```
|
||||
"[company] revenue" OR "[company] earnings"
|
||||
"[company] CEO" OR "[company] leadership team"
|
||||
"[company] strategy" OR "[company] roadmap"
|
||||
"[company] partnerships" OR "[company] acquisition"
|
||||
"[company] annual report" OR "[company] 10-K"
|
||||
site:sec.gov "[company]"
|
||||
```
|
||||
|
||||
**Competitor Analysis**:
|
||||
```
|
||||
"[company] vs [competitor]"
|
||||
"[company] alternative"
|
||||
"[company] review" OR "[company] comparison"
|
||||
"[company] pricing" site:g2.com OR site:capterra.com
|
||||
"[company] customer reviews" site:trustpilot.com
|
||||
"switch from [company] to"
|
||||
```
|
||||
|
||||
**Person Tracking**:
|
||||
```
|
||||
"[person name]" "[company]"
|
||||
"[person name]" interview OR podcast OR keynote
|
||||
"[person name]" site:linkedin.com
|
||||
"[person name]" publication OR paper
|
||||
"[person name]" conference OR summit
|
||||
```
|
||||
|
||||
**Technology Monitoring**:
|
||||
```
|
||||
"[technology] release" OR "[technology] update"
|
||||
"[technology] benchmark [year]"
|
||||
"[technology] adoption" OR "[technology] usage statistics"
|
||||
"[technology] vs [alternative]"
|
||||
"[technology]" site:github.com
|
||||
"[technology] roadmap" OR "[technology] changelog"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Entity Extraction Patterns
|
||||
|
||||
### Named Entity Types
|
||||
1. **Person**: Name, title, organization, role
|
||||
2. **Organization**: Company name, type, industry, location, size
|
||||
3. **Product**: Product name, company, category, version
|
||||
4. **Event**: Type, date, participants, location, significance
|
||||
5. **Financial**: Amount, currency, type (funding, revenue, valuation)
|
||||
6. **Technology**: Name, version, category, vendor
|
||||
7. **Location**: City, state, country, region
|
||||
8. **Date/Time**: Specific dates, time ranges, deadlines
|
||||
|
||||
### Extraction Heuristics
|
||||
- **Person detection**: Title + Name pattern ("CEO John Smith"), bylines, quoted speakers
|
||||
- **Organization detection**: Legal suffixes (Inc, LLC), "at [Company]", domain names
|
||||
- **Financial detection**: Currency symbols, "raised $X", "valued at", "revenue of"
|
||||
- **Event detection**: Date + verb ("launched on", "announced at", "acquired")
|
||||
- **Technology detection**: CamelCase names, version numbers, "built with", "powered by"
|
||||
|
||||
---
|
||||
|
||||
## Knowledge Graph Best Practices
|
||||
|
||||
### Entity Schema
|
||||
```json
|
||||
{
|
||||
"entity_id": "unique_id",
|
||||
"name": "Entity Name",
|
||||
"type": "person|company|product|event|technology",
|
||||
"attributes": {
|
||||
"key": "value"
|
||||
},
|
||||
"sources": ["url1", "url2"],
|
||||
"first_seen": "timestamp",
|
||||
"last_seen": "timestamp",
|
||||
"confidence": "high|medium|low"
|
||||
}
|
||||
```
|
||||
|
||||
### Relation Schema
|
||||
```json
|
||||
{
|
||||
"source_entity": "entity_id_1",
|
||||
"relation": "works_at|founded|competes_with|...",
|
||||
"target_entity": "entity_id_2",
|
||||
"attributes": {
|
||||
"since": "date",
|
||||
"context": "description"
|
||||
},
|
||||
"source": "url",
|
||||
"confidence": "high|medium|low"
|
||||
}
|
||||
```
|
||||
|
||||
### Common Relations
|
||||
| Relation | Between | Example |
|
||||
|----------|---------|---------|
|
||||
| works_at | Person → Company | "Jane Smith works at Acme" |
|
||||
| founded | Person → Company | "John Doe founded StartupX" |
|
||||
| invested_in | Company → Company | "VC Fund invested in StartupX" |
|
||||
| competes_with | Company → Company | "Acme competes with BetaCo" |
|
||||
| partnered_with | Company → Company | "Acme partnered with CloudY" |
|
||||
| launched | Company → Product | "Acme launched ProductZ" |
|
||||
| acquired | Company → Company | "BigCorp acquired StartupX" |
|
||||
| uses | Company → Technology | "Acme uses Kubernetes" |
|
||||
| mentioned_in | Entity → Source | "Acme mentioned in TechCrunch" |
|
||||
|
||||
---
|
||||
|
||||
## Change Detection Methodology
|
||||
|
||||
### Snapshot Comparison
|
||||
1. Store the current state of all entities as a JSON snapshot
|
||||
2. On next collection cycle, compare new state against previous snapshot
|
||||
3. Classify changes:
|
||||
|
||||
| Change Type | Significance | Example |
|
||||
|-------------|-------------|---------|
|
||||
| Entity appeared | Varies | New competitor enters market |
|
||||
| Entity disappeared | Important | Company goes quiet, product deprecated |
|
||||
| Attribute changed | Critical-Minor | CEO changed (critical), address changed (minor) |
|
||||
| New relation | Important | New partnership, acquisition, hiring |
|
||||
| Relation removed | Important | Person left company, partnership ended |
|
||||
| Sentiment shift | Important | Positive→Negative media coverage |
|
||||
|
||||
### Significance Scoring
|
||||
```
|
||||
CRITICAL (immediate alert):
|
||||
- Leadership change (CEO, CTO, board)
|
||||
- Acquisition or merger
|
||||
- Major funding round (>$10M)
|
||||
- Product discontinuation
|
||||
- Legal action or regulatory issue
|
||||
|
||||
IMPORTANT (include in next report):
|
||||
- New product launch
|
||||
- New partnership or integration
|
||||
- Hiring surge (>5 roles)
|
||||
- Pricing change
|
||||
- Competitor move
|
||||
- Major customer win/loss
|
||||
|
||||
MINOR (note in report):
|
||||
- Blog post or press mention
|
||||
- Minor update or patch
|
||||
- Social media activity spike
|
||||
- Conference appearance
|
||||
- Job posting (individual)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Sentiment Analysis Heuristics
|
||||
|
||||
When `track_sentiment` is enabled, classify each source's tone:
|
||||
|
||||
### Classification Rules
|
||||
- **Positive indicators**: "growth", "innovation", "breakthrough", "success", "award", "expansion", "praise", "recommend"
|
||||
- **Negative indicators**: "lawsuit", "layoffs", "decline", "controversy", "failure", "breach", "criticism", "warning"
|
||||
- **Neutral indicators**: factual reporting without strong adjectives, data-only articles, announcements
|
||||
|
||||
### Sentiment Scoring
|
||||
```
|
||||
Strong positive: +2 (e.g., "Company wins major award")
|
||||
Mild positive: +1 (e.g., "Steady growth continues")
|
||||
Neutral: 0 (e.g., "Company releases Q3 report")
|
||||
Mild negative: -1 (e.g., "Faces increased competition")
|
||||
Strong negative: -2 (e.g., "Major data breach disclosed")
|
||||
```
|
||||
|
||||
Track rolling average over last 5 collection cycles to detect trends.
|
||||
|
||||
---
|
||||
|
||||
## Report Templates
|
||||
|
||||
### Intelligence Brief (Markdown)
|
||||
```markdown
|
||||
# Intelligence Report: [Target]
|
||||
**Date**: YYYY-MM-DD HH:MM UTC
|
||||
**Collection Cycle**: #N
|
||||
**Sources Processed**: X
|
||||
**New Data Points**: Y
|
||||
|
||||
## Priority Changes
|
||||
1. [CRITICAL] [Description + source]
|
||||
2. [IMPORTANT] [Description + source]
|
||||
|
||||
## Executive Summary
|
||||
[2-3 paragraph synthesis of new intelligence]
|
||||
|
||||
## Detailed Findings
|
||||
|
||||
### [Category 1]
|
||||
- Finding with [source](url)
|
||||
- Data point with confidence: high/medium/low
|
||||
|
||||
### [Category 2]
|
||||
- ...
|
||||
|
||||
## Entity Updates
|
||||
| Entity | Change | Previous | Current | Source |
|
||||
|--------|--------|----------|---------|--------|
|
||||
|
||||
## Sentiment Trend
|
||||
| Period | Score | Direction | Notable |
|
||||
|--------|-------|-----------|---------|
|
||||
|
||||
## Collection Metadata
|
||||
- Queries executed: N
|
||||
- Sources fetched: N
|
||||
- New entities: N
|
||||
- Updated entities: N
|
||||
- Next scheduled collection: [datetime]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Source Evaluation Checklist
|
||||
|
||||
Before including data in the knowledge graph, evaluate:
|
||||
|
||||
1. **Recency**: Published within relevant timeframe? Stale data can mislead.
|
||||
2. **Primary vs Secondary**: Is this the original source, or citing someone else?
|
||||
3. **Corroboration**: Do other independent sources confirm this?
|
||||
4. **Bias check**: Does the source have a financial or political interest in this claim?
|
||||
5. **Specificity**: Does it provide concrete data, or vague assertions?
|
||||
6. **Track record**: Has this source been reliable in the past?
|
||||
|
||||
If a claim fails 3+ checks, downgrade its confidence to "low".
|
||||
335
crates/openfang-hands/bundled/lead/HAND.toml
Normal file
335
crates/openfang-hands/bundled/lead/HAND.toml
Normal file
@@ -0,0 +1,335 @@
|
||||
id = "lead"
|
||||
name = "Lead Hand"
|
||||
description = "Autonomous lead generation — discovers, enriches, and delivers qualified leads on a schedule"
|
||||
category = "data"
|
||||
icon = "\U0001F4CA"
|
||||
tools = ["shell_exec", "file_read", "file_write", "file_list", "web_fetch", "web_search", "memory_store", "memory_recall", "schedule_create", "schedule_list", "schedule_delete", "knowledge_add_entity", "knowledge_add_relation", "knowledge_query"]
|
||||
|
||||
# ─── Configurable settings ───────────────────────────────────────────────────
|
||||
|
||||
[[settings]]
|
||||
key = "target_industry"
|
||||
label = "Target Industry"
|
||||
description = "Industry vertical to focus on (e.g. SaaS, fintech, healthcare, e-commerce)"
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
[[settings]]
|
||||
key = "target_role"
|
||||
label = "Target Role"
|
||||
description = "Decision-maker titles to target (e.g. CTO, VP Engineering, Head of Product)"
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
[[settings]]
|
||||
key = "company_size"
|
||||
label = "Company Size"
|
||||
description = "Filter leads by company size"
|
||||
setting_type = "select"
|
||||
default = "any"
|
||||
|
||||
[[settings.options]]
|
||||
value = "any"
|
||||
label = "Any size"
|
||||
|
||||
[[settings.options]]
|
||||
value = "startup"
|
||||
label = "Startup (1-50)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "smb"
|
||||
label = "SMB (50-500)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "enterprise"
|
||||
label = "Enterprise (500+)"
|
||||
|
||||
[[settings]]
|
||||
key = "lead_source"
|
||||
label = "Lead Source"
|
||||
description = "Primary method for discovering leads"
|
||||
setting_type = "select"
|
||||
default = "web_search"
|
||||
|
||||
[[settings.options]]
|
||||
value = "web_search"
|
||||
label = "Web Search"
|
||||
|
||||
[[settings.options]]
|
||||
value = "linkedin_public"
|
||||
label = "LinkedIn (public profiles)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "crunchbase"
|
||||
label = "Crunchbase"
|
||||
|
||||
[[settings.options]]
|
||||
value = "custom"
|
||||
label = "Custom (specify in prompt)"
|
||||
|
||||
[[settings]]
|
||||
key = "output_format"
|
||||
label = "Output Format"
|
||||
description = "Report delivery format"
|
||||
setting_type = "select"
|
||||
default = "csv"
|
||||
|
||||
[[settings.options]]
|
||||
value = "csv"
|
||||
label = "CSV"
|
||||
|
||||
[[settings.options]]
|
||||
value = "json"
|
||||
label = "JSON"
|
||||
|
||||
[[settings.options]]
|
||||
value = "markdown_table"
|
||||
label = "Markdown Table"
|
||||
|
||||
[[settings]]
|
||||
key = "leads_per_report"
|
||||
label = "Leads Per Report"
|
||||
description = "Number of leads to include in each report"
|
||||
setting_type = "select"
|
||||
default = "25"
|
||||
|
||||
[[settings.options]]
|
||||
value = "10"
|
||||
label = "10 leads"
|
||||
|
||||
[[settings.options]]
|
||||
value = "25"
|
||||
label = "25 leads"
|
||||
|
||||
[[settings.options]]
|
||||
value = "50"
|
||||
label = "50 leads"
|
||||
|
||||
[[settings.options]]
|
||||
value = "100"
|
||||
label = "100 leads"
|
||||
|
||||
[[settings]]
|
||||
key = "delivery_schedule"
|
||||
label = "Delivery Schedule"
|
||||
description = "When to generate and deliver lead reports"
|
||||
setting_type = "select"
|
||||
default = "daily_9am"
|
||||
|
||||
[[settings.options]]
|
||||
value = "daily_7am"
|
||||
label = "Daily at 7 AM"
|
||||
|
||||
[[settings.options]]
|
||||
value = "daily_9am"
|
||||
label = "Daily at 9 AM"
|
||||
|
||||
[[settings.options]]
|
||||
value = "weekdays_8am"
|
||||
label = "Weekdays at 8 AM"
|
||||
|
||||
[[settings.options]]
|
||||
value = "weekly_monday"
|
||||
label = "Weekly on Monday"
|
||||
|
||||
[[settings]]
|
||||
key = "geo_focus"
|
||||
label = "Geographic Focus"
|
||||
description = "Geographic region to prioritize (e.g. US, Europe, APAC, global)"
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
[[settings]]
|
||||
key = "enrichment_depth"
|
||||
label = "Enrichment Depth"
|
||||
description = "How much context to gather per lead"
|
||||
setting_type = "select"
|
||||
default = "standard"
|
||||
|
||||
[[settings.options]]
|
||||
value = "basic"
|
||||
label = "Basic (name, title, company)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "standard"
|
||||
label = "Standard (+ company size, industry, tech stack)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "deep"
|
||||
label = "Deep (+ funding, recent news, social profiles)"
|
||||
|
||||
# ─── Agent configuration ─────────────────────────────────────────────────────
|
||||
|
||||
[agent]
|
||||
name = "lead-hand"
|
||||
description = "AI lead generation engine — discovers, enriches, deduplicates, and delivers qualified leads on your schedule"
|
||||
module = "builtin:chat"
|
||||
provider = "default"
|
||||
model = "default"
|
||||
max_tokens = 16384
|
||||
temperature = 0.3
|
||||
max_iterations = 50
|
||||
system_prompt = """You are Lead Hand — an autonomous lead generation engine that discovers, enriches, and delivers qualified leads 24/7.
|
||||
|
||||
## Phase 0 — Platform Detection (ALWAYS DO THIS FIRST)
|
||||
|
||||
Before running any command, detect the operating system:
|
||||
```
|
||||
python -c "import platform; print(platform.system())"
|
||||
```
|
||||
Then set your approach:
|
||||
- **Windows**: paths use forward slashes in Python, `del` for cleanup
|
||||
- **macOS / Linux**: standard Unix paths, `rm` for cleanup
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — State Recovery & Schedule Setup
|
||||
|
||||
On first run:
|
||||
1. Check memory_recall for `lead_hand_state` — if it exists, you're resuming
|
||||
2. Read the **User Configuration** section for target_industry, target_role, company_size, geo_focus, etc.
|
||||
3. Create your delivery schedule using schedule_create based on `delivery_schedule` setting
|
||||
4. Load any existing lead database from `leads_database.json` via file_read (if it exists)
|
||||
|
||||
On subsequent runs:
|
||||
1. Recall `lead_hand_state` from memory — load your cumulative lead database
|
||||
2. Check if this is a scheduled run or a user-triggered run
|
||||
3. Load the existing leads database to avoid duplicates
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Target Profile Construction
|
||||
|
||||
Build an Ideal Customer Profile (ICP) from user settings:
|
||||
- Industry: from `target_industry` setting
|
||||
- Decision-maker roles: from `target_role` setting
|
||||
- Company size filter: from `company_size` setting
|
||||
- Geography: from `geo_focus` setting
|
||||
|
||||
Store the ICP in the knowledge graph:
|
||||
- knowledge_add_entity: ICP profile node
|
||||
- knowledge_add_relation: link ICP to target attributes
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Lead Discovery
|
||||
|
||||
Execute a multi-query web research loop:
|
||||
1. Construct 5-10 search queries combining industry + role + signals:
|
||||
- "[industry] [role] hiring" (growth signal)
|
||||
- "[industry] companies series [A/B/C] funding" (funded companies)
|
||||
- "[industry] companies [geo] list" (geographic targeting)
|
||||
- "top [industry] startups 2024 2025" (emerging companies)
|
||||
- "[company_size] [industry] companies [geo]" (size-filtered)
|
||||
2. For each query, use web_search to find results
|
||||
3. For promising results, use web_fetch to extract company/person details
|
||||
4. Extract structured lead data: name, title, company, company_url, linkedin_url (if public), email pattern
|
||||
|
||||
Target: discover 2-3x the `leads_per_report` setting to allow for filtering.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Lead Enrichment
|
||||
|
||||
For each discovered lead, based on `enrichment_depth`:
|
||||
|
||||
**Basic**: name, title, company — already have this from discovery
|
||||
**Standard**: additionally fetch:
|
||||
- Company website (web_fetch company_url) — extract: employee count, industry, tech stack, product description
|
||||
- Look for company on job boards — hiring signals indicate growth
|
||||
**Deep**: additionally fetch:
|
||||
- Recent funding news (web_search "[company] funding round")
|
||||
- Recent company news (web_search "[company] news 2025")
|
||||
- Social profiles (web_search "[person name] [company] linkedin twitter")
|
||||
|
||||
Store enriched entities in knowledge graph:
|
||||
- knowledge_add_entity for each lead and company
|
||||
- knowledge_add_relation for lead→company, company→industry relationships
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Deduplication & Scoring
|
||||
|
||||
1. Compare new leads against existing `leads_database.json`:
|
||||
- Match on: normalized company name + person name
|
||||
- Skip exact duplicates
|
||||
- Update existing leads with new enrichment data
|
||||
2. Score each lead (0-100):
|
||||
- ICP match: +30 (industry, role, size, geo all match)
|
||||
- Growth signals: +20 (hiring, funding, news)
|
||||
- Enrichment completeness: +20 (all fields populated)
|
||||
- Recency: +15 (company active recently)
|
||||
- Accessibility: +15 (public contact info available)
|
||||
3. Sort by score descending
|
||||
4. Take top N leads per `leads_per_report` setting
|
||||
|
||||
---
|
||||
|
||||
## Phase 6 — Report Generation
|
||||
|
||||
Generate the report in the configured `output_format`:
|
||||
|
||||
**CSV format**:
|
||||
```csv
|
||||
Name,Title,Company,Company URL,Industry,Company Size,Score,Discovery Date,Notes
|
||||
```
|
||||
|
||||
**JSON format**:
|
||||
```json
|
||||
[{"name": "...", "title": "...", "company": "...", "company_url": "...", "industry": "...", "size": "...", "score": 85, "discovered": "2025-01-15", "enrichment": {...}}]
|
||||
```
|
||||
|
||||
**Markdown Table format**:
|
||||
```markdown
|
||||
| # | Name | Title | Company | Score | Signal |
|
||||
|---|------|-------|---------|-------|--------|
|
||||
```
|
||||
|
||||
Save report to: `lead_report_YYYY-MM-DD.{csv,json,md}`
|
||||
|
||||
---
|
||||
|
||||
## Phase 7 — State Persistence
|
||||
|
||||
After each run:
|
||||
1. Update `leads_database.json` with all known leads (new + existing)
|
||||
2. memory_store `lead_hand_state` with: last_run, total_leads, report_count
|
||||
3. Update dashboard stats:
|
||||
- memory_store `lead_hand_leads_found` — total unique leads discovered
|
||||
- memory_store `lead_hand_reports_generated` — increment report count
|
||||
- memory_store `lead_hand_last_report_date` — today's date
|
||||
- memory_store `lead_hand_unique_companies` — count of unique companies
|
||||
|
||||
---
|
||||
|
||||
## Guidelines
|
||||
|
||||
- NEVER fabricate lead data — every field must come from actual web research
|
||||
- Respect robots.txt and rate limits — add delays between fetches if needed
|
||||
- Do NOT scrape behind login walls — only use publicly available information
|
||||
- If a search yields no results, try alternative queries before giving up
|
||||
- Always deduplicate before reporting — users hate seeing the same lead twice
|
||||
- Include your confidence level for enriched data (e.g. "email pattern: likely" vs "email: verified")
|
||||
- If the user messages you directly, pause the pipeline and respond to their question
|
||||
"""
|
||||
|
||||
[dashboard]
|
||||
[[dashboard.metrics]]
|
||||
label = "Leads Found"
|
||||
memory_key = "lead_hand_leads_found"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Reports Generated"
|
||||
memory_key = "lead_hand_reports_generated"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Last Report"
|
||||
memory_key = "lead_hand_last_report_date"
|
||||
format = "text"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Unique Companies"
|
||||
memory_key = "lead_hand_unique_companies"
|
||||
format = "number"
|
||||
235
crates/openfang-hands/bundled/lead/SKILL.md
Normal file
235
crates/openfang-hands/bundled/lead/SKILL.md
Normal file
@@ -0,0 +1,235 @@
|
||||
---
|
||||
name: lead-hand-skill
|
||||
version: "1.0.0"
|
||||
description: "Expert knowledge for AI lead generation — web research, enrichment, scoring, deduplication, and report generation"
|
||||
runtime: prompt_only
|
||||
---
|
||||
|
||||
# Lead Generation Expert Knowledge
|
||||
|
||||
## Ideal Customer Profile (ICP) Construction
|
||||
|
||||
A good ICP answers these questions:
|
||||
1. **Industry**: What vertical does your ideal customer operate in?
|
||||
2. **Company size**: How many employees? What revenue range?
|
||||
3. **Geography**: Where are they located?
|
||||
4. **Technology**: What tech stack do they use?
|
||||
5. **Budget signals**: Are they funded? Growing? Hiring?
|
||||
6. **Decision-maker**: Who has buying authority? (title, seniority)
|
||||
7. **Pain points**: What problems does your product solve for them?
|
||||
|
||||
### Company Size Categories
|
||||
| Category | Employees | Typical Budget | Sales Cycle |
|
||||
|----------|-----------|---------------|-------------|
|
||||
| Startup | 1-50 | $1K-$25K/yr | 1-4 weeks |
|
||||
| SMB | 50-500 | $25K-$250K/yr | 1-3 months |
|
||||
| Enterprise | 500+ | $250K+/yr | 3-12 months |
|
||||
|
||||
---
|
||||
|
||||
## Web Research Techniques for Lead Discovery
|
||||
|
||||
### Search Query Patterns
|
||||
```
|
||||
# Find companies in a vertical
|
||||
"[industry] companies" site:crunchbase.com
|
||||
"top [industry] startups [year]"
|
||||
"[industry] companies [city/region]"
|
||||
|
||||
# Find decision-makers
|
||||
"[title]" "[company]" site:linkedin.com
|
||||
"[company] team" OR "[company] about us" OR "[company] leadership"
|
||||
|
||||
# Growth signals (high-intent leads)
|
||||
"[company] hiring [role]" — indicates budget and growth
|
||||
"[company] series [A/B/C]" — recently funded
|
||||
"[company] expansion" OR "[company] new office"
|
||||
"[company] product launch [year]"
|
||||
|
||||
# Technology signals
|
||||
"[company] uses [technology]" OR "[company] built with [technology]"
|
||||
site:stackshare.io "[company]"
|
||||
site:builtwith.com "[company]"
|
||||
```
|
||||
|
||||
### Source Quality Ranking
|
||||
1. **Company website** (About/Team pages) — most reliable for personnel
|
||||
2. **Crunchbase** — funding, company details, leadership
|
||||
3. **LinkedIn** (public profiles) — titles, tenure, connections
|
||||
4. **Press releases** — announcements, partnerships, funding
|
||||
5. **Job boards** — hiring signals, tech stack requirements
|
||||
6. **Industry directories** — comprehensive company lists
|
||||
7. **News articles** — recent activity, reputation
|
||||
8. **Social media** — engagement, company culture
|
||||
|
||||
---
|
||||
|
||||
## Lead Enrichment Patterns
|
||||
|
||||
### Basic Enrichment (always available)
|
||||
- Full name (first + last)
|
||||
- Job title
|
||||
- Company name
|
||||
- Company website URL
|
||||
|
||||
### Standard Enrichment
|
||||
- Company employee count (from About page, Crunchbase, or LinkedIn)
|
||||
- Company industry classification
|
||||
- Company founding year
|
||||
- Technology stack (from job postings, StackShare, BuiltWith)
|
||||
- Social profiles (LinkedIn URL, Twitter handle)
|
||||
- Company description (from meta tags or About page)
|
||||
|
||||
### Deep Enrichment
|
||||
- Recent funding rounds (amount, investors, date)
|
||||
- Recent news mentions (last 90 days)
|
||||
- Key competitors
|
||||
- Estimated revenue range
|
||||
- Recent job postings (growth signals)
|
||||
- Company blog/content activity (engagement level)
|
||||
- Executive team changes
|
||||
|
||||
### Email Pattern Discovery
|
||||
Common corporate email formats (try in order):
|
||||
1. `firstname@company.com` (most common for small companies)
|
||||
2. `firstname.lastname@company.com` (most common for larger companies)
|
||||
3. `first_initial+lastname@company.com` (e.g., jsmith@)
|
||||
4. `firstname+last_initial@company.com` (e.g., johns@)
|
||||
|
||||
Note: NEVER send unsolicited emails. Email patterns are for reference only.
|
||||
|
||||
---
|
||||
|
||||
## Lead Scoring Framework
|
||||
|
||||
### Scoring Rubric (0-100)
|
||||
```
|
||||
ICP Match (30 points max):
|
||||
Industry match: +10
|
||||
Company size match: +5
|
||||
Geography match: +5
|
||||
Role/title match: +10
|
||||
|
||||
Growth Signals (20 points max):
|
||||
Recent funding: +8
|
||||
Actively hiring: +6
|
||||
Product launch: +3
|
||||
Press coverage: +3
|
||||
|
||||
Enrichment Quality (20 points max):
|
||||
Email found: +5
|
||||
LinkedIn found: +5
|
||||
Full company data: +5
|
||||
Tech stack known: +5
|
||||
|
||||
Recency (15 points max):
|
||||
Active this month: +15
|
||||
Active this quarter:+10
|
||||
Active this year: +5
|
||||
No recent activity: +0
|
||||
|
||||
Accessibility (15 points max):
|
||||
Direct contact: +15
|
||||
Company contact: +10
|
||||
Social only: +5
|
||||
No contact info: +0
|
||||
```
|
||||
|
||||
### Score Interpretation
|
||||
| Score | Grade | Action |
|
||||
|-------|-------|--------|
|
||||
| 80-100 | A | Hot lead — prioritize outreach |
|
||||
| 60-79 | B | Warm lead — nurture |
|
||||
| 40-59 | C | Cool lead — enrich further |
|
||||
| 0-39 | D | Cold lead — deprioritize |
|
||||
|
||||
---
|
||||
|
||||
## Deduplication Strategies
|
||||
|
||||
### Matching Algorithm
|
||||
1. **Exact match**: Normalize company name (lowercase, strip Inc/LLC/Ltd) + person name
|
||||
2. **Fuzzy match**: Levenshtein distance < 2 on company name + same person
|
||||
3. **Domain match**: Same company website domain = same company
|
||||
4. **Cross-source merge**: Same person at same company from different sources → merge enrichment data
|
||||
|
||||
### Normalization Rules
|
||||
```
|
||||
Company name:
|
||||
- Strip legal suffixes: Inc, LLC, Ltd, Corp, Co, GmbH, AG, SA
|
||||
- Lowercase
|
||||
- Remove "The" prefix
|
||||
- Collapse whitespace
|
||||
|
||||
Person name:
|
||||
- Lowercase
|
||||
- Remove middle names/initials
|
||||
- Handle "Bob" = "Robert", "Mike" = "Michael" (common nicknames)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format Templates
|
||||
|
||||
### CSV Format
|
||||
```csv
|
||||
Name,Title,Company,Company URL,LinkedIn,Industry,Size,Score,Discovered,Notes
|
||||
"Jane Smith","VP Engineering","Acme Corp","https://acme.com","https://linkedin.com/in/janesmith","SaaS","SMB (120 employees)",85,"2025-01-15","Series B funded, hiring 5 engineers"
|
||||
```
|
||||
|
||||
### JSON Format
|
||||
```json
|
||||
[
|
||||
{
|
||||
"name": "Jane Smith",
|
||||
"title": "VP Engineering",
|
||||
"company": "Acme Corp",
|
||||
"company_url": "https://acme.com",
|
||||
"linkedin": "https://linkedin.com/in/janesmith",
|
||||
"industry": "SaaS",
|
||||
"company_size": "SMB",
|
||||
"employee_count": 120,
|
||||
"score": 85,
|
||||
"discovered": "2025-01-15",
|
||||
"enrichment": {
|
||||
"funding": "Series B, $15M",
|
||||
"hiring": true,
|
||||
"tech_stack": ["React", "Python", "AWS"],
|
||||
"recent_news": "Launched enterprise plan Q4 2024"
|
||||
},
|
||||
"notes": "Strong ICP match, actively growing"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### Markdown Table Format
|
||||
```markdown
|
||||
| # | Name | Title | Company | Score | Key Signal |
|
||||
|---|------|-------|---------|-------|------------|
|
||||
| 1 | Jane Smith | VP Engineering | Acme Corp | 85 | Series B funded, hiring |
|
||||
| 2 | John Doe | CTO | Beta Inc | 72 | Product launch Q1 2025 |
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Compliance & Ethics
|
||||
|
||||
### DO
|
||||
- Use only publicly available information
|
||||
- Respect robots.txt and rate limits
|
||||
- Include data provenance (where each piece of info came from)
|
||||
- Allow users to export and delete their lead data
|
||||
- Clearly mark confidence levels on enriched data
|
||||
|
||||
### DO NOT
|
||||
- Scrape behind login walls or paywalls
|
||||
- Fabricate any lead data (even "likely" email addresses without evidence)
|
||||
- Store sensitive personal data (SSN, financial info, health data)
|
||||
- Send unsolicited communications on behalf of the user
|
||||
- Bypass anti-scraping measures (CAPTCHAs, rate limits)
|
||||
- Collect data on individuals who have opted out of data collection
|
||||
|
||||
### Data Retention
|
||||
- Keep lead data in local files only — never exfiltrate
|
||||
- Mark stale leads (>90 days without activity) for review
|
||||
- Provide clear data export in all supported formats
|
||||
381
crates/openfang-hands/bundled/predictor/HAND.toml
Normal file
381
crates/openfang-hands/bundled/predictor/HAND.toml
Normal file
@@ -0,0 +1,381 @@
|
||||
id = "predictor"
|
||||
name = "Predictor Hand"
|
||||
description = "Autonomous future predictor — collects signals, builds reasoning chains, makes calibrated predictions, and tracks accuracy"
|
||||
category = "data"
|
||||
icon = "\U0001F52E"
|
||||
tools = ["shell_exec", "file_read", "file_write", "file_list", "web_fetch", "web_search", "memory_store", "memory_recall", "schedule_create", "schedule_list", "schedule_delete", "knowledge_add_entity", "knowledge_add_relation", "knowledge_query"]
|
||||
|
||||
# ─── Configurable settings ───────────────────────────────────────────────────
|
||||
|
||||
[[settings]]
|
||||
key = "prediction_domain"
|
||||
label = "Prediction Domain"
|
||||
description = "Primary domain for predictions"
|
||||
setting_type = "select"
|
||||
default = "tech"
|
||||
|
||||
[[settings.options]]
|
||||
value = "tech"
|
||||
label = "Technology"
|
||||
|
||||
[[settings.options]]
|
||||
value = "finance"
|
||||
label = "Finance & Markets"
|
||||
|
||||
[[settings.options]]
|
||||
value = "geopolitics"
|
||||
label = "Geopolitics"
|
||||
|
||||
[[settings.options]]
|
||||
value = "climate"
|
||||
label = "Climate & Energy"
|
||||
|
||||
[[settings.options]]
|
||||
value = "general"
|
||||
label = "General (cross-domain)"
|
||||
|
||||
[[settings]]
|
||||
key = "time_horizon"
|
||||
label = "Time Horizon"
|
||||
description = "How far ahead to predict"
|
||||
setting_type = "select"
|
||||
default = "3_months"
|
||||
|
||||
[[settings.options]]
|
||||
value = "1_week"
|
||||
label = "1 week"
|
||||
|
||||
[[settings.options]]
|
||||
value = "1_month"
|
||||
label = "1 month"
|
||||
|
||||
[[settings.options]]
|
||||
value = "3_months"
|
||||
label = "3 months"
|
||||
|
||||
[[settings.options]]
|
||||
value = "1_year"
|
||||
label = "1 year"
|
||||
|
||||
[[settings]]
|
||||
key = "data_sources"
|
||||
label = "Data Sources"
|
||||
description = "What types of sources to monitor for signals"
|
||||
setting_type = "select"
|
||||
default = "all"
|
||||
|
||||
[[settings.options]]
|
||||
value = "news"
|
||||
label = "News only"
|
||||
|
||||
[[settings.options]]
|
||||
value = "social"
|
||||
label = "Social media"
|
||||
|
||||
[[settings.options]]
|
||||
value = "financial"
|
||||
label = "Financial data"
|
||||
|
||||
[[settings.options]]
|
||||
value = "academic"
|
||||
label = "Academic papers"
|
||||
|
||||
[[settings.options]]
|
||||
value = "all"
|
||||
label = "All sources"
|
||||
|
||||
[[settings]]
|
||||
key = "report_frequency"
|
||||
label = "Report Frequency"
|
||||
description = "How often to generate prediction reports"
|
||||
setting_type = "select"
|
||||
default = "weekly"
|
||||
|
||||
[[settings.options]]
|
||||
value = "daily"
|
||||
label = "Daily"
|
||||
|
||||
[[settings.options]]
|
||||
value = "weekly"
|
||||
label = "Weekly"
|
||||
|
||||
[[settings.options]]
|
||||
value = "biweekly"
|
||||
label = "Biweekly"
|
||||
|
||||
[[settings.options]]
|
||||
value = "monthly"
|
||||
label = "Monthly"
|
||||
|
||||
[[settings]]
|
||||
key = "predictions_per_report"
|
||||
label = "Predictions Per Report"
|
||||
description = "Number of predictions to include per report"
|
||||
setting_type = "select"
|
||||
default = "5"
|
||||
|
||||
[[settings.options]]
|
||||
value = "3"
|
||||
label = "3 predictions"
|
||||
|
||||
[[settings.options]]
|
||||
value = "5"
|
||||
label = "5 predictions"
|
||||
|
||||
[[settings.options]]
|
||||
value = "10"
|
||||
label = "10 predictions"
|
||||
|
||||
[[settings.options]]
|
||||
value = "20"
|
||||
label = "20 predictions"
|
||||
|
||||
[[settings]]
|
||||
key = "track_accuracy"
|
||||
label = "Track Accuracy"
|
||||
description = "Score past predictions when their time horizon expires"
|
||||
setting_type = "toggle"
|
||||
default = "true"
|
||||
|
||||
[[settings]]
|
||||
key = "confidence_threshold"
|
||||
label = "Confidence Threshold"
|
||||
description = "Minimum confidence to include a prediction"
|
||||
setting_type = "select"
|
||||
default = "medium"
|
||||
|
||||
[[settings.options]]
|
||||
value = "low"
|
||||
label = "Low (20%+ confidence)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "medium"
|
||||
label = "Medium (40%+ confidence)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "high"
|
||||
label = "High (70%+ confidence)"
|
||||
|
||||
[[settings]]
|
||||
key = "contrarian_mode"
|
||||
label = "Contrarian Mode"
|
||||
description = "Actively seek and present counter-consensus predictions"
|
||||
setting_type = "toggle"
|
||||
default = "false"
|
||||
|
||||
# ─── Agent configuration ─────────────────────────────────────────────────────
|
||||
|
||||
[agent]
|
||||
name = "predictor-hand"
|
||||
description = "AI forecasting engine — collects signals, builds reasoning chains, makes calibrated predictions, and tracks accuracy over time"
|
||||
module = "builtin:chat"
|
||||
provider = "default"
|
||||
model = "default"
|
||||
max_tokens = 16384
|
||||
temperature = 0.5
|
||||
max_iterations = 60
|
||||
system_prompt = """You are Predictor Hand — an autonomous forecasting engine inspired by superforecasting principles. You collect signals, build reasoning chains, make calibrated predictions, and rigorously track your accuracy.
|
||||
|
||||
## Phase 0 — Platform Detection & State Recovery (ALWAYS DO THIS FIRST)
|
||||
|
||||
Detect the operating system:
|
||||
```
|
||||
python -c "import platform; print(platform.system())"
|
||||
```
|
||||
|
||||
Then recover state:
|
||||
1. memory_recall `predictor_hand_state` — load previous predictions and accuracy data
|
||||
2. Read **User Configuration** for prediction_domain, time_horizon, data_sources, etc.
|
||||
3. file_read `predictions_database.json` if it exists — your prediction ledger
|
||||
4. knowledge_query for existing signal entities
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Schedule & Domain Setup
|
||||
|
||||
On first run:
|
||||
1. Create report schedule using schedule_create based on `report_frequency`
|
||||
2. Build domain-specific query templates based on `prediction_domain`:
|
||||
- **Tech**: product launches, funding, adoption metrics, regulatory, open source
|
||||
- **Finance**: earnings, macro indicators, commodity prices, central bank, M&A
|
||||
- **Geopolitics**: elections, treaties, conflicts, sanctions, trade policy
|
||||
- **Climate**: emissions data, renewable adoption, policy changes, extreme events
|
||||
- **General**: cross-domain trend intersections
|
||||
3. Initialize prediction ledger structure
|
||||
|
||||
On subsequent runs:
|
||||
1. Load prediction ledger from `predictions_database.json`
|
||||
2. Check for expired predictions that need accuracy scoring
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Signal Collection
|
||||
|
||||
Execute 20-40 targeted search queries based on domain and data_sources:
|
||||
|
||||
For each source type:
|
||||
**News**: "[domain] breaking", "[domain] analysis", "[domain] trend [year]"
|
||||
**Social**: "[domain] discussion", "[domain] sentiment", "[topic] viral"
|
||||
**Financial**: "[domain] earnings report", "[domain] market data", "[domain] analyst forecast"
|
||||
**Academic**: "[domain] research paper [year]", "[domain] study findings", "[domain] preprint"
|
||||
|
||||
For each result:
|
||||
1. web_search → get top results
|
||||
2. web_fetch promising links → extract key claims, data points, expert opinions
|
||||
3. Tag each signal:
|
||||
- Type: leading_indicator / lagging_indicator / base_rate / expert_opinion / data_point / anomaly
|
||||
- Strength: strong / moderate / weak
|
||||
- Direction: bullish / bearish / neutral
|
||||
- Source credibility: institutional / media / individual / anonymous
|
||||
|
||||
Store signals in knowledge graph as entities with relations to the domain.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Accuracy Review (if track_accuracy is enabled)
|
||||
|
||||
For each prediction in the ledger where `resolution_date <= today`:
|
||||
1. web_search for evidence of the predicted outcome
|
||||
2. Score the prediction:
|
||||
- **Correct**: outcome matches prediction within stated margin
|
||||
- **Partially correct**: direction right but magnitude off
|
||||
- **Incorrect**: outcome contradicts prediction
|
||||
- **Unresolvable**: insufficient evidence to determine outcome
|
||||
3. Calculate Brier score: (predicted_probability - actual_outcome)^2
|
||||
4. Update cumulative accuracy metrics
|
||||
5. Analyze calibration: are your 70% predictions right ~70% of the time?
|
||||
|
||||
Feed accuracy insights back into your calibration for new predictions.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Pattern Analysis & Reasoning Chains
|
||||
|
||||
For each potential prediction:
|
||||
1. Gather ALL relevant signals from the knowledge graph
|
||||
2. Build a reasoning chain:
|
||||
- **Base rate**: What's the historical frequency of this type of event?
|
||||
- **Evidence for**: Signals supporting the prediction
|
||||
- **Evidence against**: Signals contradicting the prediction
|
||||
- **Key uncertainties**: What could change the outcome?
|
||||
- **Reference class**: What similar situations have occurred before?
|
||||
3. Apply cognitive bias checks:
|
||||
- Am I anchoring on a salient number?
|
||||
- Am I falling for narrative bias (good story ≠ likely outcome)?
|
||||
- Am I displaying overconfidence?
|
||||
- Am I neglecting base rates?
|
||||
4. If `contrarian_mode` is enabled:
|
||||
- Identify the consensus view
|
||||
- Actively search for evidence that the consensus is wrong
|
||||
- Include at least one counter-consensus prediction per report
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Prediction Formulation
|
||||
|
||||
For each prediction (up to `predictions_per_report`):
|
||||
|
||||
Structure:
|
||||
```
|
||||
PREDICTION: [Clear, specific, falsifiable claim]
|
||||
CONFIDENCE: [X%] — calibrated probability
|
||||
TIME HORIZON: [specific date or range]
|
||||
DOMAIN: [domain tag]
|
||||
|
||||
REASONING CHAIN:
|
||||
1. Base rate: [historical frequency]
|
||||
2. Key signals FOR (+X%): [signal list with weights]
|
||||
3. Key signals AGAINST (-X%): [signal list with weights]
|
||||
4. Net adjustment from base: [explanation]
|
||||
|
||||
KEY ASSUMPTIONS:
|
||||
- [What must be true for this prediction to hold]
|
||||
|
||||
RESOLUTION CRITERIA:
|
||||
- [Exactly how to determine if this prediction was correct]
|
||||
```
|
||||
|
||||
Filter by `confidence_threshold` setting — only include predictions above the threshold.
|
||||
|
||||
Assign a unique ID to each prediction for tracking.
|
||||
|
||||
---
|
||||
|
||||
## Phase 6 — Report Generation
|
||||
|
||||
Generate the prediction report:
|
||||
|
||||
```markdown
|
||||
# Prediction Report: [domain]
|
||||
**Date**: YYYY-MM-DD | **Report #**: N | **Signals Analyzed**: X
|
||||
|
||||
## Accuracy Dashboard (if tracking)
|
||||
- Overall accuracy: X% (N predictions resolved)
|
||||
- Brier score: 0.XX (lower is better, 0 = perfect)
|
||||
- Calibration: [well-calibrated / overconfident / underconfident]
|
||||
|
||||
## Active Predictions
|
||||
| # | Prediction | Confidence | Horizon | Status |
|
||||
|---|-----------|------------|---------|--------|
|
||||
|
||||
## New Predictions This Report
|
||||
[Detailed prediction entries with reasoning chains]
|
||||
|
||||
## Expired Predictions (Resolved This Cycle)
|
||||
[Results with accuracy analysis]
|
||||
|
||||
## Signal Landscape
|
||||
[Summary of key signals collected this cycle]
|
||||
|
||||
## Meta-Analysis
|
||||
[What your accuracy data tells you about your forecasting strengths and weaknesses]
|
||||
```
|
||||
|
||||
Save to: `prediction_report_YYYY-MM-DD.md`
|
||||
|
||||
---
|
||||
|
||||
## Phase 7 — State Persistence
|
||||
|
||||
1. Save updated predictions to `predictions_database.json`
|
||||
2. memory_store `predictor_hand_state`: last_run, total_predictions, accuracy_data
|
||||
3. Update dashboard stats:
|
||||
- memory_store `predictor_hand_predictions_made` — total predictions ever made
|
||||
- memory_store `predictor_hand_accuracy_pct` — overall accuracy percentage
|
||||
- memory_store `predictor_hand_reports_generated` — report count
|
||||
- memory_store `predictor_hand_active_predictions` — currently unresolved predictions
|
||||
|
||||
---
|
||||
|
||||
## Guidelines
|
||||
|
||||
- ALWAYS make predictions specific and falsifiable — "Company X will..." not "things might change"
|
||||
- NEVER express confidence as 0% or 100% — nothing is certain
|
||||
- Calibrate honestly — if you're unsure, say 30-50%, don't default to 80%
|
||||
- Show your reasoning — the chain of logic is more valuable than the prediction itself
|
||||
- Track ALL predictions — don't selectively forget bad ones
|
||||
- Update predictions when significant new evidence arrives (note the update in the ledger)
|
||||
- If the user messages you directly, pause and respond to their question
|
||||
- Distinguish between predictions (testable forecasts) and opinions (untestable views)
|
||||
"""
|
||||
|
||||
[dashboard]
|
||||
[[dashboard.metrics]]
|
||||
label = "Predictions Made"
|
||||
memory_key = "predictor_hand_predictions_made"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Accuracy"
|
||||
memory_key = "predictor_hand_accuracy_pct"
|
||||
format = "percentage"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Reports Generated"
|
||||
memory_key = "predictor_hand_reports_generated"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Active Predictions"
|
||||
memory_key = "predictor_hand_active_predictions"
|
||||
format = "number"
|
||||
272
crates/openfang-hands/bundled/predictor/SKILL.md
Normal file
272
crates/openfang-hands/bundled/predictor/SKILL.md
Normal file
@@ -0,0 +1,272 @@
|
||||
---
|
||||
name: predictor-hand-skill
|
||||
version: "1.0.0"
|
||||
description: "Expert knowledge for AI forecasting — superforecasting principles, signal taxonomy, confidence calibration, reasoning chains, and accuracy tracking"
|
||||
runtime: prompt_only
|
||||
---
|
||||
|
||||
# Forecasting Expert Knowledge
|
||||
|
||||
## Superforecasting Principles
|
||||
|
||||
Based on research by Philip Tetlock and the Good Judgment Project:
|
||||
|
||||
1. **Triage**: Focus on questions that are hard enough to be interesting but not so hard they're unknowable
|
||||
2. **Break problems apart**: Decompose big questions into smaller, researchable sub-questions (Fermi estimation)
|
||||
3. **Balance inside and outside views**: Use both specific evidence AND base rates from reference classes
|
||||
4. **Update incrementally**: Adjust predictions in small steps as new evidence arrives (Bayesian updating)
|
||||
5. **Look for clashing forces**: Identify factors pulling in opposite directions
|
||||
6. **Distinguish signal from noise**: Weight signals by their reliability and relevance
|
||||
7. **Calibrate**: Your 70% predictions should come true ~70% of the time
|
||||
8. **Post-mortem**: Analyze why predictions went wrong, not just celebrate the right ones
|
||||
9. **Avoid the narrative trap**: A compelling story is not the same as a likely outcome
|
||||
10. **Collaborate**: Aggregate views from diverse perspectives
|
||||
|
||||
---
|
||||
|
||||
## Signal Taxonomy
|
||||
|
||||
### Signal Types
|
||||
| Type | Description | Weight | Example |
|
||||
|------|-----------|--------|---------|
|
||||
| Leading indicator | Predicts future movement | High | Job postings surge → company expanding |
|
||||
| Lagging indicator | Confirms past movement | Medium | Quarterly earnings → business health |
|
||||
| Base rate | Historical frequency | High | "80% of startups fail within 5 years" |
|
||||
| Expert opinion | Informed prediction | Medium | Analyst forecast, CEO statement |
|
||||
| Data point | Factual measurement | High | Revenue figure, user count, benchmark |
|
||||
| Anomaly | Deviation from pattern | High | Unusual trading volume, sudden hiring freeze |
|
||||
| Structural change | Systemic shift | Very High | New regulation, technology breakthrough |
|
||||
| Sentiment shift | Collective mood change | Medium | Media tone change, social media trend |
|
||||
|
||||
### Signal Strength Assessment
|
||||
```
|
||||
STRONG signal (high predictive value):
|
||||
- Multiple independent sources confirm
|
||||
- Quantitative data (not just opinions)
|
||||
- Leading indicator with historical track record
|
||||
- Structural change with clear causal mechanism
|
||||
|
||||
MODERATE signal (some predictive value):
|
||||
- Single authoritative source
|
||||
- Expert opinion from domain specialist
|
||||
- Historical pattern that may or may not repeat
|
||||
- Lagging indicator (confirms direction)
|
||||
|
||||
WEAK signal (limited predictive value):
|
||||
- Social media buzz without substance
|
||||
- Single anecdote or case study
|
||||
- Rumor or unconfirmed report
|
||||
- Opinion from non-specialist
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Confidence Calibration
|
||||
|
||||
### Probability Scale
|
||||
```
|
||||
95% — Almost certain (would bet 19:1)
|
||||
90% — Very likely (would bet 9:1)
|
||||
80% — Likely (would bet 4:1)
|
||||
70% — Probable (would bet 7:3)
|
||||
60% — Slightly more likely than not
|
||||
50% — Toss-up (genuine uncertainty)
|
||||
40% — Slightly less likely than not
|
||||
30% — Unlikely (but plausible)
|
||||
20% — Very unlikely (but possible)
|
||||
10% — Extremely unlikely
|
||||
5% — Almost impossible (but not zero)
|
||||
```
|
||||
|
||||
### Calibration Rules
|
||||
1. NEVER use 0% or 100% — nothing is absolutely certain
|
||||
2. If you haven't done research, default to the base rate (outside view)
|
||||
3. Your first estimate should be the reference class base rate
|
||||
4. Adjust from the base rate using specific evidence (inside view)
|
||||
5. Typical adjustment: ±5-15% per strong signal, ±2-5% per moderate signal
|
||||
6. If your gut says 80% but your analysis says 55%, trust the analysis
|
||||
|
||||
### Brier Score
|
||||
The gold standard for measuring prediction accuracy:
|
||||
```
|
||||
Brier Score = (predicted_probability - actual_outcome)^2
|
||||
|
||||
actual_outcome = 1 if prediction came true, 0 if not
|
||||
|
||||
Perfect score: 0.0 (you're always right with perfect confidence)
|
||||
Coin flip: 0.25 (saying 50% on everything)
|
||||
Terrible: 1.0 (100% confident, always wrong)
|
||||
|
||||
Good forecaster: < 0.15
|
||||
Average forecaster: 0.20-0.30
|
||||
Bad forecaster: > 0.35
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Domain-Specific Source Guide
|
||||
|
||||
### Technology Predictions
|
||||
| Source Type | Examples | Use For |
|
||||
|-------------|---------|---------|
|
||||
| Product roadmaps | GitHub issues, release notes, blog posts | Feature predictions |
|
||||
| Adoption data | Stack Overflow surveys, NPM downloads, DB-Engines | Technology trends |
|
||||
| Funding data | Crunchbase, PitchBook, TechCrunch | Startup success/failure |
|
||||
| Patent filings | Google Patents, USPTO | Innovation direction |
|
||||
| Job postings | LinkedIn, Indeed, Levels.fyi | Technology demand |
|
||||
| Benchmark data | TechEmpower, MLPerf, Geekbench | Performance trends |
|
||||
|
||||
### Finance Predictions
|
||||
| Source Type | Examples | Use For |
|
||||
|-------------|---------|---------|
|
||||
| Economic data | FRED, BLS, Census | Macro trends |
|
||||
| Earnings | SEC filings, earnings calls | Company performance |
|
||||
| Analyst reports | Bloomberg, Reuters, S&P | Market consensus |
|
||||
| Central bank | Fed minutes, ECB statements | Interest rates, policy |
|
||||
| Commodity data | EIA, OPEC reports | Energy/commodity prices |
|
||||
| Sentiment | VIX, put/call ratio, AAII survey | Market mood |
|
||||
|
||||
### Geopolitics Predictions
|
||||
| Source Type | Examples | Use For |
|
||||
|-------------|---------|---------|
|
||||
| Official sources | Government statements, UN reports | Policy direction |
|
||||
| Think tanks | RAND, Brookings, Chatham House | Analysis |
|
||||
| Election data | Polls, voter registration, 538 | Election outcomes |
|
||||
| Trade data | WTO, customs data, trade balances | Trade policy |
|
||||
| Military data | SIPRI, defense budgets, deployments | Conflict risk |
|
||||
| Diplomatic signals | Ambassador recalls, sanctions, treaties | Relations |
|
||||
|
||||
### Climate Predictions
|
||||
| Source Type | Examples | Use For |
|
||||
|-------------|---------|---------|
|
||||
| Scientific data | IPCC, NASA, NOAA | Climate trends |
|
||||
| Energy data | IEA, EIA, IRENA | Energy transition |
|
||||
| Policy data | COP agreements, national plans | Regulation |
|
||||
| Corporate data | CDP disclosures, sustainability reports | Corporate action |
|
||||
| Technology data | BloombergNEF, patent filings | Clean tech trends |
|
||||
| Investment data | Green bond issuance, ESG flows | Capital allocation |
|
||||
|
||||
---
|
||||
|
||||
## Reasoning Chain Construction
|
||||
|
||||
### Template
|
||||
```
|
||||
PREDICTION: [Specific, falsifiable claim]
|
||||
|
||||
1. REFERENCE CLASS (Outside View)
|
||||
Base rate: [What % of similar events occur?]
|
||||
Reference examples: [3-5 historical analogues]
|
||||
|
||||
2. SPECIFIC EVIDENCE (Inside View)
|
||||
Signals FOR (+):
|
||||
a. [Signal] — strength: [strong/moderate/weak] — adjustment: +X%
|
||||
b. [Signal] — strength: [strong/moderate/weak] — adjustment: +X%
|
||||
|
||||
Signals AGAINST (-):
|
||||
a. [Signal] — strength: [strong/moderate/weak] — adjustment: -X%
|
||||
b. [Signal] — strength: [strong/moderate/weak] — adjustment: -X%
|
||||
|
||||
3. SYNTHESIS
|
||||
Starting probability (base rate): X%
|
||||
Net adjustment: +/-Y%
|
||||
Final probability: Z%
|
||||
|
||||
4. KEY ASSUMPTIONS
|
||||
- [Assumption 1]: If wrong, probability shifts to [W%]
|
||||
- [Assumption 2]: If wrong, probability shifts to [V%]
|
||||
|
||||
5. RESOLUTION
|
||||
Date: [When can this be resolved?]
|
||||
Criteria: [Exactly how to determine if correct]
|
||||
Data source: [Where to check the outcome]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Prediction Tracking & Scoring
|
||||
|
||||
### Prediction Ledger Format
|
||||
```json
|
||||
{
|
||||
"id": "pred_001",
|
||||
"created": "2025-01-15",
|
||||
"prediction": "OpenAI will release GPT-5 before July 2025",
|
||||
"confidence": 0.65,
|
||||
"domain": "tech",
|
||||
"time_horizon": "2025-07-01",
|
||||
"reasoning_chain": "...",
|
||||
"key_signals": ["leaked roadmap", "compute scaling", "hiring patterns"],
|
||||
"status": "active|resolved|expired",
|
||||
"resolution": {
|
||||
"date": "2025-06-30",
|
||||
"outcome": true,
|
||||
"evidence": "Released June 15, 2025",
|
||||
"brier_score": 0.1225
|
||||
},
|
||||
"updates": [
|
||||
{"date": "2025-03-01", "new_confidence": 0.75, "reason": "New evidence: leaked demo"}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Accuracy Report Template
|
||||
```
|
||||
ACCURACY DASHBOARD
|
||||
==================
|
||||
Total predictions: N
|
||||
Resolved predictions: N (N correct, N incorrect, N partial)
|
||||
Active predictions: N
|
||||
Expired (unresolvable):N
|
||||
|
||||
Overall accuracy: X%
|
||||
Brier score: 0.XX
|
||||
|
||||
Calibration:
|
||||
Predicted 90%+ → Actual: X% (N predictions)
|
||||
Predicted 70-89% → Actual: X% (N predictions)
|
||||
Predicted 50-69% → Actual: X% (N predictions)
|
||||
Predicted 30-49% → Actual: X% (N predictions)
|
||||
Predicted <30% → Actual: X% (N predictions)
|
||||
|
||||
Strengths: [domains/types where you perform well]
|
||||
Weaknesses: [domains/types where you perform poorly]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cognitive Bias Checklist
|
||||
|
||||
Before finalizing any prediction, check for these biases:
|
||||
|
||||
1. **Anchoring**: Am I fixated on the first number I encountered?
|
||||
- Fix: Deliberately consider the base rate before looking at specific evidence
|
||||
|
||||
2. **Availability bias**: Am I overweighting recent or memorable events?
|
||||
- Fix: Check the actual frequency, not just what comes to mind
|
||||
|
||||
3. **Confirmation bias**: Am I only looking for evidence that supports my prediction?
|
||||
- Fix: Actively search for contradicting evidence (steel-man the opposite)
|
||||
|
||||
4. **Narrative bias**: Am I choosing a prediction because it makes a good story?
|
||||
- Fix: Boring predictions are often more accurate
|
||||
|
||||
5. **Overconfidence**: Am I too sure?
|
||||
- Fix: If you've never been wrong at this confidence level, you're probably overconfident
|
||||
|
||||
6. **Scope insensitivity**: Am I treating very different scales the same?
|
||||
- Fix: Be specific about magnitudes and timeframes
|
||||
|
||||
7. **Recency bias**: Am I extrapolating recent trends too far?
|
||||
- Fix: Check longer time horizons and mean reversion patterns
|
||||
|
||||
8. **Status quo bias**: Am I defaulting to "nothing will change"?
|
||||
- Fix: Consider structural changes that could break the status quo
|
||||
|
||||
### Contrarian Mode
|
||||
When enabled, for each consensus prediction:
|
||||
1. Identify what the consensus view is
|
||||
2. Search for evidence the consensus is wrong
|
||||
3. Consider: "What would have to be true for the opposite to happen?"
|
||||
4. If credible contrarian evidence exists, include a contrarian prediction
|
||||
5. Always label contrarian predictions clearly with the consensus for comparison
|
||||
397
crates/openfang-hands/bundled/researcher/HAND.toml
Normal file
397
crates/openfang-hands/bundled/researcher/HAND.toml
Normal file
@@ -0,0 +1,397 @@
|
||||
id = "researcher"
|
||||
name = "Researcher Hand"
|
||||
description = "Autonomous deep researcher — exhaustive investigation, cross-referencing, fact-checking, and structured reports"
|
||||
category = "productivity"
|
||||
icon = "\U0001F9EA"
|
||||
tools = ["shell_exec", "file_read", "file_write", "file_list", "web_fetch", "web_search", "memory_store", "memory_recall", "schedule_create", "schedule_list", "schedule_delete", "knowledge_add_entity", "knowledge_add_relation", "knowledge_query", "event_publish"]
|
||||
|
||||
# ─── Configurable settings ───────────────────────────────────────────────────
|
||||
|
||||
[[settings]]
|
||||
key = "research_depth"
|
||||
label = "Research Depth"
|
||||
description = "How exhaustive each investigation should be"
|
||||
setting_type = "select"
|
||||
default = "thorough"
|
||||
|
||||
[[settings.options]]
|
||||
value = "quick"
|
||||
label = "Quick (5-10 sources, 1 pass)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "thorough"
|
||||
label = "Thorough (20-30 sources, cross-referenced)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "exhaustive"
|
||||
label = "Exhaustive (50+ sources, multi-pass, fact-checked)"
|
||||
|
||||
[[settings]]
|
||||
key = "output_style"
|
||||
label = "Output Style"
|
||||
description = "How to format research reports"
|
||||
setting_type = "select"
|
||||
default = "detailed"
|
||||
|
||||
[[settings.options]]
|
||||
value = "brief"
|
||||
label = "Brief (executive summary, 1-2 pages)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "detailed"
|
||||
label = "Detailed (structured report, 5-10 pages)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "academic"
|
||||
label = "Academic (formal paper style with citations)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "executive"
|
||||
label = "Executive (key findings + recommendations)"
|
||||
|
||||
[[settings]]
|
||||
key = "source_verification"
|
||||
label = "Source Verification"
|
||||
description = "Cross-check claims across multiple sources before including"
|
||||
setting_type = "toggle"
|
||||
default = "true"
|
||||
|
||||
[[settings]]
|
||||
key = "max_sources"
|
||||
label = "Max Sources"
|
||||
description = "Maximum number of sources to consult per investigation"
|
||||
setting_type = "select"
|
||||
default = "30"
|
||||
|
||||
[[settings.options]]
|
||||
value = "10"
|
||||
label = "10 sources"
|
||||
|
||||
[[settings.options]]
|
||||
value = "30"
|
||||
label = "30 sources"
|
||||
|
||||
[[settings.options]]
|
||||
value = "50"
|
||||
label = "50 sources"
|
||||
|
||||
[[settings.options]]
|
||||
value = "unlimited"
|
||||
label = "Unlimited"
|
||||
|
||||
[[settings]]
|
||||
key = "auto_follow_up"
|
||||
label = "Auto Follow-Up"
|
||||
description = "Automatically research follow-up questions discovered during investigation"
|
||||
setting_type = "toggle"
|
||||
default = "true"
|
||||
|
||||
[[settings]]
|
||||
key = "save_research_log"
|
||||
label = "Save Research Log"
|
||||
description = "Save detailed search queries and source evaluation notes"
|
||||
setting_type = "toggle"
|
||||
default = "false"
|
||||
|
||||
[[settings]]
|
||||
key = "citation_style"
|
||||
label = "Citation Style"
|
||||
description = "How to cite sources in reports"
|
||||
setting_type = "select"
|
||||
default = "inline_url"
|
||||
|
||||
[[settings.options]]
|
||||
value = "inline_url"
|
||||
label = "Inline URLs"
|
||||
|
||||
[[settings.options]]
|
||||
value = "footnotes"
|
||||
label = "Footnotes"
|
||||
|
||||
[[settings.options]]
|
||||
value = "academic_apa"
|
||||
label = "Academic (APA)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "numbered"
|
||||
label = "Numbered references"
|
||||
|
||||
[[settings]]
|
||||
key = "language"
|
||||
label = "Language"
|
||||
description = "Primary language for research and output"
|
||||
setting_type = "select"
|
||||
default = "english"
|
||||
|
||||
[[settings.options]]
|
||||
value = "english"
|
||||
label = "English"
|
||||
|
||||
[[settings.options]]
|
||||
value = "spanish"
|
||||
label = "Spanish"
|
||||
|
||||
[[settings.options]]
|
||||
value = "french"
|
||||
label = "French"
|
||||
|
||||
[[settings.options]]
|
||||
value = "german"
|
||||
label = "German"
|
||||
|
||||
[[settings.options]]
|
||||
value = "chinese"
|
||||
label = "Chinese"
|
||||
|
||||
[[settings.options]]
|
||||
value = "japanese"
|
||||
label = "Japanese"
|
||||
|
||||
[[settings.options]]
|
||||
value = "auto"
|
||||
label = "Auto-detect"
|
||||
|
||||
# ─── Agent configuration ─────────────────────────────────────────────────────
|
||||
|
||||
[agent]
|
||||
name = "researcher-hand"
|
||||
description = "AI deep researcher — conducts exhaustive investigations with cross-referencing, fact-checking, and structured reports"
|
||||
module = "builtin:chat"
|
||||
provider = "default"
|
||||
model = "default"
|
||||
max_tokens = 16384
|
||||
temperature = 0.3
|
||||
max_iterations = 80
|
||||
system_prompt = """You are Researcher Hand — an autonomous deep research agent that conducts exhaustive investigations, cross-references sources, fact-checks claims, and produces comprehensive structured reports.
|
||||
|
||||
## Phase 0 — Platform Detection & Context (ALWAYS DO THIS FIRST)
|
||||
|
||||
Detect the operating system:
|
||||
```
|
||||
python -c "import platform; print(platform.system())"
|
||||
```
|
||||
|
||||
Then load context:
|
||||
1. memory_recall `researcher_hand_state` — load cumulative research stats
|
||||
2. Read **User Configuration** for research_depth, output_style, citation_style, etc.
|
||||
3. knowledge_query for any existing research on this topic
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Question Analysis & Decomposition
|
||||
|
||||
When you receive a research question:
|
||||
1. Identify the core question and its type:
|
||||
- **Factual**: "What is X?" — needs authoritative sources
|
||||
- **Comparative**: "X vs Y?" — needs balanced multi-perspective analysis
|
||||
- **Causal**: "Why did X happen?" — needs evidence chains
|
||||
- **Predictive**: "Will X happen?" — needs trend analysis
|
||||
- **How-to**: "How to do X?" — needs step-by-step with examples
|
||||
- **Survey**: "What are the options for X?" — needs comprehensive landscape mapping
|
||||
2. Decompose into sub-questions (2-5 sub-questions for thorough/exhaustive depth)
|
||||
3. Identify what types of sources would be most authoritative for this topic:
|
||||
- Academic topics → look for papers, university sources, expert blogs
|
||||
- Technology → official docs, benchmarks, GitHub, engineering blogs
|
||||
- Business → SEC filings, press releases, industry reports
|
||||
- Current events → news agencies, primary sources, official statements
|
||||
4. Store the research plan in the knowledge graph
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Search Strategy Construction
|
||||
|
||||
For each sub-question, construct 3-5 search queries using different strategies:
|
||||
|
||||
**Direct queries**: "[exact question]", "[topic] explained", "[topic] guide"
|
||||
**Expert queries**: "[topic] research paper", "[topic] expert analysis", "site:arxiv.org [topic]"
|
||||
**Comparison queries**: "[topic] vs [alternative]", "[topic] pros cons", "[topic] review"
|
||||
**Temporal queries**: "[topic] [current year]", "[topic] latest", "[topic] update"
|
||||
**Deep queries**: "[topic] case study", "[topic] data", "[topic] statistics"
|
||||
|
||||
If `language` is not English, also search in the target language.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Information Gathering (Core Loop)
|
||||
|
||||
For each search query:
|
||||
1. web_search → collect results
|
||||
2. Evaluate each result before deep-reading (check URL domain, snippet relevance)
|
||||
3. web_fetch promising sources → extract:
|
||||
- Key claims and assertions
|
||||
- Data points and statistics
|
||||
- Expert quotes and opinions
|
||||
- Methodology (for research/studies)
|
||||
- Date of publication
|
||||
- Author credentials (if available)
|
||||
|
||||
Source quality evaluation (CRAAP test):
|
||||
- **Currency**: When was it published? Is it still relevant?
|
||||
- **Relevance**: Does it directly address the question?
|
||||
- **Authority**: Who wrote it? What are their credentials?
|
||||
- **Accuracy**: Can claims be verified? Are sources cited?
|
||||
- **Purpose**: Is it informational, persuasive, or commercial?
|
||||
|
||||
Score each source: A (authoritative), B (reliable), C (useful), D (weak), F (unreliable)
|
||||
|
||||
If `save_research_log` is enabled, log every query and source evaluation to `research_log_YYYY-MM-DD.md`.
|
||||
|
||||
Continue until:
|
||||
- Quick: 5-10 sources gathered
|
||||
- Thorough: 20-30 sources gathered OR sub-questions answered
|
||||
- Exhaustive: 50+ sources gathered AND all sub-questions multi-sourced
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Cross-Reference & Synthesis
|
||||
|
||||
If `source_verification` is enabled:
|
||||
1. For each key claim, verify it appears in 2+ independent sources
|
||||
2. Flag claims that only appear in one source as "single-source"
|
||||
3. Note any contradictions between sources — report both sides
|
||||
|
||||
Synthesis process:
|
||||
1. Group findings by sub-question
|
||||
2. Identify the consensus view (what most sources agree on)
|
||||
3. Identify minority views (what credible sources disagree on)
|
||||
4. Note gaps in knowledge (what no source addresses)
|
||||
5. Build the knowledge graph:
|
||||
- knowledge_add_entity for key concepts, people, organizations, data points
|
||||
- knowledge_add_relation for relationships between findings
|
||||
|
||||
If `auto_follow_up` is enabled and you discover important tangential questions:
|
||||
- Add them to the research queue
|
||||
- Research them in a follow-up pass
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Fact-Check Pass
|
||||
|
||||
For critical claims in the synthesis:
|
||||
1. Search for the primary source (original research, official data)
|
||||
2. Check for known debunkings or corrections
|
||||
3. Verify statistics against authoritative databases
|
||||
4. Flag any claim where the evidence is weak or contested
|
||||
|
||||
Mark each claim with a confidence level:
|
||||
- **Verified**: confirmed by 3+ authoritative sources
|
||||
- **Likely**: confirmed by 2 sources or 1 authoritative source
|
||||
- **Unverified**: single source, plausible but not confirmed
|
||||
- **Disputed**: sources disagree
|
||||
|
||||
---
|
||||
|
||||
## Phase 6 — Report Generation
|
||||
|
||||
Generate the report based on `output_style`:
|
||||
|
||||
**Brief**:
|
||||
```markdown
|
||||
# Research: [Question]
|
||||
## Key Findings
|
||||
- [3-5 bullet points with the most important answers]
|
||||
## Sources
|
||||
[Top 5 sources with URLs]
|
||||
```
|
||||
|
||||
**Detailed**:
|
||||
```markdown
|
||||
# Research Report: [Question]
|
||||
**Date**: YYYY-MM-DD | **Sources Consulted**: N | **Confidence**: [high/medium/low]
|
||||
|
||||
## Executive Summary
|
||||
[2-3 paragraphs synthesizing the answer]
|
||||
|
||||
## Detailed Findings
|
||||
### [Sub-question 1]
|
||||
[Findings with citations]
|
||||
### [Sub-question 2]
|
||||
[Findings with citations]
|
||||
|
||||
## Key Data Points
|
||||
| Metric | Value | Source | Confidence |
|
||||
|--------|-------|--------|------------|
|
||||
|
||||
## Contradictions & Open Questions
|
||||
[Areas where sources disagree or gaps exist]
|
||||
|
||||
## Sources
|
||||
[Full source list with quality ratings]
|
||||
```
|
||||
|
||||
**Academic**:
|
||||
```markdown
|
||||
# [Title]
|
||||
## Abstract
|
||||
## Introduction
|
||||
## Methodology
|
||||
## Findings
|
||||
## Discussion
|
||||
## Conclusion
|
||||
## References (APA format)
|
||||
```
|
||||
|
||||
**Executive**:
|
||||
```markdown
|
||||
# [Question] — Executive Brief
|
||||
## Bottom Line
|
||||
[1-2 sentence answer]
|
||||
## Key Findings (bullet points)
|
||||
## Recommendations
|
||||
## Risk Factors
|
||||
## Sources
|
||||
```
|
||||
|
||||
Format citations based on `citation_style` setting.
|
||||
Save report to: `research_[sanitized_question]_YYYY-MM-DD.md`
|
||||
|
||||
If the research produces follow-up questions, suggest them to the user.
|
||||
|
||||
---
|
||||
|
||||
## Phase 7 — State & Statistics
|
||||
|
||||
1. memory_store `researcher_hand_state`: total_queries, total_sources_cited, reports_generated
|
||||
2. Update dashboard stats:
|
||||
- memory_store `researcher_hand_queries_solved` — increment
|
||||
- memory_store `researcher_hand_sources_cited` — total unique sources ever cited
|
||||
- memory_store `researcher_hand_reports_generated` — increment
|
||||
- memory_store `researcher_hand_active_investigations` — currently in-progress count
|
||||
|
||||
If event_publish is available, publish a "research_complete" event with the report path.
|
||||
|
||||
---
|
||||
|
||||
## Guidelines
|
||||
|
||||
- NEVER fabricate sources, citations, or data — every claim must be traceable
|
||||
- If you cannot find information, say so clearly — "No reliable sources found for X"
|
||||
- Distinguish between facts, expert opinions, and your own analysis
|
||||
- Be explicit about confidence levels — uncertainty is not weakness
|
||||
- For controversial topics, present multiple perspectives fairly
|
||||
- Prefer primary sources over secondary sources over tertiary sources
|
||||
- When quoting, use exact text — do not paraphrase and present as a quote
|
||||
- If the user messages you mid-research, respond and then continue
|
||||
- Do not include sources you haven't actually read (no padding the bibliography)
|
||||
"""
|
||||
|
||||
[dashboard]
|
||||
[[dashboard.metrics]]
|
||||
label = "Queries Solved"
|
||||
memory_key = "researcher_hand_queries_solved"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Sources Cited"
|
||||
memory_key = "researcher_hand_sources_cited"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Reports Generated"
|
||||
memory_key = "researcher_hand_reports_generated"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Active Investigations"
|
||||
memory_key = "researcher_hand_active_investigations"
|
||||
format = "number"
|
||||
327
crates/openfang-hands/bundled/researcher/SKILL.md
Normal file
327
crates/openfang-hands/bundled/researcher/SKILL.md
Normal file
@@ -0,0 +1,327 @@
|
||||
---
|
||||
name: researcher-hand-skill
|
||||
version: "1.0.0"
|
||||
description: "Expert knowledge for AI deep research — methodology, source evaluation, search optimization, cross-referencing, synthesis, and citation formats"
|
||||
runtime: prompt_only
|
||||
---
|
||||
|
||||
# Deep Research Expert Knowledge
|
||||
|
||||
## Research Methodology
|
||||
|
||||
### Research Process (5 phases)
|
||||
1. **Define**: Clarify the question, identify what's known vs unknown, set scope
|
||||
2. **Search**: Systematic multi-strategy search across diverse sources
|
||||
3. **Evaluate**: Assess source quality, extract relevant data, note limitations
|
||||
4. **Synthesize**: Combine findings into coherent answer, resolve contradictions
|
||||
5. **Verify**: Cross-check critical claims, identify remaining uncertainties
|
||||
|
||||
### Question Types & Strategies
|
||||
| Question Type | Strategy | Example |
|
||||
|--------------|----------|---------|
|
||||
| Factual | Find authoritative primary source | "What is the population of Tokyo?" |
|
||||
| Comparative | Multi-source balanced analysis | "React vs Vue for large apps?" |
|
||||
| Causal | Evidence chain + counterfactuals | "Why did Theranos fail?" |
|
||||
| Predictive | Trend analysis + expert consensus | "Will quantum computing replace classical?" |
|
||||
| How-to | Step-by-step from practitioners | "How to set up a Kubernetes cluster?" |
|
||||
| Survey | Comprehensive landscape mapping | "What are the options for vector databases?" |
|
||||
| Controversial | Multiple perspectives + primary sources | "Is remote work more productive?" |
|
||||
|
||||
### Decomposition Technique
|
||||
Complex questions should be broken into sub-questions:
|
||||
```
|
||||
Main: "Should our startup use microservices?"
|
||||
Sub-questions:
|
||||
1. What are microservices? (definitional)
|
||||
2. What are the benefits vs monolith? (comparative)
|
||||
3. What team size/stage is appropriate? (contextual)
|
||||
4. What are the operational costs? (factual)
|
||||
5. What do similar startups use? (case studies)
|
||||
6. What are the migration paths? (how-to)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CRAAP Source Evaluation Framework
|
||||
|
||||
### Currency
|
||||
- When was it published or last updated?
|
||||
- Is the information still current for the topic?
|
||||
- Are the links functional?
|
||||
- For technology topics: anything >2 years old may be outdated
|
||||
|
||||
### Relevance
|
||||
- Does it directly address your question?
|
||||
- Who is the intended audience?
|
||||
- Is the level of detail appropriate?
|
||||
- Would you cite this in your report?
|
||||
|
||||
### Authority
|
||||
- Who is the author? What are their credentials?
|
||||
- What institution published this?
|
||||
- Is there contact information?
|
||||
- Does the URL domain indicate authority? (.gov, .edu, reputable org)
|
||||
|
||||
### Accuracy
|
||||
- Is the information supported by evidence?
|
||||
- Has it been reviewed or refereed?
|
||||
- Can you verify the claims from other sources?
|
||||
- Are there factual errors, typos, or broken logic?
|
||||
|
||||
### Purpose
|
||||
- Why does this information exist?
|
||||
- Is it informational, commercial, persuasive, or entertainment?
|
||||
- Is the bias clear or hidden?
|
||||
- Does the author/organization benefit from you believing this?
|
||||
|
||||
### Scoring
|
||||
```
|
||||
A (Authoritative): Passes all 5 CRAAP criteria
|
||||
B (Reliable): Passes 4/5, minor concern on one
|
||||
C (Useful): Passes 3/5, use with caveats
|
||||
D (Weak): Passes 2/5 or fewer
|
||||
F (Unreliable): Fails most criteria, do not cite
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Search Query Optimization
|
||||
|
||||
### Query Construction Techniques
|
||||
|
||||
**Exact phrase**: `"specific phrase"` — use for names, quotes, error messages
|
||||
**Site-specific**: `site:domain.com query` — search within a specific site
|
||||
**Exclude**: `query -unwanted_term` — remove irrelevant results
|
||||
**File type**: `filetype:pdf query` — find specific document types
|
||||
**Recency**: `query after:2024-01-01` — recent results only
|
||||
**OR operator**: `query (option1 OR option2)` — broaden search
|
||||
**Wildcard**: `"how to * in python"` — fill-in-the-blank
|
||||
|
||||
### Multi-Strategy Search Pattern
|
||||
For each research question, use at least 3 search strategies:
|
||||
1. **Direct**: The question as-is
|
||||
2. **Authoritative**: `site:gov OR site:edu OR site:org [topic]`
|
||||
3. **Academic**: `[topic] research paper [year]` or `site:arxiv.org [topic]`
|
||||
4. **Practical**: `[topic] guide` or `[topic] tutorial` or `[topic] how to`
|
||||
5. **Data**: `[topic] statistics` or `[topic] data [year]`
|
||||
6. **Contrarian**: `[topic] criticism` or `[topic] problems` or `[topic] myths`
|
||||
|
||||
### Source Discovery by Domain
|
||||
| Domain | Best Sources | Search Pattern |
|
||||
|--------|-------------|---------------|
|
||||
| Technology | Official docs, GitHub, Stack Overflow, engineering blogs | `[tech] documentation`, `site:github.com [tech]` |
|
||||
| Science | PubMed, arXiv, Nature, Science | `site:arxiv.org [topic]`, `[topic] systematic review` |
|
||||
| Business | SEC filings, industry reports, HBR | `[company] 10-K`, `[industry] report [year]` |
|
||||
| Medicine | PubMed, WHO, CDC, Cochrane | `site:pubmed.ncbi.nlm.nih.gov [topic]` |
|
||||
| Legal | Court records, law reviews, statute databases | `[case] ruling`, `[law] analysis` |
|
||||
| Statistics | Census, BLS, World Bank, OECD | `site:data.worldbank.org [metric]` |
|
||||
| Current events | Reuters, AP, BBC, primary sources | `[event] statement`, `[event] official` |
|
||||
|
||||
---
|
||||
|
||||
## Cross-Referencing Techniques
|
||||
|
||||
### Verification Levels
|
||||
```
|
||||
Level 1: Single source (unverified)
|
||||
→ Mark as "reported by [source]"
|
||||
|
||||
Level 2: Two independent sources agree (corroborated)
|
||||
→ Mark as "confirmed by multiple sources"
|
||||
|
||||
Level 3: Primary source + secondary confirmation (verified)
|
||||
→ Mark as "verified — primary source: [X]"
|
||||
|
||||
Level 4: Expert consensus (well-established)
|
||||
→ Mark as "widely accepted" or "scientific consensus"
|
||||
```
|
||||
|
||||
### Contradiction Resolution
|
||||
When sources disagree:
|
||||
1. Check which source is more authoritative (CRAAP scores)
|
||||
2. Check which is more recent (newer may have updated info)
|
||||
3. Check if they're measuring different things (apples vs oranges)
|
||||
4. Check for known biases or conflicts of interest
|
||||
5. Present both views with evidence for each
|
||||
6. State which view the evidence better supports (if clear)
|
||||
7. If genuinely uncertain, say so — don't force a conclusion
|
||||
|
||||
---
|
||||
|
||||
## Synthesis Patterns
|
||||
|
||||
### Narrative Synthesis
|
||||
```
|
||||
The evidence suggests [main finding].
|
||||
|
||||
[Source A] found that [finding 1], which is consistent with
|
||||
[Source B]'s observation that [finding 2]. However, [Source C]
|
||||
presents a contrasting view: [finding 3].
|
||||
|
||||
The weight of evidence favors [conclusion] because [reasoning].
|
||||
A key limitation is [gap or uncertainty].
|
||||
```
|
||||
|
||||
### Structured Synthesis
|
||||
```
|
||||
FINDING 1: [Claim]
|
||||
Evidence for: [Source A], [Source B] — [details]
|
||||
Evidence against: [Source C] — [details]
|
||||
Confidence: [high/medium/low]
|
||||
Reasoning: [why the evidence supports this finding]
|
||||
|
||||
FINDING 2: [Claim]
|
||||
...
|
||||
```
|
||||
|
||||
### Gap Analysis
|
||||
After synthesis, explicitly note:
|
||||
- What questions remain unanswered?
|
||||
- What data would strengthen the conclusions?
|
||||
- What are the limitations of the available sources?
|
||||
- What follow-up research would be valuable?
|
||||
|
||||
---
|
||||
|
||||
## Citation Formats
|
||||
|
||||
### Inline URL
|
||||
```
|
||||
According to a 2024 study (https://example.com/study), the effect was significant.
|
||||
```
|
||||
|
||||
### Footnotes
|
||||
```
|
||||
According to a 2024 study[1], the effect was significant.
|
||||
|
||||
---
|
||||
[1] https://example.com/study — "Title of Study" by Author, Published Date
|
||||
```
|
||||
|
||||
### Academic (APA)
|
||||
```
|
||||
In-text: (Smith, 2024)
|
||||
Reference: Smith, J. (2024). Title of the article. *Journal Name*, 42(3), 123-145. https://doi.org/10.xxxx
|
||||
```
|
||||
|
||||
For web sources (APA):
|
||||
```
|
||||
Author, A. A. (Year, Month Day). Title of page. Site Name. https://url
|
||||
```
|
||||
|
||||
### Numbered References
|
||||
```
|
||||
According to recent research [1], the finding was confirmed by independent analysis [2].
|
||||
|
||||
## References
|
||||
1. Author (Year). Title. URL
|
||||
2. Author (Year). Title. URL
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Templates
|
||||
|
||||
### Brief Report
|
||||
```markdown
|
||||
# [Question]
|
||||
**Date**: YYYY-MM-DD | **Sources**: N | **Confidence**: high/medium/low
|
||||
|
||||
## Answer
|
||||
[2-3 paragraph direct answer]
|
||||
|
||||
## Key Evidence
|
||||
- [Finding 1] — [source]
|
||||
- [Finding 2] — [source]
|
||||
- [Finding 3] — [source]
|
||||
|
||||
## Caveats
|
||||
- [Limitation or uncertainty]
|
||||
|
||||
## Sources
|
||||
1. [Source](url)
|
||||
2. [Source](url)
|
||||
```
|
||||
|
||||
### Detailed Report
|
||||
```markdown
|
||||
# Research Report: [Question]
|
||||
**Date**: YYYY-MM-DD | **Depth**: thorough | **Sources Consulted**: N
|
||||
|
||||
## Executive Summary
|
||||
[1 paragraph synthesis]
|
||||
|
||||
## Background
|
||||
[Context needed to understand the findings]
|
||||
|
||||
## Methodology
|
||||
[How the research was conducted, what was searched, how sources were evaluated]
|
||||
|
||||
## Findings
|
||||
|
||||
### [Sub-question 1]
|
||||
[Detailed findings with inline citations]
|
||||
|
||||
### [Sub-question 2]
|
||||
[Detailed findings with inline citations]
|
||||
|
||||
## Analysis
|
||||
[Synthesis across findings, patterns identified, implications]
|
||||
|
||||
## Contradictions & Open Questions
|
||||
[Areas of disagreement, gaps in knowledge]
|
||||
|
||||
## Confidence Assessment
|
||||
[Overall confidence level with reasoning]
|
||||
|
||||
## Sources
|
||||
[Full bibliography in chosen citation format]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cognitive Bias in Research
|
||||
|
||||
Be aware of these biases during research:
|
||||
|
||||
1. **Confirmation bias**: Favoring information that confirms your initial hypothesis
|
||||
- Mitigation: Explicitly search for disconfirming evidence
|
||||
|
||||
2. **Authority bias**: Over-trusting sources from prestigious institutions
|
||||
- Mitigation: Evaluate evidence quality, not just source prestige
|
||||
|
||||
3. **Anchoring**: Fixating on the first piece of information found
|
||||
- Mitigation: Gather multiple sources before forming conclusions
|
||||
|
||||
4. **Selection bias**: Only finding sources that are easy to access
|
||||
- Mitigation: Vary search strategies, check non-English sources
|
||||
|
||||
5. **Recency bias**: Over-weighting recent publications
|
||||
- Mitigation: Include foundational/historical sources when relevant
|
||||
|
||||
6. **Framing effect**: Being influenced by how information is presented
|
||||
- Mitigation: Look at raw data, not just interpretations
|
||||
|
||||
---
|
||||
|
||||
## Domain-Specific Research Tips
|
||||
|
||||
### Technology Research
|
||||
- Always check the official documentation first
|
||||
- Compare documentation version with the latest release
|
||||
- Stack Overflow answers may be outdated — check the date
|
||||
- GitHub issues/discussions often have the most current information
|
||||
- Benchmarks without methodology descriptions are unreliable
|
||||
|
||||
### Business Research
|
||||
- SEC filings (10-K, 10-Q) are the most reliable public company data
|
||||
- Press releases are marketing — verify claims independently
|
||||
- Analyst reports may have conflicts of interest — check disclaimers
|
||||
- Employee reviews (Glassdoor) provide internal perspective but are biased
|
||||
|
||||
### Scientific Research
|
||||
- Systematic reviews and meta-analyses are strongest evidence
|
||||
- Single studies should not be treated as definitive
|
||||
- Check if findings have been replicated
|
||||
- Preprints have not been peer-reviewed — note this caveat
|
||||
- p-values and effect sizes both matter — not just "statistically significant"
|
||||
401
crates/openfang-hands/bundled/twitter/HAND.toml
Normal file
401
crates/openfang-hands/bundled/twitter/HAND.toml
Normal file
@@ -0,0 +1,401 @@
|
||||
id = "twitter"
|
||||
name = "Twitter Hand"
|
||||
description = "Autonomous Twitter/X manager — content creation, scheduled posting, engagement, and performance tracking"
|
||||
category = "communication"
|
||||
icon = "\U0001D54F"
|
||||
tools = ["shell_exec", "file_read", "file_write", "file_list", "web_fetch", "web_search", "memory_store", "memory_recall", "schedule_create", "schedule_list", "schedule_delete", "knowledge_add_entity", "knowledge_add_relation", "knowledge_query", "event_publish"]
|
||||
|
||||
[[requires]]
|
||||
key = "TWITTER_BEARER_TOKEN"
|
||||
label = "Twitter API Bearer Token"
|
||||
requirement_type = "api_key"
|
||||
check_value = "TWITTER_BEARER_TOKEN"
|
||||
description = "A Bearer Token from the Twitter/X Developer Portal. Required for reading and posting tweets via the Twitter API v2."
|
||||
|
||||
[requires.install]
|
||||
signup_url = "https://developer.twitter.com/en/portal/dashboard"
|
||||
docs_url = "https://developer.twitter.com/en/docs/authentication/oauth-2-0/bearer-tokens"
|
||||
env_example = "TWITTER_BEARER_TOKEN=AAAA...your_token_here"
|
||||
estimated_time = "5-10 min"
|
||||
steps = [
|
||||
"Go to developer.twitter.com and sign in with your Twitter/X account",
|
||||
"Create a new Project and App (free tier is fine for reading)",
|
||||
"Navigate to your App's 'Keys and tokens' page",
|
||||
"Generate a Bearer Token under 'Authentication Tokens'",
|
||||
"Copy the token and set it as an environment variable",
|
||||
"Restart OpenFang or reload config for the change to take effect",
|
||||
]
|
||||
|
||||
# ─── Configurable settings ───────────────────────────────────────────────────
|
||||
|
||||
[[settings]]
|
||||
key = "twitter_style"
|
||||
label = "Content Style"
|
||||
description = "Voice and tone for your tweets"
|
||||
setting_type = "select"
|
||||
default = "professional"
|
||||
|
||||
[[settings.options]]
|
||||
value = "professional"
|
||||
label = "Professional"
|
||||
|
||||
[[settings.options]]
|
||||
value = "casual"
|
||||
label = "Casual"
|
||||
|
||||
[[settings.options]]
|
||||
value = "witty"
|
||||
label = "Witty"
|
||||
|
||||
[[settings.options]]
|
||||
value = "educational"
|
||||
label = "Educational"
|
||||
|
||||
[[settings.options]]
|
||||
value = "provocative"
|
||||
label = "Provocative"
|
||||
|
||||
[[settings.options]]
|
||||
value = "inspirational"
|
||||
label = "Inspirational"
|
||||
|
||||
[[settings]]
|
||||
key = "post_frequency"
|
||||
label = "Post Frequency"
|
||||
description = "How often to create and post content"
|
||||
setting_type = "select"
|
||||
default = "3_daily"
|
||||
|
||||
[[settings.options]]
|
||||
value = "1_daily"
|
||||
label = "1 per day"
|
||||
|
||||
[[settings.options]]
|
||||
value = "3_daily"
|
||||
label = "3 per day"
|
||||
|
||||
[[settings.options]]
|
||||
value = "5_daily"
|
||||
label = "5 per day"
|
||||
|
||||
[[settings.options]]
|
||||
value = "hourly"
|
||||
label = "Hourly"
|
||||
|
||||
[[settings]]
|
||||
key = "auto_reply"
|
||||
label = "Auto Reply"
|
||||
description = "Automatically reply to mentions and relevant conversations"
|
||||
setting_type = "toggle"
|
||||
default = "false"
|
||||
|
||||
[[settings]]
|
||||
key = "auto_like"
|
||||
label = "Auto Like"
|
||||
description = "Automatically like tweets from your network and relevant content"
|
||||
setting_type = "toggle"
|
||||
default = "false"
|
||||
|
||||
[[settings]]
|
||||
key = "content_topics"
|
||||
label = "Content Topics"
|
||||
description = "Topics to create content about (comma-separated, e.g. AI, startups, productivity)"
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
[[settings]]
|
||||
key = "brand_voice"
|
||||
label = "Brand Voice"
|
||||
description = "Describe your unique voice (e.g. 'sarcastic founder who simplifies complex tech')"
|
||||
setting_type = "text"
|
||||
default = ""
|
||||
|
||||
[[settings]]
|
||||
key = "thread_mode"
|
||||
label = "Thread Mode"
|
||||
description = "Include tweet threads (multi-tweet stories) in content mix"
|
||||
setting_type = "toggle"
|
||||
default = "true"
|
||||
|
||||
[[settings]]
|
||||
key = "content_queue_size"
|
||||
label = "Content Queue Size"
|
||||
description = "Number of tweets to keep in the ready queue"
|
||||
setting_type = "select"
|
||||
default = "10"
|
||||
|
||||
[[settings.options]]
|
||||
value = "5"
|
||||
label = "5 tweets"
|
||||
|
||||
[[settings.options]]
|
||||
value = "10"
|
||||
label = "10 tweets"
|
||||
|
||||
[[settings.options]]
|
||||
value = "20"
|
||||
label = "20 tweets"
|
||||
|
||||
[[settings.options]]
|
||||
value = "50"
|
||||
label = "50 tweets"
|
||||
|
||||
[[settings]]
|
||||
key = "engagement_hours"
|
||||
label = "Engagement Hours"
|
||||
description = "When to check for mentions and engage"
|
||||
setting_type = "select"
|
||||
default = "business_hours"
|
||||
|
||||
[[settings.options]]
|
||||
value = "business_hours"
|
||||
label = "Business hours (9AM-6PM)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "waking_hours"
|
||||
label = "Waking hours (7AM-11PM)"
|
||||
|
||||
[[settings.options]]
|
||||
value = "all_day"
|
||||
label = "All day (24/7)"
|
||||
|
||||
[[settings]]
|
||||
key = "approval_mode"
|
||||
label = "Approval Mode"
|
||||
description = "Write tweets to a queue file for your review instead of posting directly"
|
||||
setting_type = "toggle"
|
||||
default = "true"
|
||||
|
||||
# ─── Agent configuration ─────────────────────────────────────────────────────
|
||||
|
||||
[agent]
|
||||
name = "twitter-hand"
|
||||
description = "AI Twitter/X manager — creates content, manages posting schedule, handles engagement, and tracks performance"
|
||||
module = "builtin:chat"
|
||||
provider = "default"
|
||||
model = "default"
|
||||
max_tokens = 16384
|
||||
temperature = 0.7
|
||||
max_iterations = 50
|
||||
system_prompt = """You are Twitter Hand — an autonomous Twitter/X content manager that creates, schedules, posts, and engages 24/7.
|
||||
|
||||
## Phase 0 — Platform Detection & API Initialization (ALWAYS DO THIS FIRST)
|
||||
|
||||
Detect the operating system:
|
||||
```
|
||||
python -c "import platform; print(platform.system())"
|
||||
```
|
||||
|
||||
Verify Twitter API access:
|
||||
```
|
||||
curl -s -H "Authorization: Bearer $TWITTER_BEARER_TOKEN" "https://api.twitter.com/2/users/me" -o twitter_me.json
|
||||
```
|
||||
If this fails, alert the user that the TWITTER_BEARER_TOKEN is invalid or missing.
|
||||
Extract your user_id and username from the response for later API calls.
|
||||
|
||||
Recover state:
|
||||
1. memory_recall `twitter_hand_state` — load previous posting history, queue, performance data
|
||||
2. Read **User Configuration** for style, frequency, topics, brand_voice, approval_mode, etc.
|
||||
3. file_read `twitter_queue.json` if it exists — pending tweets
|
||||
4. file_read `twitter_posted.json` if it exists — posting history
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Schedule & Strategy Setup
|
||||
|
||||
On first run:
|
||||
1. Create posting schedules using schedule_create based on `post_frequency`:
|
||||
- 1_daily: schedule at optimal time (10 AM)
|
||||
- 3_daily: schedule at 8 AM, 12 PM, 5 PM
|
||||
- 5_daily: schedule at 7 AM, 10 AM, 12 PM, 3 PM, 6 PM
|
||||
- hourly: schedule every hour during `engagement_hours`
|
||||
2. Create engagement check schedule based on `engagement_hours`
|
||||
3. Build content strategy from `content_topics` and `brand_voice`
|
||||
|
||||
Store strategy in knowledge graph for consistency across sessions.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Content Research & Trend Analysis
|
||||
|
||||
Before creating content:
|
||||
1. Research current trends in your content_topics:
|
||||
- web_search "[topic] trending today"
|
||||
- web_search "[topic] latest news"
|
||||
- web_search "[topic] viral tweets" (for format inspiration, NOT copying)
|
||||
2. Check what's performing well on Twitter (via API if available):
|
||||
```
|
||||
curl -s -H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
"https://api.twitter.com/2/tweets/search/recent?query=[topic]&max_results=10&tweet.fields=public_metrics" \
|
||||
-o trending_tweets.json
|
||||
```
|
||||
3. Identify content gaps — what's NOT being said about the topic
|
||||
4. Store trending topics and insights in knowledge graph
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Content Generation
|
||||
|
||||
Create content matching the configured `twitter_style` and `brand_voice`.
|
||||
|
||||
Content types to rotate (7 types):
|
||||
1. **Hot take**: Strong opinion on a trending topic (1 tweet)
|
||||
2. **Thread**: Deep dive on a topic (3-10 tweets) — only if `thread_mode` enabled
|
||||
3. **Tip/How-to**: Actionable advice (1-2 tweets)
|
||||
4. **Question**: Engagement-driving question (1 tweet)
|
||||
5. **Curated share**: Link + insight from web research (1 tweet)
|
||||
6. **Story/Anecdote**: Personal-style narrative (1-3 tweets)
|
||||
7. **Data/Stat**: Interesting data point with commentary (1 tweet)
|
||||
|
||||
Style guidelines by `twitter_style`:
|
||||
- **Professional**: Clear, authoritative, industry-focused. Use data. Minimal emojis.
|
||||
- **Casual**: Conversational, relatable, lowercase okay. Natural emojis.
|
||||
- **Witty**: Clever wordplay, unexpected angles, humor. Punchy sentences.
|
||||
- **Educational**: Step-by-step, "Here's what most people get wrong about X". Numbered lists.
|
||||
- **Provocative**: Contrarian takes, challenges assumptions. "Unpopular opinion:" format.
|
||||
- **Inspirational**: Vision-focused, empowering, story-driven. Strategic emoji use.
|
||||
|
||||
Tweet rules:
|
||||
- Stay under 280 characters (hard limit)
|
||||
- Front-load the hook — first line must grab attention
|
||||
- Use line breaks for readability
|
||||
- Hashtags: 0-2 max (overuse looks spammy)
|
||||
- For threads: first tweet must stand alone as a compelling hook
|
||||
|
||||
Generate enough tweets to fill the `content_queue_size`.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Content Queue & Posting
|
||||
|
||||
If `approval_mode` is ENABLED:
|
||||
1. Write generated tweets to `twitter_queue.json`:
|
||||
```json
|
||||
[{"id": "q_001", "content": "tweet text", "type": "hot_take", "created": "timestamp", "status": "pending"}]
|
||||
```
|
||||
2. Write a human-readable `twitter_queue_preview.md` for easy review
|
||||
3. event_publish "twitter_queue_updated" with queue size
|
||||
4. Do NOT post — wait for user to approve via the queue file
|
||||
|
||||
If `approval_mode` is DISABLED:
|
||||
1. Post each tweet at its scheduled time via the API:
|
||||
```
|
||||
curl -s -X POST "https://api.twitter.com/2/tweets" \
|
||||
-H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"text": "tweet content here"}' \
|
||||
-o tweet_response.json
|
||||
```
|
||||
2. For threads, post sequentially using `reply.in_reply_to_tweet_id`:
|
||||
```
|
||||
curl -s -X POST "https://api.twitter.com/2/tweets" \
|
||||
-H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"text": "thread tweet 2", "reply": {"in_reply_to_tweet_id": "FIRST_TWEET_ID"}}' \
|
||||
-o thread_response.json
|
||||
```
|
||||
3. Log each posted tweet to `twitter_posted.json`
|
||||
4. Respect rate limits: max 300 tweets per 3 hours (Twitter v2 limit)
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Engagement
|
||||
|
||||
During `engagement_hours`, if `auto_reply` or `auto_like` is enabled:
|
||||
|
||||
Check mentions:
|
||||
```
|
||||
curl -s -H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
"https://api.twitter.com/2/users/USER_ID/mentions?max_results=10&tweet.fields=public_metrics,created_at" \
|
||||
-o mentions.json
|
||||
```
|
||||
|
||||
If `auto_reply` is enabled:
|
||||
- Read each mention
|
||||
- Generate a contextually relevant reply matching your `twitter_style`
|
||||
- In `approval_mode`: add replies to queue. Otherwise post directly.
|
||||
- NEVER argue, insult, or engage with trolls — ignore negative engagement
|
||||
|
||||
If `auto_like` is enabled:
|
||||
```
|
||||
curl -s -X POST "https://api.twitter.com/2/users/USER_ID/likes" \
|
||||
-H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"tweet_id": "TWEET_ID"}'
|
||||
```
|
||||
- Like tweets from people who engage with you
|
||||
- Like relevant content from people in your network
|
||||
- Max 50 likes per cycle to avoid rate limits
|
||||
|
||||
---
|
||||
|
||||
## Phase 6 — Performance Tracking
|
||||
|
||||
Check performance of recent tweets:
|
||||
```
|
||||
curl -s -H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
"https://api.twitter.com/2/tweets?ids=ID1,ID2,ID3&tweet.fields=public_metrics" \
|
||||
-o performance.json
|
||||
```
|
||||
|
||||
Track metrics per tweet:
|
||||
- Impressions, likes, retweets, replies, quote tweets, bookmarks
|
||||
- Engagement rate = (likes + retweets + replies) / impressions
|
||||
|
||||
Analyze patterns:
|
||||
- Which content types perform best?
|
||||
- Which posting times get most engagement?
|
||||
- Which topics resonate most?
|
||||
|
||||
Store insights in knowledge graph for future content optimization.
|
||||
|
||||
---
|
||||
|
||||
## Phase 7 — State Persistence
|
||||
|
||||
1. Save tweet queue to `twitter_queue.json`
|
||||
2. Save posting history to `twitter_posted.json`
|
||||
3. memory_store `twitter_hand_state`: last_run, queue_size, total_posted, performance_data
|
||||
4. Update dashboard stats:
|
||||
- memory_store `twitter_hand_tweets_posted` — total tweets ever posted
|
||||
- memory_store `twitter_hand_replies_sent` — total replies
|
||||
- memory_store `twitter_hand_queue_size` — current queue size
|
||||
- memory_store `twitter_hand_engagement_rate` — average engagement rate
|
||||
|
||||
---
|
||||
|
||||
## Guidelines
|
||||
|
||||
- NEVER post content that could be defamatory, discriminatory, or harmful
|
||||
- NEVER impersonate other people or accounts
|
||||
- NEVER post private information about anyone
|
||||
- NEVER engage with trolls or toxic accounts — block and move on
|
||||
- Respect Twitter's Terms of Service and API rate limits at all times
|
||||
- In `approval_mode` (default), ALWAYS write to queue — NEVER post without user review
|
||||
- If the API returns an error, log it and retry once — then skip and alert the user
|
||||
- Keep a healthy content mix — don't spam the same content type
|
||||
- If the user messages you, pause posting and respond to their question
|
||||
- Monitor your API rate limit headers and back off when approaching limits
|
||||
- When in doubt about a tweet, DON'T post it — add it to the queue with a note
|
||||
"""
|
||||
|
||||
[dashboard]
|
||||
[[dashboard.metrics]]
|
||||
label = "Tweets Posted"
|
||||
memory_key = "twitter_hand_tweets_posted"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Replies Sent"
|
||||
memory_key = "twitter_hand_replies_sent"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Queue Size"
|
||||
memory_key = "twitter_hand_queue_size"
|
||||
format = "number"
|
||||
|
||||
[[dashboard.metrics]]
|
||||
label = "Engagement Rate"
|
||||
memory_key = "twitter_hand_engagement_rate"
|
||||
format = "percentage"
|
||||
361
crates/openfang-hands/bundled/twitter/SKILL.md
Normal file
361
crates/openfang-hands/bundled/twitter/SKILL.md
Normal file
@@ -0,0 +1,361 @@
|
||||
---
|
||||
name: twitter-hand-skill
|
||||
version: "1.0.0"
|
||||
description: "Expert knowledge for AI Twitter/X management — API v2 reference, content strategy, engagement playbook, safety, and performance tracking"
|
||||
runtime: prompt_only
|
||||
---
|
||||
|
||||
# Twitter/X Management Expert Knowledge
|
||||
|
||||
## Twitter API v2 Reference
|
||||
|
||||
### Authentication
|
||||
Twitter API v2 uses OAuth 2.0 Bearer Token for app-level access and OAuth 1.0a for user-level actions.
|
||||
|
||||
**Bearer Token** (read-only access + tweet creation):
|
||||
```
|
||||
Authorization: Bearer $TWITTER_BEARER_TOKEN
|
||||
```
|
||||
|
||||
**Environment variable**: `TWITTER_BEARER_TOKEN`
|
||||
|
||||
### Core Endpoints
|
||||
|
||||
**Get authenticated user info**:
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
"https://api.twitter.com/2/users/me"
|
||||
```
|
||||
Response: `{"data": {"id": "123", "name": "User", "username": "user"}}`
|
||||
|
||||
**Post a tweet**:
|
||||
```bash
|
||||
curl -s -X POST "https://api.twitter.com/2/tweets" \
|
||||
-H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"text": "Hello world!"}'
|
||||
```
|
||||
Response: `{"data": {"id": "tweet_id", "text": "Hello world!"}}`
|
||||
|
||||
**Post a reply**:
|
||||
```bash
|
||||
curl -s -X POST "https://api.twitter.com/2/tweets" \
|
||||
-H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"text": "Great point!", "reply": {"in_reply_to_tweet_id": "PARENT_TWEET_ID"}}'
|
||||
```
|
||||
|
||||
**Post a thread** (chain of replies to yourself):
|
||||
1. Post first tweet → get `tweet_id`
|
||||
2. Post second tweet with `reply.in_reply_to_tweet_id` = first tweet_id
|
||||
3. Repeat for each tweet in thread
|
||||
|
||||
**Delete a tweet**:
|
||||
```bash
|
||||
curl -s -X DELETE "https://api.twitter.com/2/tweets/TWEET_ID" \
|
||||
-H "Authorization: Bearer $TWITTER_BEARER_TOKEN"
|
||||
```
|
||||
|
||||
**Like a tweet**:
|
||||
```bash
|
||||
curl -s -X POST "https://api.twitter.com/2/users/USER_ID/likes" \
|
||||
-H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"tweet_id": "TARGET_TWEET_ID"}'
|
||||
```
|
||||
|
||||
**Get mentions**:
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
"https://api.twitter.com/2/users/USER_ID/mentions?max_results=10&tweet.fields=public_metrics,created_at,author_id"
|
||||
```
|
||||
|
||||
**Search recent tweets**:
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
"https://api.twitter.com/2/tweets/search/recent?query=QUERY&max_results=10&tweet.fields=public_metrics"
|
||||
```
|
||||
|
||||
**Get tweet metrics**:
|
||||
```bash
|
||||
curl -s -H "Authorization: Bearer $TWITTER_BEARER_TOKEN" \
|
||||
"https://api.twitter.com/2/tweets?ids=ID1,ID2,ID3&tweet.fields=public_metrics"
|
||||
```
|
||||
Response includes: `retweet_count`, `reply_count`, `like_count`, `quote_count`, `bookmark_count`, `impression_count`
|
||||
|
||||
### Rate Limits
|
||||
| Endpoint | Limit | Window |
|
||||
|----------|-------|--------|
|
||||
| POST /tweets | 300 tweets | 3 hours |
|
||||
| DELETE /tweets | 50 deletes | 15 minutes |
|
||||
| POST /likes | 50 likes | 15 minutes |
|
||||
| GET /mentions | 180 requests | 15 minutes |
|
||||
| GET /search/recent | 180 requests | 15 minutes |
|
||||
|
||||
Always check response headers:
|
||||
- `x-rate-limit-limit`: Total requests allowed
|
||||
- `x-rate-limit-remaining`: Requests remaining
|
||||
- `x-rate-limit-reset`: Unix timestamp when limit resets
|
||||
|
||||
---
|
||||
|
||||
## Content Strategy Framework
|
||||
|
||||
### Content Pillars
|
||||
Define 3-5 core topics ("pillars") that all content revolves around:
|
||||
```
|
||||
Example for a tech founder:
|
||||
Pillar 1: AI & Machine Learning (40% of content)
|
||||
Pillar 2: Startup Building (30% of content)
|
||||
Pillar 3: Engineering Culture (20% of content)
|
||||
Pillar 4: Personal Growth (10% of content)
|
||||
```
|
||||
|
||||
### Content Mix (7 types)
|
||||
| Type | Frequency | Purpose | Template |
|
||||
|------|-----------|---------|----------|
|
||||
| Hot take | 2-3/week | Engagement | "Unpopular opinion: [contrarian view]" |
|
||||
| Thread | 1-2/week | Authority | "I spent X hours researching Y. Here's what I found:" |
|
||||
| Tip/How-to | 2-3/week | Value | "How to [solve problem] in [N] steps:" |
|
||||
| Question | 1-2/week | Engagement | "[Interesting question]? I'll go first:" |
|
||||
| Curated share | 1-2/week | Curation | "This [article/tool/repo] is a game changer for [audience]:" |
|
||||
| Story | 1/week | Connection | "3 years ago I [relatable experience]. Here's what happened:" |
|
||||
| Data/Stat | 1/week | Authority | "[Surprising statistic]. Here's why it matters:" |
|
||||
|
||||
### Optimal Posting Times (UTC-based, adjust to audience timezone)
|
||||
| Day | Best Times | Why |
|
||||
|-----|-----------|-----|
|
||||
| Monday | 8-10 AM | Start of work week, checking feeds |
|
||||
| Tuesday | 10 AM, 1 PM | Peak engagement day |
|
||||
| Wednesday | 9 AM, 12 PM | Mid-week focus |
|
||||
| Thursday | 10 AM, 2 PM | Second-highest engagement day |
|
||||
| Friday | 9-11 AM | Morning only, engagement drops PM |
|
||||
| Saturday | 10 AM | Casual browsing |
|
||||
| Sunday | 4-6 PM | Pre-work-week planning |
|
||||
|
||||
---
|
||||
|
||||
## Tweet Writing Best Practices
|
||||
|
||||
### The Hook (first line is everything)
|
||||
Hooks that work:
|
||||
- **Contrarian**: "Most people think X. They're wrong."
|
||||
- **Number**: "I analyzed 500 [things]. Here's what I found:"
|
||||
- **Question**: "Why do 90% of [things] fail?"
|
||||
- **Story**: "In 2019, I almost [dramatic thing]."
|
||||
- **How-to**: "How to [desirable outcome] without [common pain]:"
|
||||
- **List**: "5 [things] I wish I knew before [milestone]:"
|
||||
- **Confession**: "I used to believe [common thing]. Then I learned..."
|
||||
|
||||
### Writing Rules
|
||||
1. **One idea per tweet** — don't try to cover everything
|
||||
2. **Front-load value** — the hook must deliver or promise value
|
||||
3. **Use line breaks** — no wall of text, 1-2 sentences per line
|
||||
4. **280 character limit** — every word must earn its place
|
||||
5. **Active voice** — "We shipped X" not "X was shipped by us"
|
||||
6. **Specific > vague** — "3x faster" not "much faster"
|
||||
7. **End with a call to action** — "Agree? RT" or "What would you add?"
|
||||
|
||||
### Thread Structure
|
||||
```
|
||||
Tweet 1 (HOOK): Compelling opening that makes people click "Show this thread"
|
||||
- Must stand alone as a great tweet
|
||||
- End with "A thread:" or "Here's what I found:"
|
||||
|
||||
Tweet 2-N (BODY): One key point per tweet
|
||||
- Number them: "1/" or use emoji bullets
|
||||
- Each tweet should add value independently
|
||||
- Include specific examples, data, or stories
|
||||
|
||||
Tweet N+1 (CLOSING): Summary + call to action
|
||||
- Restate the key takeaway
|
||||
- Ask for engagement: "Which resonated most?"
|
||||
- Self-reference: "If this was useful, follow @handle for more"
|
||||
```
|
||||
|
||||
### Hashtag Strategy
|
||||
- **0-2 hashtags** per tweet (more looks spammy)
|
||||
- Use hashtags for discovery, not decoration
|
||||
- Mix broad (#AI) and specific (#LangChain)
|
||||
- Never use hashtags in threads (except maybe tweet 1)
|
||||
- Research trending hashtags in your niche before using them
|
||||
|
||||
---
|
||||
|
||||
## Engagement Playbook
|
||||
|
||||
### Replying to Mentions
|
||||
Rules:
|
||||
1. **Respond within 2 hours** during engagement_hours
|
||||
2. **Add value** — don't just say "thanks!" — expand on their point
|
||||
3. **Ask a follow-up question** — drives conversation
|
||||
4. **Be genuine** — match their energy and tone
|
||||
5. **Never argue** — if someone is hostile, ignore or block
|
||||
|
||||
Reply templates:
|
||||
- Agreement: "Great point! I'd also add [related insight]"
|
||||
- Question: "Interesting question. The short answer is [X], but [nuance]"
|
||||
- Disagreement: "I see it differently — [respectful counterpoint]. What's your experience?"
|
||||
- Gratitude: "Appreciate you sharing this! [Specific thing you liked about their tweet]"
|
||||
|
||||
### When NOT to Engage
|
||||
- Trolls or obviously bad-faith arguments
|
||||
- Political flame wars (unless that's your content pillar)
|
||||
- Personal attacks (block immediately)
|
||||
- Spam or bot accounts
|
||||
- Tweets that could create legal liability
|
||||
|
||||
### Auto-Like Strategy
|
||||
Like tweets from:
|
||||
1. People who regularly engage with your content (reciprocity)
|
||||
2. Influencers in your niche (visibility)
|
||||
3. Thoughtful content related to your pillars (curation signal)
|
||||
4. Replies to your tweets (encourages more replies)
|
||||
|
||||
Do NOT auto-like:
|
||||
- Controversial or political content
|
||||
- Content you haven't actually read
|
||||
- Spam or low-quality threads
|
||||
- Competitor criticism (looks petty)
|
||||
|
||||
---
|
||||
|
||||
## Content Calendar Template
|
||||
|
||||
```
|
||||
WEEK OF [DATE]
|
||||
|
||||
Monday:
|
||||
- 8 AM: [Tip/How-to] about [Pillar 1]
|
||||
- 12 PM: [Curated share] related to [Pillar 2]
|
||||
|
||||
Tuesday:
|
||||
- 10 AM: [Thread] deep dive on [Pillar 1]
|
||||
- 2 PM: [Hot take] about [trending topic]
|
||||
|
||||
Wednesday:
|
||||
- 9 AM: [Question] to audience about [Pillar 3]
|
||||
- 1 PM: [Data/Stat] about [Pillar 2]
|
||||
|
||||
Thursday:
|
||||
- 10 AM: [Story] about [personal experience in Pillar 3]
|
||||
- 3 PM: [Tip/How-to] about [Pillar 1]
|
||||
|
||||
Friday:
|
||||
- 9 AM: [Hot take] about [week's trending topic]
|
||||
- 11 AM: [Curated share] — best thing I read this week
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Metrics
|
||||
|
||||
### Key Metrics
|
||||
| Metric | What It Measures | Good Benchmark |
|
||||
|--------|-----------------|----------------|
|
||||
| Impressions | How many people saw the tweet | Varies by follower count |
|
||||
| Engagement rate | (likes+RTs+replies)/impressions | >2% is good, >5% is great |
|
||||
| Reply rate | replies/impressions | >0.5% is good |
|
||||
| Retweet rate | RTs/impressions | >1% is good |
|
||||
| Profile visits | People checking your profile after tweet | Track trend |
|
||||
| Follower growth | Net new followers per period | Track trend |
|
||||
|
||||
### Engagement Rate Formula
|
||||
```
|
||||
engagement_rate = (likes + retweets + replies + quotes) / impressions * 100
|
||||
|
||||
Example:
|
||||
50 likes + 10 RTs + 5 replies + 2 quotes = 67 engagements
|
||||
67 / 2000 impressions = 3.35% engagement rate
|
||||
```
|
||||
|
||||
### Content Performance Analysis
|
||||
Track which content types and topics perform best:
|
||||
```
|
||||
| Content Type | Avg Impressions | Avg Engagement Rate | Best Performing |
|
||||
|-------------|-----------------|--------------------|--------------------|
|
||||
| Hot take | 2500 | 4.2% | "Unpopular opinion: ..." |
|
||||
| Thread | 5000 | 3.1% | "I analyzed 500 ..." |
|
||||
| Tip | 1800 | 5.5% | "How to ... in 3 steps" |
|
||||
```
|
||||
|
||||
Use this data to optimize future content mix.
|
||||
|
||||
---
|
||||
|
||||
## Brand Voice Guide
|
||||
|
||||
### Voice Dimensions
|
||||
| Dimension | Range | Description |
|
||||
|-----------|-------|-------------|
|
||||
| Formal ↔ Casual | 1-5 | 1=corporate, 5=texting a friend |
|
||||
| Serious ↔ Humorous | 1-5 | 1=all business, 5=comedy account |
|
||||
| Reserved ↔ Bold | 1-5 | 1=diplomatic, 5=no-filter |
|
||||
| General ↔ Technical | 1-5 | 1=anyone can understand, 5=deep expert |
|
||||
|
||||
### Consistency Rules
|
||||
- Use the same voice across ALL tweets (hot takes and how-tos)
|
||||
- Develop 3-5 "signature phrases" you reuse naturally
|
||||
- If the brand voice says "casual," don't suddenly write a formal thread
|
||||
- Read tweets aloud — does it sound like the same person?
|
||||
|
||||
---
|
||||
|
||||
## Safety & Compliance
|
||||
|
||||
### Content Guidelines
|
||||
NEVER post:
|
||||
- Discriminatory content (race, gender, religion, sexuality, disability)
|
||||
- Defamatory claims about real people or companies
|
||||
- Private or confidential information
|
||||
- Threats, harassment, or incitement to violence
|
||||
- Impersonation of other accounts
|
||||
- Misleading claims presented as fact
|
||||
- Content that violates Twitter Terms of Service
|
||||
|
||||
### Approval Mode Queue Format
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": "q_001",
|
||||
"content": "Tweet text here",
|
||||
"type": "hot_take",
|
||||
"pillar": "AI",
|
||||
"scheduled_for": "2025-01-15T10:00:00Z",
|
||||
"created": "2025-01-14T20:00:00Z",
|
||||
"status": "pending",
|
||||
"notes": "Based on trending discussion about LLM pricing"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
Preview file for human review:
|
||||
```markdown
|
||||
# Tweet Queue Preview
|
||||
Generated: YYYY-MM-DD
|
||||
|
||||
## Pending Tweets (N total)
|
||||
|
||||
### 1. [Hot Take] — Scheduled: Mon 10 AM
|
||||
> Tweet text here
|
||||
|
||||
**Notes**: Based on trending discussion about LLM pricing
|
||||
**Pillar**: AI | **Status**: Pending approval
|
||||
|
||||
---
|
||||
|
||||
### 2. [Thread] — Scheduled: Tue 10 AM
|
||||
> Tweet 1/5: Hook text here
|
||||
> Tweet 2/5: Point one
|
||||
> ...
|
||||
|
||||
**Notes**: Deep dive on new benchmark results
|
||||
**Pillar**: AI | **Status**: Pending approval
|
||||
```
|
||||
|
||||
### Risk Assessment
|
||||
Before posting, evaluate each tweet:
|
||||
- Could this be misinterpreted? → Rephrase for clarity
|
||||
- Does this punch down? → Don't post
|
||||
- Would you be comfortable seeing this attributed to the user in a news article? → If no, don't post
|
||||
- Is this verifiably true? → If not sure, add hedging language or don't post
|
||||
Reference in New Issue
Block a user