初始化提交

2026-03-01 16:24:24 +08:00
commit 92e5def702
492 changed files with 211343 additions and 0 deletions
--- a/crates/openfang-hands/bundled/lead/HAND.toml
+++ b/crates/openfang-hands/bundled/lead/HAND.toml
@@ -0,0 +1,335 @@
+id = "lead"
+name = "Lead Hand"
+description = "Autonomous lead generation — discovers, enriches, and delivers qualified leads on a schedule"
+category = "data"
+icon = "\U0001F4CA"
+tools = ["shell_exec", "file_read", "file_write", "file_list", "web_fetch", "web_search", "memory_store", "memory_recall", "schedule_create", "schedule_list", "schedule_delete", "knowledge_add_entity", "knowledge_add_relation", "knowledge_query"]
+
+# ─── Configurable settings ───────────────────────────────────────────────────
+
+[[settings]]
+key = "target_industry"
+label = "Target Industry"
+description = "Industry vertical to focus on (e.g. SaaS, fintech, healthcare, e-commerce)"
+setting_type = "text"
+default = ""
+
+[[settings]]
+key = "target_role"
+label = "Target Role"
+description = "Decision-maker titles to target (e.g. CTO, VP Engineering, Head of Product)"
+setting_type = "text"
+default = ""
+
+[[settings]]
+key = "company_size"
+label = "Company Size"
+description = "Filter leads by company size"
+setting_type = "select"
+default = "any"
+
+[[settings.options]]
+value = "any"
+label = "Any size"
+
+[[settings.options]]
+value = "startup"
+label = "Startup (1-50)"
+
+[[settings.options]]
+value = "smb"
+label = "SMB (51-500)"
+
+[[settings.options]]
+value = "enterprise"
+label = "Enterprise (501+)"
+
+[[settings]]
+key = "lead_source"
+label = "Lead Source"
+description = "Primary method for discovering leads"
+setting_type = "select"
+default = "web_search"
+
+[[settings.options]]
+value = "web_search"
+label = "Web Search"
+
+[[settings.options]]
+value = "linkedin_public"
+label = "LinkedIn (public profiles)"
+
+[[settings.options]]
+value = "crunchbase"
+label = "Crunchbase"
+
+[[settings.options]]
+value = "custom"
+label = "Custom (specify in prompt)"
+
+[[settings]]
+key = "output_format"
+label = "Output Format"
+description = "Report delivery format"
+setting_type = "select"
+default = "csv"
+
+[[settings.options]]
+value = "csv"
+label = "CSV"
+
+[[settings.options]]
+value = "json"
+label = "JSON"
+
+[[settings.options]]
+value = "markdown_table"
+label = "Markdown Table"
+
+[[settings]]
+key = "leads_per_report"
+label = "Leads Per Report"
+description = "Number of leads to include in each report"
+setting_type = "select"
+default = "25"
+
+[[settings.options]]
+value = "10"
+label = "10 leads"
+
+[[settings.options]]
+value = "25"
+label = "25 leads"
+
+[[settings.options]]
+value = "50"
+label = "50 leads"
+
+[[settings.options]]
+value = "100"
+label = "100 leads"
+
+[[settings]]
+key = "delivery_schedule"
+label = "Delivery Schedule"
+description = "When to generate and deliver lead reports"
+setting_type = "select"
+default = "daily_9am"
+
+[[settings.options]]
+value = "daily_7am"
+label = "Daily at 7 AM"
+
+[[settings.options]]
+value = "daily_9am"
+label = "Daily at 9 AM"
+
+[[settings.options]]
+value = "weekdays_8am"
+label = "Weekdays at 8 AM"
+
+[[settings.options]]
+value = "weekly_monday"
+label = "Weekly on Monday"
+
+[[settings]]
+key = "geo_focus"
+label = "Geographic Focus"
+description = "Geographic region to prioritize (e.g. US, Europe, APAC, global)"
+setting_type = "text"
+default = ""
+
+[[settings]]
+key = "enrichment_depth"
+label = "Enrichment Depth"
+description = "How much context to gather per lead"
+setting_type = "select"
+default = "standard"
+
+[[settings.options]]
+value = "basic"
+label = "Basic (name, title, company)"
+
+[[settings.options]]
+value = "standard"
+label = "Standard (+ company size, industry, tech stack)"
+
+[[settings.options]]
+value = "deep"
+label = "Deep (+ funding, recent news, social profiles)"
+
+# ─── Agent configuration ─────────────────────────────────────────────────────
+
+[agent]
+name = "lead-hand"
+description = "AI lead generation engine — discovers, enriches, deduplicates, and delivers qualified leads on your schedule"
+module = "builtin:chat"
+provider = "default"
+model = "default"
+max_tokens = 16384
+temperature = 0.3
+max_iterations = 50
+system_prompt = """You are Lead Hand — an autonomous lead generation engine that discovers, enriches, and delivers qualified leads 24/7.
+
+## Phase 0 — Platform Detection (ALWAYS DO THIS FIRST)
+
+Before running any command, detect the operating system (if `python` is not on PATH, retry with `python3`):
+```
+python -c "import platform; print(platform.system())"
+```
+Then set your approach:
+- **Windows**: paths use forward slashes in Python, `del` for cleanup
+- **macOS / Linux**: standard Unix paths, `rm` for cleanup
+
+---
+
+## Phase 1 — State Recovery & Schedule Setup
+
+On first run (memory_recall returns no `lead_hand_state`):
+1. Check memory_recall for `lead_hand_state` — if it exists, you're resuming; follow the subsequent-run steps below instead
+2. Read the **User Configuration** section for target_industry, target_role, company_size, geo_focus, etc.
+3. Create your delivery schedule using schedule_create based on `delivery_schedule` setting (call schedule_list first and skip creation if the schedule already exists)
+4. Load any existing lead database from `leads_database.json` via file_read (if it exists)
+
+On subsequent runs:
+1. Recall `lead_hand_state` from memory — it records last_run, total_leads, and report_count
+2. Check if this is a scheduled run or a user-triggered run
+3. Load the existing leads database to avoid duplicates
+
+---
+
+## Phase 2 — Target Profile Construction
+
+Build an Ideal Customer Profile (ICP) from user settings:
+- Industry: from `target_industry` setting
+- Decision-maker roles: from `target_role` setting
+- Company size filter: from `company_size` setting
+- Geography: from `geo_focus` setting
+
+Store the ICP in the knowledge graph:
+- knowledge_add_entity: ICP profile node
+- knowledge_add_relation: link ICP to target attributes
+
+---
+
+## Phase 3 — Lead Discovery
+
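+Execute a multi-query web research loop, starting from the source selected in the `lead_source` setting (web search, public LinkedIn profiles, Crunchbase, or a custom source named by the user):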
+1. Construct 5-10 search queries combining industry + role + signals:
+   - "[industry] [role] hiring" (growth signal)
+   - "[industry] companies series [A/B/C] funding" (funded companies)
+   - "[industry] companies [geo] list" (geographic targeting)
+   - "top [industry] startups [previous year] [current year]" (emerging companies)
+   - "[company_size] [industry] companies [geo]" (size-filtered)
+2. For each query, use web_search to find results
+3. For promising results, use web_fetch to extract company/person details
+4. Extract structured lead data: name, title, company, company_url, linkedin_url (if public), email pattern
+
+Target: discover 2-3x the `leads_per_report` setting to allow for filtering.
+
+---
+
+## Phase 4 — Lead Enrichment
+
+For each discovered lead, based on `enrichment_depth`:
+
+**Basic**: name, title, company — already have this from discovery
+**Standard**: additionally fetch:
+- Company website (web_fetch company_url) — extract: employee count, industry, tech stack, product description
+- Look for company on job boards — hiring signals indicate growth
+**Deep**: additionally fetch:
+- Recent funding news (web_search "[company] funding round")
+- Recent company news (web_search "[company] news [current year]")
+- Social profiles (web_search "[person name] [company] linkedin twitter")
+
+Store enriched entities in knowledge graph:
+- knowledge_add_entity for each lead and company
+- knowledge_add_relation for lead→company, company→industry relationships
+
+---
+
+## Phase 5 — Deduplication & Scoring
+
+1. Compare new leads against existing `leads_database.json`:
+   - Match on: normalized company name + person name
+   - Skip exact duplicates
+   - Update existing leads with new enrichment data
+2. Score each lead (0-100):
+   - ICP match: up to 30 (full marks when industry, role, size, and geo all match)
+   - Growth signals: up to 20 (hiring, funding, news)
+   - Enrichment completeness: up to 20 (full marks when all fields are populated)
+   - Recency: up to 15 (company active recently)
+   - Accessibility: up to 15 (public contact info available)
+3. Sort by score descending
+4. Take top N leads per `leads_per_report` setting
+
+---
+
+## Phase 6 — Report Generation
+
+Generate the report in the configured `output_format`:
+
+**CSV format**:
+```csv
+Name,Title,Company,Company URL,LinkedIn,Industry,Size,Score,Discovered,Notes
+```
+
+**JSON format**:
+```json
+[{"name": "...", "title": "...", "company": "...", "company_url": "...", "linkedin": "...", "industry": "...", "company_size": "...", "score": 85, "discovered": "2025-01-15", "enrichment": {...}, "notes": "..."}]
+```
+
+**Markdown Table format**:
+```markdown
+| # | Name | Title | Company | Score | Signal |
+|---|------|-------|---------|-------|--------|
+```
+
+Save report to: `lead_report_YYYY-MM-DD.<ext>`, where `<ext>` is `csv`, `json`, or `md` to match `output_format`
+
+---
+
+## Phase 7 — State Persistence
+
+After each run:
+1. Update `leads_database.json` with all known leads (new + existing)
+2. memory_store `lead_hand_state` with: last_run, total_leads, report_count
+3. Update dashboard stats:
+   - memory_store `lead_hand_leads_found` — total unique leads discovered
+   - memory_store `lead_hand_reports_generated` — increment report count
+   - memory_store `lead_hand_last_report_date` — today's date
+   - memory_store `lead_hand_unique_companies` — count of unique companies
+
+---
+
+## Guidelines
+
+- NEVER fabricate lead data — every field must come from actual web research
+- Respect robots.txt and rate limits — add delays between fetches if needed
+- Do NOT scrape behind login walls — only use publicly available information
+- If a search yields no results, try alternative queries before giving up
+- Always deduplicate before reporting — users hate seeing the same lead twice
+- Include your confidence level for enriched data (e.g. "email pattern: likely" vs "email: verified")
+- If the user messages you directly, pause the pipeline and respond to their question
+"""
+
+[dashboard]
+[[dashboard.metrics]]
+label = "Leads Found"
+memory_key = "lead_hand_leads_found"
+format = "number"
+
+[[dashboard.metrics]]
+label = "Reports Generated"
+memory_key = "lead_hand_reports_generated"
+format = "number"
+
+[[dashboard.metrics]]
+label = "Last Report"
+memory_key = "lead_hand_last_report_date"
+format = "text"
+
+[[dashboard.metrics]]
+label = "Unique Companies"
+memory_key = "lead_hand_unique_companies"
+format = "number"
--- a/crates/openfang-hands/bundled/lead/SKILL.md
+++ b/crates/openfang-hands/bundled/lead/SKILL.md
@@ -0,0 +1,235 @@
+---
+name: lead-hand-skill
+version: "1.0.0"
+description: "Expert knowledge for AI lead generation — web research, enrichment, scoring, deduplication, and report generation"
+runtime: prompt_only
+---
+
+# Lead Generation Expert Knowledge
+
+## Ideal Customer Profile (ICP) Construction
+
+A good ICP answers these questions:
+1. **Industry**: What vertical does your ideal customer operate in?
+2. **Company size**: How many employees? What revenue range?
+3. **Geography**: Where are they located?
+4. **Technology**: What tech stack do they use?
+5. **Budget signals**: Are they funded? Growing? Hiring?
+6. **Decision-maker**: Who has buying authority? (title, seniority)
+7. **Pain points**: What problems does your product solve for them?
+
+### Company Size Categories
+| Category | Employees | Typical Budget | Sales Cycle |
+|----------|-----------|---------------|-------------|
+| Startup | 1-50 | $1K-$25K/yr | 1-4 weeks |
+| SMB | 51-500 | $25K-$250K/yr | 1-3 months |
+| Enterprise | 501+ | $250K+/yr | 3-12 months |
+
+---
+
+## Web Research Techniques for Lead Discovery
+
+### Search Query Patterns
+```
+# Find companies in a vertical
+"[industry] companies" site:crunchbase.com
+"top [industry] startups [year]"
+"[industry] companies [city/region]"
+
+# Find decision-makers
+"[title]" "[company]" site:linkedin.com
+"[company] team" OR "[company] about us" OR "[company] leadership"
+
+# Growth signals (high-intent leads)
+"[company] hiring [role]" — indicates budget and growth
+"[company] series [A/B/C]" — recently funded
+"[company] expansion" OR "[company] new office"
+"[company] product launch [year]"
+
+# Technology signals
+"[company] uses [technology]" OR "[company] built with [technology]"
+site:stackshare.io "[company]"
+site:builtwith.com "[company]"
+```
+
+### Source Quality Ranking
+1. **Company website** (About/Team pages) — most reliable for personnel
+2. **Crunchbase** — funding, company details, leadership
+3. **LinkedIn** (public profiles) — titles, tenure, connections
+4. **Press releases** — announcements, partnerships, funding
+5. **Job boards** — hiring signals, tech stack requirements
+6. **Industry directories** — comprehensive company lists
+7. **News articles** — recent activity, reputation
+8. **Social media** — engagement, company culture
+
+---
+
+## Lead Enrichment Patterns
+
+### Basic Enrichment (always available)
+- Full name (first + last)
+- Job title
+- Company name
+- Company website URL
+
+### Standard Enrichment
+- Company employee count (from About page, Crunchbase, or LinkedIn)
+- Company industry classification
+- Company founding year
+- Technology stack (from job postings, StackShare, BuiltWith)
+- Social profiles (LinkedIn URL, Twitter handle)
+- Company description (from meta tags or About page)
+
+### Deep Enrichment
+- Recent funding rounds (amount, investors, date)
+- Recent news mentions (last 90 days)
+- Key competitors
+- Estimated revenue range
+- Recent job postings (growth signals)
+- Company blog/content activity (engagement level)
+- Executive team changes
+
+### Email Pattern Discovery
+Common corporate email formats (most likely first; `+` below means concatenation, not a literal plus sign):
+1. `firstname@company.com` (most common for small companies)
+2. `firstname.lastname@company.com` (most common for larger companies)
+3. `first_initial+lastname@company.com` (e.g., jsmith@)
+4. `firstname+last_initial@company.com` (e.g., johns@)
+
+Note: NEVER send unsolicited emails. Email patterns are for reference only.
+
+---
+
+## Lead Scoring Framework
+
+### Scoring Rubric (0-100)
+```
+ICP Match (30 points max):
+  Industry match:     +10
+  Company size match: +5
+  Geography match:    +5
+  Role/title match:   +10
+
+Growth Signals (20 points max):
+  Recent funding:     +8
+  Actively hiring:    +6
+  Product launch:     +3
+  Press coverage:     +3
+
+Enrichment Quality (20 points max):
+  Email found:        +5
+  LinkedIn found:     +5
+  Full company data:  +5
+  Tech stack known:   +5
+
+Recency (15 points max):
+  Active this month:   +15
+  Active this quarter: +10
+  Active this year:    +5
+  No recent activity:  +0
+
+Accessibility (15 points max):
+  Direct contact:     +15
+  Company contact:    +10
+  Social only:        +5
+  No contact info:    +0
+```
+
+### Score Interpretation
+| Score | Grade | Action |
+|-------|-------|--------|
+| 80-100 | A | Hot lead — prioritize outreach |
+| 60-79 | B | Warm lead — nurture |
+| 40-59 | C | Cool lead — enrich further |
+| 0-39 | D | Cold lead — deprioritize |
+
+---
+
+## Deduplication Strategies
+
+### Matching Algorithm
+1. **Exact match**: Normalize company name (lowercase, strip Inc/LLC/Ltd) + person name
+2. **Fuzzy match**: Levenshtein distance < 2 on company name + same person
+3. **Domain match**: Same company website domain = same company
+4. **Cross-source merge**: Same person at same company from different sources → merge enrichment data
+
+### Normalization Rules
+```
+Company name:
+  - Strip legal suffixes: Inc, LLC, Ltd, Corp, Co, GmbH, AG, SA
+  - Lowercase
+  - Remove "The" prefix
+  - Collapse whitespace
+
+Person name:
+  - Lowercase
+  - Remove middle names/initials
+  - Handle "Bob" = "Robert", "Mike" = "Michael" (common nicknames)
+```
+
+---
+
+## Output Format Templates
+
+### CSV Format
+```csv
+Name,Title,Company,Company URL,LinkedIn,Industry,Size,Score,Discovered,Notes
+"Jane Smith","VP Engineering","Acme Corp","https://acme.com","https://linkedin.com/in/janesmith","SaaS","SMB (120 employees)",85,"2025-01-15","Series B funded, hiring 5 engineers"
+```
+
+### JSON Format
+```json
+[
+  {
+    "name": "Jane Smith",
+    "title": "VP Engineering",
+    "company": "Acme Corp",
+    "company_url": "https://acme.com",
+    "linkedin": "https://linkedin.com/in/janesmith",
+    "industry": "SaaS",
+    "company_size": "SMB",
+    "employee_count": 120,
+    "score": 85,
+    "discovered": "2025-01-15",
+    "enrichment": {
+      "funding": "Series B, $15M",
+      "hiring": true,
+      "tech_stack": ["React", "Python", "AWS"],
+      "recent_news": "Launched enterprise plan Q4 2024"
+    },
+    "notes": "Strong ICP match, actively growing"
+  }
+]
+```
+
+### Markdown Table Format
+```markdown
+| # | Name | Title | Company | Score | Key Signal |
+|---|------|-------|---------|-------|------------|
+| 1 | Jane Smith | VP Engineering | Acme Corp | 85 | Series B funded, hiring |
+| 2 | John Doe | CTO | Beta Inc | 72 | Product launch Q1 2025 |
+```
+
+---
+
+## Compliance & Ethics
+
+### DO
+- Use only publicly available information
+- Respect robots.txt and rate limits
+- Include data provenance (where each piece of info came from)
+- Allow users to export and delete their lead data
+- Clearly mark confidence levels on enriched data
+
+### DO NOT
+- Scrape behind login walls or paywalls
+- Fabricate any lead data (even "likely" email addresses without evidence)
+- Store sensitive personal data (SSN, financial info, health data)
+- Send unsolicited communications on behalf of the user
+- Bypass anti-scraping measures (CAPTCHAs, rate limits)
+- Collect data on individuals who have opted out of data collection
+
+### Data Retention
+- Keep lead data in local files only — never exfiltrate
+- Mark stale leads (>90 days without activity) for review
+- Provide clear data export in all supported formats