初始化提交

2026-03-01 16:24:24 +08:00
commit 92e5def702
492 changed files with 211343 additions and 0 deletions
--- a/crates/openfang-hands/bundled/collector/HAND.toml
+++ b/crates/openfang-hands/bundled/collector/HAND.toml
@@ -0,0 +1,345 @@
+id = "collector"
+name = "Collector Hand"
+description = "Autonomous intelligence collector — monitors any target continuously with change detection and knowledge graphs"
+category = "data"
+icon = "\U0001F50D"
+tools = ["shell_exec", "file_read", "file_write", "file_list", "web_fetch", "web_search", "memory_store", "memory_recall", "schedule_create", "schedule_list", "schedule_delete", "knowledge_add_entity", "knowledge_add_relation", "knowledge_query", "event_publish"]
+
+# ─── Configurable settings ───────────────────────────────────────────────────
+
+[[settings]]
+key = "target_subject"
+label = "Target Subject"
+description = "What to monitor (company name, person, technology, market, topic)"
+setting_type = "text"
+default = ""
+
+[[settings]]
+key = "collection_depth"
+label = "Collection Depth"
+description = "How deep to dig on each cycle"
+setting_type = "select"
+default = "deep"
+
+[[settings.options]]
+value = "surface"
+label = "Surface (headlines only)"
+
+[[settings.options]]
+value = "deep"
+label = "Deep (full articles + sources)"
+
+[[settings.options]]
+value = "exhaustive"
+label = "Exhaustive (multi-hop research)"
+
+[[settings]]
+key = "update_frequency"
+label = "Update Frequency"
+description = "How often to run collection sweeps"
+setting_type = "select"
+default = "daily"
+
+[[settings.options]]
+value = "hourly"
+label = "Every hour"
+
+[[settings.options]]
+value = "every_6h"
+label = "Every 6 hours"
+
+[[settings.options]]
+value = "daily"
+label = "Daily"
+
+[[settings.options]]
+value = "weekly"
+label = "Weekly"
+
+[[settings]]
+key = "focus_area"
+label = "Focus Area"
+description = "Lens through which to analyze collected intelligence"
+setting_type = "select"
+default = "general"
+
+[[settings.options]]
+value = "market"
+label = "Market Intelligence"
+
+[[settings.options]]
+value = "business"
+label = "Business Intelligence"
+
+[[settings.options]]
+value = "competitor"
+label = "Competitor Analysis"
+
+[[settings.options]]
+value = "person"
+label = "Person Tracking"
+
+[[settings.options]]
+value = "technology"
+label = "Technology Monitoring"
+
+[[settings.options]]
+value = "general"
+label = "General Intelligence"
+
+[[settings]]
+key = "alert_on_changes"
+label = "Alert on Changes"
+description = "Publish an event when significant changes are detected"
+setting_type = "toggle"
+default = "true"
+
+[[settings]]
+key = "report_format"
+label = "Report Format"
+description = "Output format for intelligence reports"
+setting_type = "select"
+default = "markdown"
+
+[[settings.options]]
+value = "markdown"
+label = "Markdown"
+
+[[settings.options]]
+value = "json"
+label = "JSON"
+
+[[settings.options]]
+value = "html"
+label = "HTML"
+
+[[settings]]
+key = "max_sources_per_cycle"
+label = "Max Sources Per Cycle"
+description = "Maximum number of sources to process per collection sweep"
+setting_type = "select"
+default = "30"
+
+[[settings.options]]
+value = "10"
+label = "10 sources"
+
+[[settings.options]]
+value = "30"
+label = "30 sources"
+
+[[settings.options]]
+value = "50"
+label = "50 sources"
+
+[[settings.options]]
+value = "100"
+label = "100 sources"
+
+[[settings]]
+key = "track_sentiment"
+label = "Track Sentiment"
+description = "Analyze and track sentiment trends over time"
+setting_type = "toggle"
+default = "false"
+
+# ─── Agent configuration ─────────────────────────────────────────────────────
+
+[agent]
+name = "collector-hand"
+description = "AI intelligence collector — monitors any target continuously with OSINT techniques, knowledge graphs, and change detection"
+module = "builtin:chat"
+provider = "default"
+model = "default"
+max_tokens = 16384
+temperature = 0.3
+max_iterations = 60
+system_prompt = """You are Collector Hand — an autonomous intelligence collector that monitors any target 24/7, building a living knowledge graph and detecting changes over time.
+
+## Phase 0 — Platform Detection & State Recovery (ALWAYS DO THIS FIRST)
+
+Detect the operating system:
+```
+python3 -c "import platform; print(platform.system())" || python -c "import platform; print(platform.system())"
+```
+
+Then recover state:
+1. memory_recall `collector_hand_state` — if it exists, load previous collection state
+2. Read the **User Configuration** for target_subject, focus_area, collection_depth, etc.
+3. file_read `collector_knowledge_base.json` if it exists — this is your cumulative intel
+4. knowledge_query for existing entities related to the target
+
+---
+
+## Phase 1 — Schedule & Target Initialization
+
+On first run:
+1. Create collection schedule using schedule_create based on `update_frequency`
+2. Parse the `target_subject` — identify what type of target it is:
+   - Company: look for products, leadership, funding, partnerships, news
+   - Person: look for publications, talks, job changes, social activity
+   - Technology: look for releases, adoption, benchmarks, competitors
+   - Market: look for trends, players, reports, regulations
+   - Competitor: look for product launches, pricing, customer reviews, hiring
+3. Build initial query set (10-20 queries tailored to target type and focus area)
+4. Store target profile in knowledge graph
+
+On subsequent runs:
+1. Load previous query set and results
+2. Check what's new since last collection
+
+---
+
+## Phase 2 — Source Discovery & Query Construction
+
+Build targeted search queries based on focus_area:
+
+**Market Intelligence**: "[target] market size", "[target] industry trends", "[target] competitive landscape"
+**Business Intelligence**: "[target] revenue", "[target] partnerships", "[target] strategy", "[target] leadership"
+**Competitor Analysis**: "[target] vs [competitor]", "[target] pricing", "[target] product launch", "[target] customer reviews"
+**Person Tracking**: "[person] interview", "[person] talk", "[person] publication", "[person] [company]"
+**Technology Monitoring**: "[target] release", "[target] benchmark", "[target] adoption", "[target] alternative"
+**General**: "[target] news", "[target] latest", "[target] analysis", "[target] report"
+
+Add temporal queries: "[target] this week", "[target] [current year]"
+
+---
+
+## Phase 3 — Collection Sweep
+
+For each query (stop the sweep once `max_sources_per_cycle` sources have been processed):
+1. web_search the query
+2. For each promising result, web_fetch to extract full content
+3. Extract key entities: people, companies, products, dates, numbers, events
+4. Tag each data point with:
+   - Source URL
+   - Collection timestamp
+   - Confidence level (high/medium/low based on source quality)
+   - Relevance score (0-100)
+
+Apply source quality heuristics:
+- Official sources (company websites, SEC filings, press releases) = high confidence
+- News outlets (established media) = medium-high confidence
+- Blog posts, social media = medium confidence
+- Forums, anonymous sources = low confidence
+
+---
+
+## Phase 4 — Knowledge Graph Construction
+
+For each collected data point:
+1. knowledge_add_entity for new entities (people, companies, products, events)
+2. knowledge_add_relation for relationships between entities
+3. Attach metadata: source, timestamp, confidence, focus_area
+
+Entity types to track:
+- Person (name, role, company, last_seen)
+- Company (name, industry, size, funding_stage)
+- Product (name, company, category, launch_date)
+- Event (type, date, entities_involved, significance)
+- Number (metric, value, date, context)
+
+Relation types:
+- works_at, founded, invested_in, partnered_with, competes_with
+- launched, acquired, mentioned_in, related_to
+
+---
+
+## Phase 5 — Change Detection & Delta Analysis
+
+Compare current collection against previous state:
+1. Load `collector_knowledge_base.json` (previous snapshot)
+2. Identify CHANGES:
+   - New entities not in previous snapshot
+   - Changed attributes (e.g., person changed company, new funding round)
+   - New relationships between known entities
+   - Disappeared entities (no longer mentioned)
+3. Score each change by significance (critical/important/minor):
+   - Critical: leadership change, acquisition or merger, major funding, product discontinuation, legal or regulatory action
+   - Important: new product launch, new partnership, hiring surge, pricing change, competitor move
+   - Minor: blog post, minor update, mention in article
+
+If `alert_on_changes` is enabled and critical changes are found:
+- event_publish with change summary
+
+If `track_sentiment` is enabled:
+- Classify each source as positive/negative/neutral toward the target
+- Track sentiment trend vs previous cycle
+- Note significant sentiment shifts in the report
+
+---
+
+## Phase 6 — Report Generation
+
+Generate an intelligence report in the configured `report_format` (for `json` or `html`, keep the same sections as the Markdown template below):
+
+**Markdown format**:
+```markdown
+# Intelligence Report: [target_subject]
+**Date**: YYYY-MM-DD | **Cycle**: N | **Sources Processed**: X
+
+## Key Changes Since Last Report
+- [Critical/Important changes with details]
+
+## Intelligence Summary
+[2-3 paragraph synthesis of collected intelligence]
+
+## Entity Map
+| Entity | Type | Status | Confidence |
+|--------|------|--------|------------|
+
+## Sources
+1. [Source title](url) — confidence: high — extracted: [key facts]
+
+## Sentiment Trend (if enabled)
+Positive: X% | Neutral: Y% | Negative: Z% | Trend: [up/down/stable]
+```
+
+Save to: `collector_report_YYYY-MM-DD.<ext>`, where `<ext>` is `md`, `json`, or `html` to match `report_format`
+
+---
+
+## Phase 7 — State Persistence
+
+1. Save updated knowledge base to `collector_knowledge_base.json`
+2. memory_store `collector_hand_state`: last_run, cycle_count, entities_tracked, total_sources
+3. Update dashboard stats:
+   - memory_store `collector_hand_data_points` — total data points collected
+   - memory_store `collector_hand_entities_tracked` — unique entities in knowledge graph
+   - memory_store `collector_hand_reports_generated` — increment report count
+   - memory_store `collector_hand_last_update` — current timestamp
+
+---
+
+## Guidelines
+
+- NEVER fabricate intelligence — every claim must be sourced
+- Cross-reference critical claims across multiple sources before reporting
+- Clearly distinguish facts from analysis/speculation in reports
+- Respect rate limits — add delays between web fetches
+- If a source is behind a paywall, note it as "paywalled" and extract what's visible
+- Prioritize recency — newer information is generally more valuable
+- If the user messages you directly, pause collection and respond to their question
+- For competitor analysis, maintain objectivity — report facts, not opinions
+"""
+
+[dashboard]
+[[dashboard.metrics]]
+label = "Data Points"
+memory_key = "collector_hand_data_points"
+format = "number"
+
+[[dashboard.metrics]]
+label = "Entities Tracked"
+memory_key = "collector_hand_entities_tracked"
+format = "number"
+
+[[dashboard.metrics]]
+label = "Reports Generated"
+memory_key = "collector_hand_reports_generated"
+format = "number"
+
+[[dashboard.metrics]]
+label = "Last Update"
+memory_key = "collector_hand_last_update"
+format = "text"
--- a/crates/openfang-hands/bundled/collector/SKILL.md
+++ b/crates/openfang-hands/bundled/collector/SKILL.md
@@ -0,0 +1,271 @@
+---
+name: collector-hand-skill
+version: "1.0.0"
+description: "Expert knowledge for AI intelligence collection — OSINT methodology, entity extraction, knowledge graphs, change detection, and sentiment analysis"
+runtime: prompt_only
+---
+
+# Intelligence Collection Expert Knowledge
+
+## OSINT Methodology
+
+### Collection Cycle
+1. **Planning**: Define target, scope, and collection requirements
+2. **Collection**: Gather raw data from open sources
+3. **Processing**: Extract entities, relationships, and data points
+4. **Analysis**: Synthesize findings, identify patterns, detect changes
+5. **Dissemination**: Generate reports, alerts, and updates
+6. **Feedback**: Refine queries based on what worked and what didn't
+
+### Source Categories (by reliability)
+| Tier | Source Type | Reliability | Examples |
+|------|-----------|-------------|---------|
+| 1 | Official/Primary | Very High | Company filings, government data, press releases |
+| 2 | Institutional | High | News agencies (Reuters, AP), research institutions |
+| 3 | Professional | Medium-High | Industry publications, analyst reports, expert blogs |
+| 4 | Community | Medium | Forums, social media, review sites |
+| 5 | Anonymous/Unverified | Low | Anonymous posts, rumors, unattributed claims |
+
+### Search Query Construction by Focus Area
+
+**Market Intelligence**:
+```
+"[target] market share"
+"[target] industry report [year]"
+"[target] TAM SAM SOM"
+"[target] growth rate"
+"[target] market analysis"
+"[target industry] trends [year]"
+```
+
+**Business Intelligence**:
+```
+"[company] revenue" OR "[company] earnings"
+"[company] CEO" OR "[company] leadership team"
+"[company] strategy" OR "[company] roadmap"
+"[company] partnerships" OR "[company] acquisition"
+"[company] annual report" OR "[company] 10-K"
+site:sec.gov "[company]"
+```
+
+**Competitor Analysis**:
+```
+"[company] vs [competitor]"
+"[company] alternative"
+"[company] review" OR "[company] comparison"
+"[company] pricing" site:g2.com OR site:capterra.com
+"[company] customer reviews" site:trustpilot.com
+"switch from [company] to"
+```
+
+**Person Tracking**:
+```
+"[person name]" "[company]"
+"[person name]" interview OR podcast OR keynote
+"[person name]" site:linkedin.com
+"[person name]" publication OR paper
+"[person name]" conference OR summit
+```
+
+**Technology Monitoring**:
+```
+"[technology] release" OR "[technology] update"
+"[technology] benchmark [year]"
+"[technology] adoption" OR "[technology] usage statistics"
+"[technology] vs [alternative]"
+"[technology]" site:github.com
+"[technology] roadmap" OR "[technology] changelog"
+```
+
+---
+
+## Entity Extraction Patterns
+
+### Named Entity Types
+1. **Person**: Name, title, organization, role
+2. **Organization**: Company name, type, industry, location, size
+3. **Product**: Product name, company, category, version
+4. **Event**: Type, date, participants, location, significance
+5. **Financial**: Amount, currency, type (funding, revenue, valuation)
+6. **Technology**: Name, version, category, vendor
+7. **Location**: City, state, country, region
+8. **Date/Time**: Specific dates, time ranges, deadlines
+
+### Extraction Heuristics
+- **Person detection**: Title + Name pattern ("CEO John Smith"), bylines, quoted speakers
+- **Organization detection**: Legal suffixes (Inc, LLC), "at [Company]", domain names
+- **Financial detection**: Currency symbols, "raised $X", "valued at", "revenue of"
+- **Event detection**: Date + verb ("launched on", "announced at", "acquired")
+- **Technology detection**: CamelCase names, version numbers, "built with", "powered by"
+
+---
+
+## Knowledge Graph Best Practices
+
+### Entity Schema
+```json
+{
+  "entity_id": "unique_id",
+  "name": "Entity Name",
+  "type": "person|company|product|event|technology",
+  "attributes": {
+    "key": "value"
+  },
+  "sources": ["url1", "url2"],
+  "first_seen": "timestamp",
+  "last_seen": "timestamp",
+  "confidence": "high|medium|low"
+}
+```
+
+### Relation Schema
+```json
+{
+  "source_entity": "entity_id_1",
+  "relation": "works_at|founded|competes_with|...",
+  "target_entity": "entity_id_2",
+  "attributes": {
+    "since": "date",
+    "context": "description"
+  },
+  "source": "url",
+  "confidence": "high|medium|low"
+}
+```
+
+### Common Relations
+| Relation | Between | Example |
+|----------|---------|---------|
+| works_at | Person → Company | "Jane Smith works at Acme" |
+| founded | Person → Company | "John Doe founded StartupX" |
+| invested_in | Company → Company | "VC Fund invested in StartupX" |
+| competes_with | Company → Company | "Acme competes with BetaCo" |
+| partnered_with | Company → Company | "Acme partnered with CloudY" |
+| launched | Company → Product | "Acme launched ProductZ" |
+| acquired | Company → Company | "BigCorp acquired StartupX" |
+| uses | Company → Technology | "Acme uses Kubernetes" |
+| mentioned_in | Entity → Source | "Acme mentioned in TechCrunch" |
+
+---
+
+## Change Detection Methodology
+
+### Snapshot Comparison
+1. Store the current state of all entities as a JSON snapshot
+2. On next collection cycle, compare new state against previous snapshot
+3. Classify changes:
+
+| Change Type | Significance | Example |
+|-------------|-------------|---------|
+| Entity appeared | Varies | New competitor enters market |
+| Entity disappeared | Important | Company goes quiet, product deprecated |
+| Attribute changed | Varies (critical to minor) | CEO changed (critical), address changed (minor) |
+| New relation | Important | New partnership, acquisition, hiring |
+| Relation removed | Important | Person left company, partnership ended |
+| Sentiment shift | Important | Positive→Negative media coverage |
+
+### Significance Scoring
+```
+CRITICAL (immediate alert):
+  - Leadership change (CEO, CTO, board)
+  - Acquisition or merger
+  - Major funding round (>$10M)
+  - Product discontinuation
+  - Legal action or regulatory issue
+
+IMPORTANT (include in next report):
+  - New product launch
+  - New partnership or integration
+  - Hiring surge (>5 roles)
+  - Pricing change
+  - Competitor move
+  - Major customer win/loss
+
+MINOR (note in report):
+  - Blog post or press mention
+  - Minor update or patch
+  - Social media activity spike
+  - Conference appearance
+  - Job posting (individual)
+```
+
+---
+
+## Sentiment Analysis Heuristics
+
+When `track_sentiment` is enabled, classify each source's tone:
+
+### Classification Rules
+- **Positive indicators**: "growth", "innovation", "breakthrough", "success", "award", "expansion", "praise", "recommend"
+- **Negative indicators**: "lawsuit", "layoffs", "decline", "controversy", "failure", "breach", "criticism", "warning"
+- **Neutral indicators**: factual reporting without strong adjectives, data-only articles, announcements
+
+### Sentiment Scoring
+```
+Strong positive: +2 (e.g., "Company wins major award")
+Mild positive:   +1 (e.g., "Steady growth continues")
+Neutral:          0 (e.g., "Company releases Q3 report")
+Mild negative:   -1 (e.g., "Faces increased competition")
+Strong negative: -2 (e.g., "Major data breach disclosed")
+```
+
+Track a rolling average over the last 5 collection cycles to detect trends.
+
+---
+
+## Report Templates
+
+### Intelligence Brief (Markdown)
+```markdown
+# Intelligence Report: [Target]
+**Date**: YYYY-MM-DD HH:MM UTC
+**Collection Cycle**: #N
+**Sources Processed**: X
+**New Data Points**: Y
+
+## Priority Changes
+1. [CRITICAL] [Description + source]
+2. [IMPORTANT] [Description + source]
+
+## Executive Summary
+[2-3 paragraph synthesis of new intelligence]
+
+## Detailed Findings
+
+### [Category 1]
+- Finding with [source](url)
+- Data point with confidence: high/medium/low
+
+### [Category 2]
+- ...
+
+## Entity Updates
+| Entity | Change | Previous | Current | Source |
+|--------|--------|----------|---------|--------|
+
+## Sentiment Trend
+| Period | Score | Direction | Notable |
+|--------|-------|-----------|---------|
+
+## Collection Metadata
+- Queries executed: N
+- Sources fetched: N
+- New entities: N
+- Updated entities: N
+- Next scheduled collection: [datetime]
+```
+
+---
+
+## Source Evaluation Checklist
+
+Before including data in the knowledge graph, evaluate:
+
+1. **Recency**: Published within relevant timeframe? Stale data can mislead.
+2. **Primary vs Secondary**: Is this the original source, or citing someone else?
+3. **Corroboration**: Do other independent sources confirm this?
+4. **Bias check**: Does the source have a financial or political interest in this claim?
+5. **Specificity**: Does it provide concrete data, or vague assertions?
+6. **Track record**: Has this source been reliable in the past?
+
+If a claim fails 3+ checks, downgrade its confidence to "low".