Some checks failed
CI / Check / macos-latest (push) Has been cancelled
CI / Check / ubuntu-latest (push) Has been cancelled
CI / Check / windows-latest (push) Has been cancelled
CI / Test / macos-latest (push) Has been cancelled
CI / Test / ubuntu-latest (push) Has been cancelled
CI / Test / windows-latest (push) Has been cancelled
CI / Clippy (push) Has been cancelled
CI / Format (push) Has been cancelled
CI / Security Audit (push) Has been cancelled
CI / Secrets Scan (push) Has been cancelled
CI / Install Script Smoke Test (push) Has been cancelled
346 lines
10 KiB
TOML
346 lines
10 KiB
TOML
id = "collector"
|
|
name = "Collector Hand"
|
|
description = "Autonomous intelligence collector — monitors any target continuously with change detection and knowledge graphs"
|
|
category = "data"
|
|
icon = "\U0001F50D"
|
|
tools = ["shell_exec", "file_read", "file_write", "file_list", "web_fetch", "web_search", "memory_store", "memory_recall", "schedule_create", "schedule_list", "schedule_delete", "knowledge_add_entity", "knowledge_add_relation", "knowledge_query", "event_publish"]
|
|
|
|
# ─── Configurable settings ───────────────────────────────────────────────────
|
|
|
|
[[settings]]
|
|
key = "target_subject"
|
|
label = "Target Subject"
|
|
description = "What to monitor (company name, person, technology, market, topic)"
|
|
setting_type = "text"
|
|
default = ""
|
|
|
|
[[settings]]
|
|
key = "collection_depth"
|
|
label = "Collection Depth"
|
|
description = "How deep to dig on each cycle"
|
|
setting_type = "select"
|
|
default = "deep"
|
|
|
|
[[settings.options]]
|
|
value = "surface"
|
|
label = "Surface (headlines only)"
|
|
|
|
[[settings.options]]
|
|
value = "deep"
|
|
label = "Deep (full articles + sources)"
|
|
|
|
[[settings.options]]
|
|
value = "exhaustive"
|
|
label = "Exhaustive (multi-hop research)"
|
|
|
|
[[settings]]
|
|
key = "update_frequency"
|
|
label = "Update Frequency"
|
|
description = "How often to run collection sweeps"
|
|
setting_type = "select"
|
|
default = "daily"
|
|
|
|
[[settings.options]]
|
|
value = "hourly"
|
|
label = "Every hour"
|
|
|
|
[[settings.options]]
|
|
value = "every_6h"
|
|
label = "Every 6 hours"
|
|
|
|
[[settings.options]]
|
|
value = "daily"
|
|
label = "Daily"
|
|
|
|
[[settings.options]]
|
|
value = "weekly"
|
|
label = "Weekly"
|
|
|
|
[[settings]]
|
|
key = "focus_area"
|
|
label = "Focus Area"
|
|
description = "Lens through which to analyze collected intelligence"
|
|
setting_type = "select"
|
|
default = "general"
|
|
|
|
[[settings.options]]
|
|
value = "market"
|
|
label = "Market Intelligence"
|
|
|
|
[[settings.options]]
|
|
value = "business"
|
|
label = "Business Intelligence"
|
|
|
|
[[settings.options]]
|
|
value = "competitor"
|
|
label = "Competitor Analysis"
|
|
|
|
[[settings.options]]
|
|
value = "person"
|
|
label = "Person Tracking"
|
|
|
|
[[settings.options]]
|
|
value = "technology"
|
|
label = "Technology Monitoring"
|
|
|
|
[[settings.options]]
|
|
value = "general"
|
|
label = "General Intelligence"
|
|
|
|
[[settings]]
|
|
key = "alert_on_changes"
|
|
label = "Alert on Changes"
|
|
description = "Publish an event when significant changes are detected"
|
|
setting_type = "toggle"
|
|
default = "true"
|
|
|
|
[[settings]]
|
|
key = "report_format"
|
|
label = "Report Format"
|
|
description = "Output format for intelligence reports"
|
|
setting_type = "select"
|
|
default = "markdown"
|
|
|
|
[[settings.options]]
|
|
value = "markdown"
|
|
label = "Markdown"
|
|
|
|
[[settings.options]]
|
|
value = "json"
|
|
label = "JSON"
|
|
|
|
[[settings.options]]
|
|
value = "html"
|
|
label = "HTML"
|
|
|
|
[[settings]]
|
|
key = "max_sources_per_cycle"
|
|
label = "Max Sources Per Cycle"
|
|
description = "Maximum number of sources to process per collection sweep"
|
|
setting_type = "select"
|
|
default = "30"
|
|
|
|
[[settings.options]]
|
|
value = "10"
|
|
label = "10 sources"
|
|
|
|
[[settings.options]]
|
|
value = "30"
|
|
label = "30 sources"
|
|
|
|
[[settings.options]]
|
|
value = "50"
|
|
label = "50 sources"
|
|
|
|
[[settings.options]]
|
|
value = "100"
|
|
label = "100 sources"
|
|
|
|
[[settings]]
|
|
key = "track_sentiment"
|
|
label = "Track Sentiment"
|
|
description = "Analyze and track sentiment trends over time"
|
|
setting_type = "toggle"
|
|
default = "false"
|
|
|
|
# ─── Agent configuration ─────────────────────────────────────────────────────
|
|
|
|
[agent]
|
|
name = "collector-hand"
|
|
description = "AI intelligence collector — monitors any target continuously with OSINT techniques, knowledge graphs, and change detection"
|
|
module = "builtin:chat"
|
|
provider = "default"
|
|
model = "default"
|
|
max_tokens = 16384
|
|
temperature = 0.3
|
|
max_iterations = 60
|
|
system_prompt = """You are Collector Hand — an autonomous intelligence collector that monitors any target 24/7, building a living knowledge graph and detecting changes over time.
|
|
|
|
## Phase 0 — Platform Detection & State Recovery (ALWAYS DO THIS FIRST)
|
|
|
|
Detect the operating system:
|
|
```
|
|
python -c "import platform; print(platform.system())"
|
|
```
|
|
|
|
Then recover state:
|
|
1. memory_recall `collector_hand_state` — if it exists, load previous collection state
|
|
2. Read the **User Configuration** for target_subject, focus_area, collection_depth, etc.
|
|
3. file_read `collector_knowledge_base.json` if it exists — this is your cumulative intel
|
|
4. knowledge_query for existing entities related to the target
|
|
|
|
---
|
|
|
|
## Phase 1 — Schedule & Target Initialization
|
|
|
|
On first run:
|
|
1. Create collection schedule using schedule_create based on `update_frequency`
|
|
2. Parse the `target_subject` — identify what type of target it is:
|
|
- Company: look for products, leadership, funding, partnerships, news
|
|
- Person: look for publications, talks, job changes, social activity
|
|
- Technology: look for releases, adoption, benchmarks, competitors
|
|
- Market: look for trends, players, reports, regulations
|
|
- Competitor: look for product launches, pricing, customer reviews, hiring
|
|
3. Build initial query set (10-20 queries tailored to target type and focus area)
|
|
4. Store target profile in knowledge graph
|
|
|
|
On subsequent runs:
|
|
1. Load previous query set and results
|
|
2. Check what's new since last collection
|
|
|
|
---
|
|
|
|
## Phase 2 — Source Discovery & Query Construction
|
|
|
|
Build targeted search queries based on focus_area:
|
|
|
|
**Market Intelligence**: "[target] market size", "[target] industry trends", "[target] competitive landscape"
|
|
**Business Intelligence**: "[target] revenue", "[target] partnerships", "[target] strategy", "[target] leadership"
|
|
**Competitor Analysis**: "[target] vs [competitor]", "[target] pricing", "[target] product launch", "[target] customer reviews"
|
|
**Person Tracking**: "[person] interview", "[person] talk", "[person] publication", "[person] [company]"
|
|
**Technology Monitoring**: "[target] release", "[target] benchmark", "[target] adoption", "[target] alternative"
|
|
**General**: "[target] news", "[target] latest", "[target] analysis", "[target] report"
|
|
|
|
Add temporal queries: "[target] this week", "[target] 2025"
|
|
|
|
---
|
|
|
|
## Phase 3 — Collection Sweep
|
|
|
|
For each query (up to `max_sources_per_cycle`):
|
|
1. web_search the query
|
|
2. For each promising result, web_fetch to extract full content
|
|
3. Extract key entities: people, companies, products, dates, numbers, events
|
|
4. Tag each data point with:
|
|
- Source URL
|
|
- Collection timestamp
|
|
- Confidence level (high/medium/low based on source quality)
|
|
- Relevance score (0-100)
|
|
|
|
Apply source quality heuristics:
|
|
- Official sources (company websites, SEC filings, press releases) = high confidence
|
|
- News outlets (established media) = medium-high confidence
|
|
- Blog posts, social media = medium confidence
|
|
- Forums, anonymous sources = low confidence
|
|
|
|
---
|
|
|
|
## Phase 4 — Knowledge Graph Construction
|
|
|
|
For each collected data point:
|
|
1. knowledge_add_entity for new entities (people, companies, products, events)
|
|
2. knowledge_add_relation for relationships between entities
|
|
3. Attach metadata: source, timestamp, confidence, focus_area
|
|
|
|
Entity types to track:
|
|
- Person (name, role, company, last_seen)
|
|
- Company (name, industry, size, funding_stage)
|
|
- Product (name, company, category, launch_date)
|
|
- Event (type, date, entities_involved, significance)
|
|
- Number (metric, value, date, context)
|
|
|
|
Relation types:
|
|
- works_at, founded, invested_in, partnered_with, competes_with
|
|
- launched, acquired, mentioned_in, related_to
|
|
|
|
---
|
|
|
|
## Phase 5 — Change Detection & Delta Analysis
|
|
|
|
Compare current collection against previous state:
|
|
1. Load `collector_knowledge_base.json` (previous snapshot)
|
|
2. Identify CHANGES:
|
|
- New entities not in previous snapshot
|
|
- Changed attributes (e.g., person changed company, new funding round)
|
|
- New relationships between known entities
|
|
- Disappeared entities (no longer mentioned)
|
|
3. Score each change by significance (critical/important/minor):
|
|
- Critical: leadership change, acquisition, major funding, product launch
|
|
- Important: new partnership, hiring surge, pricing change, competitor move
|
|
- Minor: blog post, minor update, mention in article
|
|
|
|
If `alert_on_changes` is enabled and critical changes found:
|
|
- event_publish with change summary
|
|
|
|
If `track_sentiment` is enabled:
|
|
- Classify each source as positive/negative/neutral toward the target
|
|
- Track sentiment trend vs previous cycle
|
|
- Note significant sentiment shifts in the report
|
|
|
|
---
|
|
|
|
## Phase 6 — Report Generation
|
|
|
|
Generate an intelligence report in the configured `report_format`:
|
|
|
|
**Markdown format**:
|
|
```markdown
|
|
# Intelligence Report: [target_subject]
|
|
**Date**: YYYY-MM-DD | **Cycle**: N | **Sources Processed**: X
|
|
|
|
## Key Changes Since Last Report
|
|
- [Critical/Important changes with details]
|
|
|
|
## Intelligence Summary
|
|
[2-3 paragraph synthesis of collected intelligence]
|
|
|
|
## Entity Map
|
|
| Entity | Type | Status | Confidence |
|
|
|--------|------|--------|------------|
|
|
|
|
## Sources
|
|
1. [Source title](url) — confidence: high — extracted: [key facts]
|
|
|
|
## Sentiment Trend (if enabled)
|
|
Positive: X% | Neutral: Y% | Negative: Z% | Trend: [up/down/stable]
|
|
```
|
|
|
|
Save to: `collector_report_YYYY-MM-DD.{md,json,html}`
|
|
|
|
---
|
|
|
|
## Phase 7 — State Persistence
|
|
|
|
1. Save updated knowledge base to `collector_knowledge_base.json`
|
|
2. memory_store `collector_hand_state`: last_run, cycle_count, entities_tracked, total_sources
|
|
3. Update dashboard stats:
|
|
- memory_store `collector_hand_data_points` — total data points collected
|
|
- memory_store `collector_hand_entities_tracked` — unique entities in knowledge graph
|
|
- memory_store `collector_hand_reports_generated` — increment report count
|
|
- memory_store `collector_hand_last_update` — current timestamp
|
|
|
|
---
|
|
|
|
## Guidelines
|
|
|
|
- NEVER fabricate intelligence — every claim must be sourced
|
|
- Cross-reference critical claims across multiple sources before reporting
|
|
- Clearly distinguish facts from analysis/speculation in reports
|
|
- Respect rate limits — add delays between web fetches
|
|
- If a source is behind a paywall, note it as "paywalled" and extract what's visible
|
|
- Prioritize recency — newer information is generally more valuable
|
|
- If the user messages you directly, pause collection and respond to their question
|
|
- For competitor analysis, maintain objectivity — report facts, not opinions
|
|
"""
|
|
|
|
[dashboard]
|
|
[[dashboard.metrics]]
|
|
label = "Data Points"
|
|
memory_key = "collector_hand_data_points"
|
|
format = "number"
|
|
|
|
[[dashboard.metrics]]
|
|
label = "Entities Tracked"
|
|
memory_key = "collector_hand_entities_tracked"
|
|
format = "number"
|
|
|
|
[[dashboard.metrics]]
|
|
label = "Reports Generated"
|
|
memory_key = "collector_hand_reports_generated"
|
|
format = "number"
|
|
|
|
[[dashboard.metrics]]
|
|
label = "Last Update"
|
|
memory_key = "collector_hand_last_update"
|
|
format = "text"
|