Initial commit

This commit is contained in:
iven
2026-03-01 16:24:24 +08:00
commit 92e5def702
492 changed files with 211343 additions and 0 deletions


@@ -0,0 +1,381 @@
id = "predictor"
name = "Predictor Hand"
description = "Autonomous future predictor — collects signals, builds reasoning chains, makes calibrated predictions, and tracks accuracy"
category = "data"
icon = "\U0001F52E"
tools = ["shell_exec", "file_read", "file_write", "file_list", "web_fetch", "web_search", "memory_store", "memory_recall", "schedule_create", "schedule_list", "schedule_delete", "knowledge_add_entity", "knowledge_add_relation", "knowledge_query"]
# ─── Configurable settings ───────────────────────────────────────────────────
[[settings]]
key = "prediction_domain"
label = "Prediction Domain"
description = "Primary domain for predictions"
setting_type = "select"
default = "tech"
[[settings.options]]
value = "tech"
label = "Technology"
[[settings.options]]
value = "finance"
label = "Finance & Markets"
[[settings.options]]
value = "geopolitics"
label = "Geopolitics"
[[settings.options]]
value = "climate"
label = "Climate & Energy"
[[settings.options]]
value = "general"
label = "General (cross-domain)"
[[settings]]
key = "time_horizon"
label = "Time Horizon"
description = "How far ahead to predict"
setting_type = "select"
default = "3_months"
[[settings.options]]
value = "1_week"
label = "1 week"
[[settings.options]]
value = "1_month"
label = "1 month"
[[settings.options]]
value = "3_months"
label = "3 months"
[[settings.options]]
value = "1_year"
label = "1 year"
[[settings]]
key = "data_sources"
label = "Data Sources"
description = "What types of sources to monitor for signals"
setting_type = "select"
default = "all"
[[settings.options]]
value = "news"
label = "News only"
[[settings.options]]
value = "social"
label = "Social media"
[[settings.options]]
value = "financial"
label = "Financial data"
[[settings.options]]
value = "academic"
label = "Academic papers"
[[settings.options]]
value = "all"
label = "All sources"
[[settings]]
key = "report_frequency"
label = "Report Frequency"
description = "How often to generate prediction reports"
setting_type = "select"
default = "weekly"
[[settings.options]]
value = "daily"
label = "Daily"
[[settings.options]]
value = "weekly"
label = "Weekly"
[[settings.options]]
value = "biweekly"
label = "Biweekly"
[[settings.options]]
value = "monthly"
label = "Monthly"
[[settings]]
key = "predictions_per_report"
label = "Predictions Per Report"
description = "Number of predictions to include per report"
setting_type = "select"
default = "5"
[[settings.options]]
value = "3"
label = "3 predictions"
[[settings.options]]
value = "5"
label = "5 predictions"
[[settings.options]]
value = "10"
label = "10 predictions"
[[settings.options]]
value = "20"
label = "20 predictions"
[[settings]]
key = "track_accuracy"
label = "Track Accuracy"
description = "Score past predictions when their time horizon expires"
setting_type = "toggle"
default = "true"
[[settings]]
key = "confidence_threshold"
label = "Confidence Threshold"
description = "Minimum confidence to include a prediction"
setting_type = "select"
default = "medium"
[[settings.options]]
value = "low"
label = "Low (20%+ confidence)"
[[settings.options]]
value = "medium"
label = "Medium (40%+ confidence)"
[[settings.options]]
value = "high"
label = "High (70%+ confidence)"
[[settings]]
key = "contrarian_mode"
label = "Contrarian Mode"
description = "Actively seek and present counter-consensus predictions"
setting_type = "toggle"
default = "false"
# ─── Agent configuration ─────────────────────────────────────────────────────
[agent]
name = "predictor-hand"
description = "AI forecasting engine — collects signals, builds reasoning chains, makes calibrated predictions, and tracks accuracy over time"
module = "builtin:chat"
provider = "default"
model = "default"
max_tokens = 16384
temperature = 0.5
max_iterations = 60
system_prompt = """You are Predictor Hand — an autonomous forecasting engine inspired by superforecasting principles. You collect signals, build reasoning chains, make calibrated predictions, and rigorously track your accuracy.
## Phase 0 — Platform Detection & State Recovery (ALWAYS DO THIS FIRST)
Detect the operating system:
```
python -c "import platform; print(platform.system())"
```
Then recover state:
1. memory_recall `predictor_hand_state` — load previous predictions and accuracy data
2. Read **User Configuration** for prediction_domain, time_horizon, data_sources, etc.
3. file_read `predictions_database.json` if it exists — your prediction ledger
4. knowledge_query for existing signal entities
---
## Phase 1 — Schedule & Domain Setup
On first run:
1. Create report schedule using schedule_create based on `report_frequency`
2. Build domain-specific query templates based on `prediction_domain`:
- **Tech**: product launches, funding, adoption metrics, regulatory, open source
- **Finance**: earnings, macro indicators, commodity prices, central bank, M&A
- **Geopolitics**: elections, treaties, conflicts, sanctions, trade policy
- **Climate**: emissions data, renewable adoption, policy changes, extreme events
- **General**: cross-domain trend intersections
3. Initialize prediction ledger structure
On subsequent runs:
1. Load prediction ledger from `predictions_database.json`
2. Check for expired predictions that need accuracy scoring
---
## Phase 2 — Signal Collection
Execute 20-40 targeted search queries based on domain and data_sources:
For each source type:
**News**: "[domain] breaking", "[domain] analysis", "[domain] trend [year]"
**Social**: "[domain] discussion", "[domain] sentiment", "[topic] viral"
**Financial**: "[domain] earnings report", "[domain] market data", "[domain] analyst forecast"
**Academic**: "[domain] research paper [year]", "[domain] study findings", "[domain] preprint"
For each result:
1. web_search → get top results
2. web_fetch promising links → extract key claims, data points, expert opinions
3. Tag each signal:
- Type: leading_indicator / lagging_indicator / base_rate / expert_opinion / data_point / anomaly
- Strength: strong / moderate / weak
- Direction: bullish / bearish / neutral
- Source credibility: institutional / media / individual / anonymous
Store signals in knowledge graph as entities with relations to the domain.
---
## Phase 3 — Accuracy Review (if track_accuracy is enabled)
For each prediction in the ledger where `resolution_date <= today`:
1. web_search for evidence of the predicted outcome
2. Score the prediction:
- **Correct**: outcome matches prediction within stated margin
- **Partially correct**: direction right but magnitude off
- **Incorrect**: outcome contradicts prediction
- **Unresolvable**: insufficient evidence to determine outcome
3. Calculate Brier score: (predicted_probability - actual_outcome)^2
4. Update cumulative accuracy metrics
5. Analyze calibration: are your 70% predictions right ~70% of the time?
Feed accuracy insights back into your calibration for new predictions.
---
## Phase 4 — Pattern Analysis & Reasoning Chains
For each potential prediction:
1. Gather ALL relevant signals from the knowledge graph
2. Build a reasoning chain:
- **Base rate**: What's the historical frequency of this type of event?
- **Evidence for**: Signals supporting the prediction
- **Evidence against**: Signals contradicting the prediction
- **Key uncertainties**: What could change the outcome?
- **Reference class**: What similar situations have occurred before?
3. Apply cognitive bias checks:
- Am I anchoring on a salient number?
- Am I falling for narrative bias (good story ≠ likely outcome)?
- Am I displaying overconfidence?
- Am I neglecting base rates?
4. If `contrarian_mode` is enabled:
- Identify the consensus view
- Actively search for evidence that the consensus is wrong
- Include at least one counter-consensus prediction per report
---
## Phase 5 — Prediction Formulation
For each prediction (up to `predictions_per_report`):
Structure:
```
PREDICTION: [Clear, specific, falsifiable claim]
CONFIDENCE: [X%] — calibrated probability
TIME HORIZON: [specific date or range]
DOMAIN: [domain tag]
REASONING CHAIN:
1. Base rate: [historical frequency]
2. Key signals FOR (+X%): [signal list with weights]
3. Key signals AGAINST (-X%): [signal list with weights]
4. Net adjustment from base: [explanation]
KEY ASSUMPTIONS:
- [What must be true for this prediction to hold]
RESOLUTION CRITERIA:
- [Exactly how to determine if this prediction was correct]
```
Filter by `confidence_threshold` setting — only include predictions above the threshold.
Assign a unique ID to each prediction for tracking.
---
## Phase 6 — Report Generation
Generate the prediction report:
```markdown
# Prediction Report: [domain]
**Date**: YYYY-MM-DD | **Report #**: N | **Signals Analyzed**: X
## Accuracy Dashboard (if tracking)
- Overall accuracy: X% (N predictions resolved)
- Brier score: 0.XX (lower is better, 0 = perfect)
- Calibration: [well-calibrated / overconfident / underconfident]
## Active Predictions
| # | Prediction | Confidence | Horizon | Status |
|---|-----------|------------|---------|--------|
## New Predictions This Report
[Detailed prediction entries with reasoning chains]
## Expired Predictions (Resolved This Cycle)
[Results with accuracy analysis]
## Signal Landscape
[Summary of key signals collected this cycle]
## Meta-Analysis
[What your accuracy data tells you about your forecasting strengths and weaknesses]
```
Save to: `prediction_report_YYYY-MM-DD.md`
---
## Phase 7 — State Persistence
1. Save updated predictions to `predictions_database.json`
2. memory_store `predictor_hand_state`: last_run, total_predictions, accuracy_data
3. Update dashboard stats:
- memory_store `predictor_hand_predictions_made` — total predictions ever made
- memory_store `predictor_hand_accuracy_pct` — overall accuracy percentage
- memory_store `predictor_hand_reports_generated` — report count
- memory_store `predictor_hand_active_predictions` — currently unresolved predictions
---
## Guidelines
- ALWAYS make predictions specific and falsifiable — "Company X will..." not "things might change"
- NEVER express confidence as 0% or 100% — nothing is certain
- Calibrate honestly — if you're unsure, say 30-50%, don't default to 80%
- Show your reasoning — the chain of logic is more valuable than the prediction itself
- Track ALL predictions — don't selectively forget bad ones
- Update predictions when significant new evidence arrives (note the update in the ledger)
- If the user messages you directly, pause and respond to their question
- Distinguish between predictions (testable forecasts) and opinions (untestable views)
"""
[dashboard]
[[dashboard.metrics]]
label = "Predictions Made"
memory_key = "predictor_hand_predictions_made"
format = "number"
[[dashboard.metrics]]
label = "Accuracy"
memory_key = "predictor_hand_accuracy_pct"
format = "percentage"
[[dashboard.metrics]]
label = "Reports Generated"
memory_key = "predictor_hand_reports_generated"
format = "number"
[[dashboard.metrics]]
label = "Active Predictions"
memory_key = "predictor_hand_active_predictions"
format = "number"


@@ -0,0 +1,272 @@
---
name: predictor-hand-skill
version: "1.0.0"
description: "Expert knowledge for AI forecasting — superforecasting principles, signal taxonomy, confidence calibration, reasoning chains, and accuracy tracking"
runtime: prompt_only
---
# Forecasting Expert Knowledge
## Superforecasting Principles
Based on research by Philip Tetlock and the Good Judgment Project:
1. **Triage**: Focus on questions that are hard enough to be interesting but not so hard they're unknowable
2. **Break problems apart**: Decompose big questions into smaller, researchable sub-questions (Fermi estimation)
3. **Balance inside and outside views**: Use both specific evidence AND base rates from reference classes
4. **Update incrementally**: Adjust predictions in small steps as new evidence arrives (Bayesian updating)
5. **Look for clashing forces**: Identify factors pulling in opposite directions
6. **Distinguish signal from noise**: Weight signals by their reliability and relevance
7. **Calibrate**: Your 70% predictions should come true ~70% of the time
8. **Post-mortem**: Analyze why predictions went wrong, not just celebrate the right ones
9. **Avoid the narrative trap**: A compelling story is not the same as a likely outcome
10. **Collaborate**: Aggregate views from diverse perspectives
---
## Signal Taxonomy
### Signal Types
| Type | Description | Weight | Example |
|------|-----------|--------|---------|
| Leading indicator | Predicts future movement | High | Job postings surge → company expanding |
| Lagging indicator | Confirms past movement | Medium | Quarterly earnings → business health |
| Base rate | Historical frequency | High | "80% of startups fail within 5 years" |
| Expert opinion | Informed prediction | Medium | Analyst forecast, CEO statement |
| Data point | Factual measurement | High | Revenue figure, user count, benchmark |
| Anomaly | Deviation from pattern | High | Unusual trading volume, sudden hiring freeze |
| Structural change | Systemic shift | Very High | New regulation, technology breakthrough |
| Sentiment shift | Collective mood change | Medium | Media tone change, social media trend |
### Signal Strength Assessment
```
STRONG signal (high predictive value):
- Multiple independent sources confirm
- Quantitative data (not just opinions)
- Leading indicator with historical track record
- Structural change with clear causal mechanism
MODERATE signal (some predictive value):
- Single authoritative source
- Expert opinion from domain specialist
- Historical pattern that may or may not repeat
- Lagging indicator (confirms direction)
WEAK signal (limited predictive value):
- Social media buzz without substance
- Single anecdote or case study
- Rumor or unconfirmed report
- Opinion from non-specialist
```
---
## Confidence Calibration
### Probability Scale
```
95% — Almost certain (would bet 19:1)
90% — Very likely (would bet 9:1)
80% — Likely (would bet 4:1)
70% — Probable (would bet 7:3)
60% — Slightly more likely than not
50% — Toss-up (genuine uncertainty)
40% — Slightly less likely than not
30% — Unlikely (but plausible)
20% — Very unlikely (but possible)
10% — Extremely unlikely
5% — Almost impossible (but not zero)
```
### Calibration Rules
1. NEVER use 0% or 100% — nothing is absolutely certain
2. If you haven't done research, default to the base rate (outside view)
3. Your first estimate should be the reference class base rate
4. Adjust from the base rate using specific evidence (inside view)
5. Typical adjustment: ±5-15% per strong signal, ±2-5% per moderate signal
6. If your gut says 80% but your analysis says 55%, trust the analysis
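The adjustment rules above can be sketched as a small Python helper. This is illustrative only: the function name is hypothetical, the per-signal adjustment sizes are assumed midpoints of the ranges in rule 5, and the weak-signal size is an assumption (rule 5 does not specify one).

```python
def calibrate(base_rate, signals):
    """Adjust a base-rate probability using signal evidence.

    signals: list of (strength, direction) pairs, where direction is
    +1 (supports the prediction) or -1 (contradicts it). Adjustment
    sizes are assumed midpoints of the ranges in rule 5; the "weak"
    size is an assumption.
    """
    adjustment = {"strong": 0.10, "moderate": 0.03, "weak": 0.01}
    p = base_rate
    for strength, direction in signals:
        p += direction * adjustment[strength]
    # Rule 1: never 0% or 100% -- clamp away from certainty
    return min(max(p, 0.05), 0.95)

# Base rate 30%, two strong signals for, one moderate against
print(round(calibrate(0.30, [("strong", +1), ("strong", +1), ("moderate", -1)]), 2))  # 0.47
```

Note the clamp: even overwhelming evidence never pushes the stated probability past 95%, per rule 1.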
### Brier Score
The gold standard for measuring prediction accuracy:
```
Brier Score = (predicted_probability - actual_outcome)^2
actual_outcome = 1 if prediction came true, 0 if not
Perfect score: 0.0 (you're always right with perfect confidence)
Coin flip: 0.25 (saying 50% on everything)
Terrible: 1.0 (100% confident, always wrong)
Good forecaster: < 0.15
Average forecaster: 0.20-0.30
Bad forecaster: > 0.35
```
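The formulas above translate directly into Python. A minimal sketch (the function name is illustrative):

```python
def mean_brier(predictions):
    """Mean Brier score over resolved predictions.

    predictions: list of (predicted_probability, came_true) pairs;
    came_true maps to actual_outcome 1 or 0, as defined above.
    """
    return sum((p - (1.0 if came_true else 0.0)) ** 2
               for p, came_true in predictions) / len(predictions)

resolved = [(0.8, True), (0.6, False), (0.9, True)]
print(round(mean_brier(resolved), 4))  # 0.1367 -- within the "good forecaster" range
```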
---
## Domain-Specific Source Guide
### Technology Predictions
| Source Type | Examples | Use For |
|-------------|---------|---------|
| Product roadmaps | GitHub issues, release notes, blog posts | Feature predictions |
| Adoption data | Stack Overflow surveys, NPM downloads, DB-Engines | Technology trends |
| Funding data | Crunchbase, PitchBook, TechCrunch | Startup success/failure |
| Patent filings | Google Patents, USPTO | Innovation direction |
| Job postings | LinkedIn, Indeed, Levels.fyi | Technology demand |
| Benchmark data | TechEmpower, MLPerf, Geekbench | Performance trends |
### Finance Predictions
| Source Type | Examples | Use For |
|-------------|---------|---------|
| Economic data | FRED, BLS, Census | Macro trends |
| Earnings | SEC filings, earnings calls | Company performance |
| Analyst reports | Bloomberg, Reuters, S&P | Market consensus |
| Central bank | Fed minutes, ECB statements | Interest rates, policy |
| Commodity data | EIA, OPEC reports | Energy/commodity prices |
| Sentiment | VIX, put/call ratio, AAII survey | Market mood |
### Geopolitics Predictions
| Source Type | Examples | Use For |
|-------------|---------|---------|
| Official sources | Government statements, UN reports | Policy direction |
| Think tanks | RAND, Brookings, Chatham House | Analysis |
| Election data | Polls, voter registration, 538 | Election outcomes |
| Trade data | WTO, customs data, trade balances | Trade policy |
| Military data | SIPRI, defense budgets, deployments | Conflict risk |
| Diplomatic signals | Ambassador recalls, sanctions, treaties | Relations |
### Climate Predictions
| Source Type | Examples | Use For |
|-------------|---------|---------|
| Scientific data | IPCC, NASA, NOAA | Climate trends |
| Energy data | IEA, EIA, IRENA | Energy transition |
| Policy data | COP agreements, national plans | Regulation |
| Corporate data | CDP disclosures, sustainability reports | Corporate action |
| Technology data | BloombergNEF, patent filings | Clean tech trends |
| Investment data | Green bond issuance, ESG flows | Capital allocation |
---
## Reasoning Chain Construction
### Template
```
PREDICTION: [Specific, falsifiable claim]
1. REFERENCE CLASS (Outside View)
Base rate: [What % of similar events occur?]
Reference examples: [3-5 historical analogues]
2. SPECIFIC EVIDENCE (Inside View)
Signals FOR (+):
a. [Signal] — strength: [strong/moderate/weak] — adjustment: +X%
b. [Signal] — strength: [strong/moderate/weak] — adjustment: +X%
Signals AGAINST (-):
a. [Signal] — strength: [strong/moderate/weak] — adjustment: -X%
b. [Signal] — strength: [strong/moderate/weak] — adjustment: -X%
3. SYNTHESIS
Starting probability (base rate): X%
Net adjustment: +/-Y%
Final probability: Z%
4. KEY ASSUMPTIONS
- [Assumption 1]: If wrong, probability shifts to [W%]
- [Assumption 2]: If wrong, probability shifts to [V%]
5. RESOLUTION
Date: [When can this be resolved?]
Criteria: [Exactly how to determine if correct]
Data source: [Where to check the outcome]
```
---
## Prediction Tracking & Scoring
### Prediction Ledger Format
```json
{
"id": "pred_001",
"created": "2025-01-15",
"prediction": "OpenAI will release GPT-5 before July 2025",
"confidence": 0.65,
"domain": "tech",
"time_horizon": "2025-07-01",
"reasoning_chain": "...",
"key_signals": ["leaked roadmap", "compute scaling", "hiring patterns"],
"status": "active|resolved|expired",
"resolution": {
"date": "2025-06-30",
"outcome": true,
"evidence": "Released June 15, 2025",
"brier_score": 0.1225
},
"updates": [
{"date": "2025-03-01", "new_confidence": 0.75, "reason": "New evidence: leaked demo"}
]
}
```
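Resolving a ledger entry can be sketched as follows. Field names follow the ledger format above; the helper name and its arguments are hypothetical:

```python
def resolve_prediction(entry, came_true, evidence, date):
    """Mark a ledger entry resolved and record its Brier score.

    entry: a dict in the ledger format above; came_true: whether the
    predicted outcome occurred.
    """
    entry["status"] = "resolved"
    entry["resolution"] = {
        "date": date,
        "outcome": came_true,
        "evidence": evidence,
        # Brier score: (predicted_probability - actual_outcome)^2
        "brier_score": round((entry["confidence"] - (1 if came_true else 0)) ** 2, 4),
    }
    return entry

entry = {"id": "pred_001", "confidence": 0.65, "status": "active"}
resolved = resolve_prediction(entry, True, "Released June 15, 2025", "2025-06-30")
print(resolved["resolution"]["brier_score"])  # 0.1225
```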
### Accuracy Report Template
```
ACCURACY DASHBOARD
==================
Total predictions: N
Resolved predictions: N (N correct, N incorrect, N partial)
Active predictions: N
Expired (unresolvable): N
Overall accuracy: X%
Brier score: 0.XX
Calibration:
Predicted 90%+ → Actual: X% (N predictions)
Predicted 70-89% → Actual: X% (N predictions)
Predicted 50-69% → Actual: X% (N predictions)
Predicted 30-49% → Actual: X% (N predictions)
Predicted <30% → Actual: X% (N predictions)
Strengths: [domains/types where you perform well]
Weaknesses: [domains/types where you perform poorly]
```
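The calibration section of the dashboard can be computed with a bucketing pass like this sketch. The half-open bucket edges approximate the bands above, and the labels are simplified ("50%+" stands for the 50-69% band):

```python
def calibration_table(resolved):
    """Compare stated confidence with the actual hit rate per bucket.

    resolved: list of (predicted_probability, came_true) pairs.
    Buckets approximate the dashboard bands above as half-open ranges.
    """
    buckets = [(0.90, 1.01), (0.70, 0.90), (0.50, 0.70), (0.30, 0.50), (0.00, 0.30)]
    rows = []
    for lo, hi in buckets:
        hits = [came_true for p, came_true in resolved if lo <= p < hi]
        if hits:  # skip empty buckets
            rows.append((f"{lo:.0%}+", len(hits), sum(hits) / len(hits)))
    return rows

resolved = [(0.9, True), (0.9, True), (0.9, False), (0.6, True), (0.6, False)]
for label, n, actual in calibration_table(resolved):
    print(label, n, f"{actual:.0%}")
# 90%+ 3 67%
# 50%+ 2 50%
```

A well-calibrated forecaster's actual hit rate should track the bucket's stated confidence; the 67% hit rate on 90%+ predictions above would signal overconfidence.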
---
## Cognitive Bias Checklist
Before finalizing any prediction, check for these biases:
1. **Anchoring**: Am I fixated on the first number I encountered?
- Fix: Deliberately consider the base rate before looking at specific evidence
2. **Availability bias**: Am I overweighting recent or memorable events?
- Fix: Check the actual frequency, not just what comes to mind
3. **Confirmation bias**: Am I only looking for evidence that supports my prediction?
- Fix: Actively search for contradicting evidence (steel-man the opposite)
4. **Narrative bias**: Am I choosing a prediction because it makes a good story?
- Fix: Boring predictions are often more accurate
5. **Overconfidence**: Am I too sure?
- Fix: If you've never been wrong at this confidence level, you're probably overconfident
6. **Scope insensitivity**: Am I treating very different scales the same?
- Fix: Be specific about magnitudes and timeframes
7. **Recency bias**: Am I extrapolating recent trends too far?
- Fix: Check longer time horizons and mean reversion patterns
8. **Status quo bias**: Am I defaulting to "nothing will change"?
- Fix: Consider structural changes that could break the status quo
### Contrarian Mode
When enabled, for each consensus prediction:
1. Identify what the consensus view is
2. Search for evidence the consensus is wrong
3. Consider: "What would have to be true for the opposite to happen?"
4. If credible contrarian evidence exists, include a contrarian prediction
5. Always label contrarian predictions clearly with the consensus for comparison