Files
zclaw_openfang/hands/collector.HAND.toml
iven 5599c1a4db feat(phase2): complete P1 tasks - Channels, Triggers, Skills CRUD and UI enhancements
Phase 2 P1 Tasks Completed:

API Layer (gateway-client.ts, gatewayStore.ts):
- Add Channels CRUD: getChannel, createChannel, updateChannel, deleteChannel
- Add Triggers CRUD: getTrigger, createTrigger, updateTrigger, deleteTrigger
- Add Skills CRUD: getSkill, createSkill, updateSkill, deleteSkill
- Add Scheduled Tasks API: createScheduledTask, deleteScheduledTask, toggleScheduledTask
- Add loadModels action for dynamic model list

UI Components:
- ModelsAPI.tsx: Dynamic model loading from API with loading/error states
- SchedulerPanel.tsx: Full CreateJobModal with cron/interval/once scheduling
- SecurityStatus.tsx: Loading states, error handling, retry functionality
- WorkflowEditor.tsx: New workflow creation/editing modal (new file)
- WorkflowHistory.tsx: Workflow execution history viewer (new file)
- WorkflowList.tsx: Integrated editor and history access

Configuration:
- Add 4 Hands TOML configs: clip, collector, predictor, twitter

Documentation (SYSTEM_ANALYSIS.md):
- Update API coverage: 65% → 89% (53/62 endpoints)
- Update UI completion: 85% → 92%
- Mark Phase 2 P1 tasks as completed
- Update technical debt cleanup status

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-15 01:38:34 +08:00

136 lines
2.5 KiB
TOML

# Collector Hand - data collection and aggregation capability pack
#
# OpenFang Hand configuration
# This Hand provides automated data collection, web scraping and aggregation capabilities
[hand]
name = "collector"
version = "1.0.0"
description = "数据收集和聚合能力包 - 自动抓取、解析和结构化数据"
author = "ZCLAW Team"
# Hand category (grouping tag; "data" here, other Hands may use other types)
type = "data"
# Whether a human must approve each run before it executes
requires_approval = false
# Default execution timeout, in seconds
timeout = 300
# Maximum number of concurrent executions of this Hand
max_concurrent = 5
# Capability tags (used for discovery/filtering)
tags = ["data", "scraping", "collection", "aggregation", "web"]
[hand.config]
# HTTP request settings (timeout/retry values presumably in seconds — confirm with runtime)
user_agent = "ZCLAW-Collector/1.0"
request_timeout = 30
retry_count = 3
retry_delay = 5
# Pagination handling
max_pages = 100
pagination_delay = 1 # seconds
# Output settings
default_output_format = "json" # json, csv, xlsx
output_dir = "/tmp/zclaw/collector"
# Anti-scraping / politeness settings
respect_robots_txt = true
rate_limit_per_second = 2
[hand.triggers]
# Trigger configuration: which invocation channels are enabled
manual = true
schedule = true
webhook = true
# Event triggers
[[hand.triggers.events]]
type = "schedule.cron"
pattern = "0 */6 * * *" # every 6 hours, on the hour
priority = 5
[[hand.triggers.events]]
# Fires when a chat message matches any of these keywords (CN: collect/scrape/crawl/gather)
type = "chat.intent"
pattern = "收集|抓取|爬取|采集|scrape|collect|crawl"
priority = 5
[hand.permissions]
# Capabilities this Hand needs granted before it can run
requires = [
"web.fetch",
"file.read",
"file.write"
]
# Required RBAC roles
roles = ["operator.read", "operator.write"]
# Rate limiting: at most max_requests per window_seconds
[hand.rate_limit]
max_requests = 50
window_seconds = 3600 # 1 hour
# Audit configuration
[hand.audit]
log_inputs = true
log_outputs = true
retention_days = 30
# Parameter definitions — inputs supplied when invoking this Hand.
# Each entry declares: name (machine key), label (UI display text),
# type (input widget/value kind), required flag, and optional
# default / options / description.
[[hand.parameters]]
name = "targetUrl"
label = "目标 URL"
type = "text"
required = true
description = "要抓取的网页 URL"

[[hand.parameters]]
name = "selector"
label = "CSS 选择器"
type = "text"
required = false
description = "要提取的元素 CSS 选择器"

[[hand.parameters]]
name = "outputFormat"
label = "输出格式"
type = "select"
required = false
default = "json"
options = ["json", "csv", "xlsx"]
# Added for consistency: every other parameter documents itself via `description`.
description = "导出数据的输出格式"

[[hand.parameters]]
name = "pagination"
label = "跟踪分页"
type = "boolean"
required = false
default = false
description = "是否自动跟踪分页链接"
# Workflow steps (array order presumably reflects execution order:
# fetch → parse → transform → export — confirm with the Hand runtime)
[[hand.workflow]]
id = "fetch"
name = "获取页面"
description = "下载目标网页内容"
[[hand.workflow]]
id = "parse"
name = "解析内容"
description = "使用选择器提取目标数据"
[[hand.workflow]]
id = "transform"
name = "转换数据"
description = "清理和结构化提取的数据"
[[hand.workflow]]
id = "export"
name = "导出结果"
description = "保存为指定格式的文件"