Files
zclaw_openfang/hands/collector.HAND.toml
iven 5599c1a4db feat(phase2): complete P1 tasks - Channels, Triggers, Skills CRUD and UI enhancements
Phase 2 P1 Tasks Completed:

API Layer (gateway-client.ts, gatewayStore.ts):
- Add Channels CRUD: getChannel, createChannel, updateChannel, deleteChannel
- Add Triggers CRUD: getTrigger, createTrigger, updateTrigger, deleteTrigger
- Add Skills CRUD: getSkill, createSkill, updateSkill, deleteSkill
- Add Scheduled Tasks API: createScheduledTask, deleteScheduledTask, toggleScheduledTask
- Add loadModels action for dynamic model list

UI Components:
- ModelsAPI.tsx: Dynamic model loading from API with loading/error states
- SchedulerPanel.tsx: Full CreateJobModal with cron/interval/once scheduling
- SecurityStatus.tsx: Loading states, error handling, retry functionality
- WorkflowEditor.tsx: New workflow creation/editing modal (new file)
- WorkflowHistory.tsx: Workflow execution history viewer (new file)
- WorkflowList.tsx: Integrated editor and history access

Configuration:
- Add 4 Hands TOML configs: clip, collector, predictor, twitter

Documentation (SYSTEM_ANALYSIS.md):
- Update API coverage: 65% → 89% (53/62 endpoints)
- Update UI completion: 85% → 92%
- Mark Phase 2 P1 tasks as completed
- Update technical debt cleanup status

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-15 01:38:34 +08:00

136 lines
2.5 KiB
TOML

# Collector Hand - data collection and aggregation capability pack
#
# OpenFang Hand configuration
# This Hand provides automated data collection, web scraping and aggregation capabilities
[hand]
name = "collector"
version = "1.0.0"
description = "数据收集和聚合能力包 - 自动抓取、解析和结构化数据"
author = "ZCLAW Team"
# Hand category (grouping tag; "data" here, other Hands may use other types)
type = "data"
# Whether a human must approve each run before it executes
requires_approval = false
# Default execution timeout, in seconds
timeout = 300
# Maximum number of concurrent executions of this Hand
max_concurrent = 5
# Capability tags (used for discovery/filtering)
tags = ["data", "scraping", "collection", "aggregation", "web"]
[hand.config]
# HTTP request settings (timeout/retry values presumably in seconds — confirm with runtime)
user_agent = "ZCLAW-Collector/1.0"
request_timeout = 30
retry_count = 3
retry_delay = 5
# Pagination handling
max_pages = 100
pagination_delay = 1 # seconds
# Output settings
default_output_format = "json" # json, csv, xlsx
output_dir = "/tmp/zclaw/collector"
# Anti-scraping / politeness settings
respect_robots_txt = true
rate_limit_per_second = 2
[hand.triggers]
# Trigger configuration: which invocation channels are enabled
manual = true
schedule = true
webhook = true
# Event triggers
[[hand.triggers.events]]
type = "schedule.cron"
pattern = "0 */6 * * *" # every 6 hours, on the hour
priority = 5
[[hand.triggers.events]]
# Fires when a chat message matches any of these keywords (CN: collect/scrape/crawl/gather)
type = "chat.intent"
pattern = "收集|抓取|爬取|采集|scrape|collect|crawl"
priority = 5
[hand.permissions]
# Capabilities this Hand needs granted before it can run
requires = [
"web.fetch",
"file.read",
"file.write"
]
# Required RBAC roles
roles = ["operator.read", "operator.write"]
# Rate limiting: at most max_requests per window_seconds
[hand.rate_limit]
max_requests = 50
window_seconds = 3600 # 1 hour
# Audit configuration
[hand.audit]
log_inputs = true
log_outputs = true
retention_days = 30
# Parameter definitions — inputs supplied when invoking this Hand.
# Each entry declares: name (machine key), label (UI display text),
# type (input widget/value kind), required flag, and optional
# default / options / description.
[[hand.parameters]]
name = "targetUrl"
label = "目标 URL"
type = "text"
required = true
description = "要抓取的网页 URL"

[[hand.parameters]]
name = "selector"
label = "CSS 选择器"
type = "text"
required = false
description = "要提取的元素 CSS 选择器"

[[hand.parameters]]
name = "outputFormat"
label = "输出格式"
type = "select"
required = false
default = "json"
options = ["json", "csv", "xlsx"]
# Added for consistency: every other parameter documents itself via `description`.
description = "导出数据的输出格式"

[[hand.parameters]]
name = "pagination"
label = "跟踪分页"
type = "boolean"
required = false
default = false
description = "是否自动跟踪分页链接"
# Workflow steps (array order presumably reflects execution order:
# fetch → parse → transform → export — confirm with the Hand runtime)
[[hand.workflow]]
id = "fetch"
name = "获取页面"
description = "下载目标网页内容"
[[hand.workflow]]
id = "parse"
name = "解析内容"
description = "使用选择器提取目标数据"
[[hand.workflow]]
id = "transform"
name = "转换数据"
description = "清理和结构化提取的数据"
[[hand.workflow]]
id = "export"
name = "导出结果"
description = "保存为指定格式的文件"