# Collector Hand - data collection and aggregation capability pack
#
# OpenFang Hand configuration
# This Hand provides automated data collection, web scraping, and aggregation.

[hand]
name = "collector"
version = "1.0.0"
description = "数据收集和聚合能力包 - 自动抓取、解析和结构化数据"
author = "ZCLAW Team"

# Hand type
type = "data"

# Whether manual approval is required before execution
requires_approval = false

# Default timeout (seconds)
timeout = 300

# Maximum number of concurrent executions
max_concurrent = 5

# Capability tags
tags = ["data", "scraping", "collection", "aggregation", "web"]

[hand.config]
# Request settings
user_agent = "ZCLAW-Collector/1.0"
request_timeout = 30
retry_count = 3
retry_delay = 5

# Pagination handling
max_pages = 100
pagination_delay = 1  # seconds

# Output settings
default_output_format = "json"  # json, csv, xlsx
output_dir = "/tmp/zclaw/collector"

# Anti-crawler settings
respect_robots_txt = true
rate_limit_per_second = 2

[hand.triggers]
# Trigger configuration
manual = true
schedule = true
webhook = true

# Event triggers
[[hand.triggers.events]]
type = "schedule.cron"
pattern = "0 */6 * * *"  # every 6 hours
priority = 5

[[hand.triggers.events]]
type = "chat.intent"
pattern = "收集|抓取|爬取|采集|scrape|collect|crawl"
priority = 5

[hand.permissions]
# Required permissions
requires = [
    "web.fetch",
    "file.read",
    "file.write",
]

# RBAC role requirements
roles = ["operator.read", "operator.write"]

# Rate limiting
[hand.rate_limit]
max_requests = 50
window_seconds = 3600  # 1 hour

# Audit configuration
[hand.audit]
log_inputs = true
log_outputs = true
retention_days = 30

# Parameter definitions
[[hand.parameters]]
name = "targetUrl"
label = "目标 URL"
type = "text"
required = true
description = "要抓取的网页 URL"

[[hand.parameters]]
name = "selector"
label = "CSS 选择器"
type = "text"
required = false
description = "要提取的元素 CSS 选择器"

[[hand.parameters]]
name = "outputFormat"
label = "输出格式"
type = "select"
required = false
default = "json"
options = ["json", "csv", "xlsx"]

[[hand.parameters]]
name = "pagination"
label = "跟踪分页"
type = "boolean"
required = false
default = false
description = "是否自动跟踪分页链接"

# Workflow steps
[[hand.workflow]]
id = "fetch"
name = "获取页面"
description = "下载目标网页内容"

[[hand.workflow]]
id = "parse"
name = "解析内容"
description = "使用选择器提取目标数据"

[[hand.workflow]]
id = "transform"
name = "转换数据"
description = "清理和结构化提取的数据"

[[hand.workflow]]
id = "export"
name = "导出结果"
description = "保存为指定格式的文件"