Phase 2 P1 Tasks Completed: API Layer (gateway-client.ts, gatewayStore.ts): - Add Channels CRUD: getChannel, createChannel, updateChannel, deleteChannel - Add Triggers CRUD: getTrigger, createTrigger, updateTrigger, deleteTrigger - Add Skills CRUD: getSkill, createSkill, updateSkill, deleteSkill - Add Scheduled Tasks API: createScheduledTask, deleteScheduledTask, toggleScheduledTask - Add loadModels action for dynamic model list UI Components: - ModelsAPI.tsx: Dynamic model loading from API with loading/error states - SchedulerPanel.tsx: Full CreateJobModal with cron/interval/once scheduling - SecurityStatus.tsx: Loading states, error handling, retry functionality - WorkflowEditor.tsx: New workflow creation/editing modal (new file) - WorkflowHistory.tsx: Workflow execution history viewer (new file) - WorkflowList.tsx: Integrated editor and history access Configuration: - Add 4 Hands TOML configs: clip, collector, predictor, twitter Documentation (SYSTEM_ANALYSIS.md): - Update API coverage: 65% → 89% (53/62 endpoints) - Update UI completion: 85% → 92% - Mark Phase 2 P1 tasks as completed - Update technical debt cleanup status Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
136 lines
2.5 KiB
TOML
136 lines
2.5 KiB
TOML
# Collector Hand - data collection and aggregation capability pack
#
# OpenFang Hand configuration.
# This Hand provides automated data collection, web scraping, and
# aggregation capabilities.

# Identity and execution limits of the collector Hand.
[hand]
name = "collector"
version = "1.0.0"
description = "数据收集和聚合能力包 - 自动抓取、解析和结构化数据"
author = "ZCLAW Team"

# Hand category.
type = "data"

# Whether a human must approve a run before it executes.
requires_approval = false

# Default timeout, in seconds.
timeout = 300

# Maximum number of concurrent executions.
max_concurrent = 5

# Capability tags.
tags = [
    "data",
    "scraping",
    "collection",
    "aggregation",
    "web",
]

# Runtime settings for the collector: requests, pagination, output, and
# anti-crawler behaviour.
[hand.config]

# Request settings.
# NOTE(review): units of request_timeout and retry_delay are not stated
# here; presumably seconds - confirm against the consumer.
user_agent = "ZCLAW-Collector/1.0"
request_timeout = 30
retry_count = 3
retry_delay = 5

# Pagination handling.
max_pages = 100
pagination_delay = 1  # seconds

# Output settings.
default_output_format = "json"  # one of: json, csv, xlsx
output_dir = "/tmp/zclaw/collector"

# Anti-crawler settings.
respect_robots_txt = true
rate_limit_per_second = 2

# Trigger configuration: which trigger kinds are switched on.
[hand.triggers]
manual = true
schedule = true
webhook = true

# Event triggers. Each entry appends one record to hand.triggers.events.

# Cron schedule: every 6 hours.
[[hand.triggers.events]]
type = "schedule.cron"
pattern = "0 */6 * * *"
priority = 5

# Chat intent: "|"-separated keywords (Chinese and English).
# NOTE(review): looks like a regex-style alternation - confirm how the
# consumer matches this pattern.
[[hand.triggers.events]]
type = "chat.intent"
pattern = "收集|抓取|爬取|采集|scrape|collect|crawl"
priority = 5

# Permissions and RBAC roles this Hand requires.
[hand.permissions]

# Required permissions.
requires = ["web.fetch", "file.read", "file.write"]

# Required RBAC roles.
roles = ["operator.read", "operator.write"]

# Rate limiting: at most max_requests per window.
[hand.rate_limit]
max_requests = 50
window_seconds = 3600  # 1 hour

# Audit configuration: input/output logging flags and retention in days.
[hand.audit]
log_inputs = true
log_outputs = true
retention_days = 30

# Parameter definitions. Each [[hand.parameters]] entry declares one
# input of the Hand; only targetUrl is marked required.

# Target page URL.
[[hand.parameters]]
name = "targetUrl"
label = "目标 URL"
type = "text"
required = true
description = "要抓取的网页 URL"

# Optional CSS selector for the elements to extract.
[[hand.parameters]]
name = "selector"
label = "CSS 选择器"
type = "text"
required = false
description = "要提取的元素 CSS 选择器"

# Optional output format selection.
# NOTE(review): unlike its sibling parameters, this entry has no
# `description` key - confirm whether that is intentional.
[[hand.parameters]]
name = "outputFormat"
label = "输出格式"
type = "select"
required = false
default = "json"
options = [
    "json",
    "csv",
    "xlsx",
]

# Optional flag: automatically follow pagination links.
[[hand.parameters]]
name = "pagination"
label = "跟踪分页"
type = "boolean"
required = false
default = false
description = "是否自动跟踪分页链接"

# Workflow steps, listed as fetch, parse, transform, export.
# Each [[hand.workflow]] entry appends one step record.

# Download the target page content.
[[hand.workflow]]
id = "fetch"
name = "获取页面"
description = "下载目标网页内容"

# Extract the target data using the selector.
[[hand.workflow]]
id = "parse"
name = "解析内容"
description = "使用选择器提取目标数据"

# Clean and structure the extracted data.
[[hand.workflow]]
id = "transform"
name = "转换数据"
description = "清理和结构化提取的数据"

# Save the result as a file in the requested format.
[[hand.workflow]]
id = "export"
name = "导出结果"
description = "保存为指定格式的文件"