# zclaw_openfang/hands/speech.HAND.toml

# Speech Hand - 语音合成能力包
#
# ZCLAW Hand 配置
# 提供文本转语音 (TTS) 能力，支持多种语音和语言

# Identity and top-level settings of the "speech" hand.
[hand]
name = "speech"
version = "1.0.0"
description = "语音合成能力包 - 将文本转换为自然语音输出"
author = "ZCLAW Team"

# NOTE(review): the meaning of the keys below is defined by the hand loader,
# which is not visible from this file.
type = "media"
requires_approval = false
# presumably seconds — TODO confirm the unit against the loader
timeout = 120
max_concurrent = 3

tags = ["speech", "tts", "voice", "audio", "education", "accessibility", "demo"]

[hand.config]
# TTS provider: browser, azure, openai, elevenlabs, local
provider = "browser"

# Default voice
default_voice = "default"

# Default speech rate (0.5 - 2.0)
default_rate = 1.0

# Default pitch (0.5 - 2.0)
default_pitch = 1.0

# Default volume (0 - 1.0)
default_volume = 1.0

# Language code
default_language = "zh-CN"

# Whether to cache audio
cache_audio = true

# Azure TTS settings (used when provider = "azure").
# The keys below are commented-out samples; the table is empty as shipped.
[hand.config.azure]
# voice_name = "zh-CN-XiaoxiaoNeural"
# region = "eastasia"

# OpenAI TTS settings (used when provider = "openai").
# The keys below are commented-out samples; the table is empty as shipped.
[hand.config.openai]
# model = "tts-1"
# voice = "alloy"

# Browser TTS settings (used when provider = "browser").
[hand.config.browser]
# Use the system default voice
use_system_voice = true
# Voice name mapping, keyed by language code
voice_mapping = { "zh-CN" = "Microsoft Huihui", "en-US" = "Microsoft David" }

# Trigger switches for this hand: only `manual` is enabled here.
[hand.triggers]
manual = true
schedule = false
webhook = false

# Event trigger of type "chat.intent"; `pattern` lists alternatives
# separated by `|`.
# NOTE(review): short alternatives such as "说", "read" and "say" may match
# unrelated text if the pattern is applied unanchored — confirm how the
# loader matches it.
[[hand.triggers.events]]
type = "chat.intent"
pattern = "朗读|念|说|播放语音|speak|read|say|tts"
priority = 5

# Permission identifiers and roles declared for this hand.
# NOTE(review): how `requires` and `roles` are enforced is decided by the
# loader, which is not visible here — confirm.
[hand.permissions]
requires = [
  "speech.synthesize",
  "speech.play",
  "speech.stop"
]

roles = ["operator.read"]

# Rate limiting: presumably at most `max_requests` requests per window of
# `window_seconds` seconds — TODO confirm against the loader.
[hand.rate_limit]
max_requests = 100
window_seconds = 3600

# Audit logging settings.
[hand.audit]
log_inputs = true
log_outputs = false  # audio is not logged
retention_days = 3

# Speech action definitions.
# Each entry declares an action id, a display name, a description and a
# `params` table mapping parameter names to type strings.
# NOTE(review): a trailing `?` on a type string presumably marks the
# parameter as optional — confirm against the loader.

# NOTE(review): `speak` exposes no `volume` parameter although [hand.config]
# defines default_volume — confirm whether that is intentional.
[[hand.actions]]
id = "speak"
name = "朗读文本"
description = "将文本转换为语音并播放"
params = { text = "string", voice = "string?", rate = "number?", pitch = "number?" }

[[hand.actions]]
id = "speak_ssml"
name = "朗读 SSML"
description = "使用 SSML 标记朗读文本（支持更精细控制）"
params = { ssml = "string", voice = "string?" }

[[hand.actions]]
id = "pause"
name = "暂停播放"
description = "暂停当前语音播放"
params = {}

[[hand.actions]]
id = "resume"
name = "继续播放"
description = "继续暂停的语音播放"
params = {}

[[hand.actions]]
id = "stop"
name = "停止播放"
description = "停止当前语音播放"
params = {}

[[hand.actions]]
id = "list_voices"
name = "列出可用语音"
description = "获取可用的语音列表"
params = { language = "string?" }

[[hand.actions]]
id = "set_voice"
name = "设置默认语音"
description = "更改默认语音设置"
params = { voice = "string", language = "string?" }