release(v0.2.0): streaming, MCP protocol, Browser Hand, security enhancements

## Major Features

### Streaming Response System
- Implement LlmDriver trait with `stream()` method returning async Stream
- Add SSE parsing for Anthropic and OpenAI API streaming
- Integrate Tauri event system for frontend streaming (`stream:chunk` events)
- Add StreamChunk types: Delta, ToolStart, ToolEnd, Complete, Error

### MCP Protocol Implementation
- Add MCP JSON-RPC 2.0 types (mcp_types.rs)
- Implement stdio-based MCP transport (mcp_transport.rs)
- Support tool discovery, execution, and resource operations

### Browser Hand Implementation
- Complete browser automation with Playwright-style actions
- Support Navigate, Click, Type, Scrape, Screenshot, Wait actions
- Add educational Hands: Whiteboard, Slideshow, Speech, Quiz

### Security Enhancements
- Implement command whitelist/blacklist for shell_exec tool
- Add SSRF protection with private IP blocking
- Create security.toml configuration file

## Test Improvements
- Fix test import paths (security-utils, setup)
- Fix vi.mock hoisting issues with vi.hoisted()
- Update test expectations for validateUrl and sanitizeFilename
- Add getUnsupportedLocalGatewayStatus mock

## Documentation Updates
- Update architecture documentation
- Improve configuration reference
- Add quick-start guide updates

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-03-24 03:24:24 +08:00
parent e49ba4460b
commit 3ff08faa56
78 changed files with 29575 additions and 1682 deletions

127
hands/speech.HAND.toml Normal file
View File

@@ -0,0 +1,127 @@
# Speech Hand - text-to-speech capability pack
#
# ZCLAW Hand configuration.
# Provides text-to-speech (TTS) output with multiple voices and languages.

[hand]
name = "speech"
version = "1.0.0"
description = "语音合成能力包 - 将文本转换为自然语音输出"
author = "ZCLAW Team"
type = "media"
requires_approval = false
# Per-invocation timeout, in seconds (presumably wall-clock — confirm against the Hand runner).
timeout = 120
# Maximum number of simultaneously running invocations of this Hand.
max_concurrent = 3
tags = ["speech", "tts", "voice", "audio", "education", "accessibility"]
[hand.config]
# TTS provider: browser, azure, openai, elevenlabs, local
provider = "browser"
# Default voice identifier (provider-specific; "default" lets the provider pick).
default_voice = "default"
# Default speaking rate (0.5 - 2.0)
default_rate = 1.0
# Default pitch (0.5 - 2.0)
default_pitch = 1.0
# Default volume (0 - 1.0)
default_volume = 1.0
# Default language code (BCP 47 style, e.g. "zh-CN")
default_language = "zh-CN"
# Cache synthesized audio to avoid re-synthesizing identical text.
cache_audio = true

# Azure TTS settings (only read when provider = "azure").
# Keys below are commented-out examples; uncomment and fill in to enable.
[hand.config.azure]
# voice_name = "zh-CN-XiaoxiaoNeural"
# region = "eastasia"

# OpenAI TTS settings (only read when provider = "openai").
[hand.config.openai]
# model = "tts-1"
# voice = "alloy"

# Browser TTS settings (only read when provider = "browser").
[hand.config.browser]
# Use the operating system's default voice when no mapping applies.
use_system_voice = true
# Language-code -> voice-name mapping. zh-CN / en-US are valid bare keys,
# so no quoting is needed; the parsed table is identical.
voice_mapping = { zh-CN = "Microsoft Huihui", en-US = "Microsoft David" }
[hand.triggers]
# This Hand can be invoked manually; no scheduled or webhook activation.
manual = true
schedule = false
webhook = false

# Chat-intent trigger: fires when a user message matches the regex below.
# Pattern mixes Chinese and English TTS keywords.
[[hand.triggers.events]]
type = "chat.intent"
pattern = "朗读|念|说|播放语音|speak|read|say|tts"
priority = 5
[hand.permissions]
# Capabilities this Hand requires at runtime.
requires = [
    "speech.synthesize",
    "speech.play",
    "speech.stop",
]
# Minimum role allowed to invoke this Hand.
roles = ["operator.read"]
# Throttle: at most `max_requests` invocations per `window_seconds`
# (presumably a rolling window — confirm against the rate limiter implementation).
[hand.rate_limit]
max_requests = 100
window_seconds = 3600
# Audit logging policy for this Hand.
[hand.audit]
log_inputs = true
log_outputs = false # audio output is not logged
retention_days = 3
# Speech action definitions.
# In `params`, a trailing "?" on the type (e.g. "string?") marks the
# parameter as optional; types without "?" are required.

[[hand.actions]]
id = "speak"
name = "朗读文本"
description = "将文本转换为语音并播放"
params = { text = "string", voice = "string?", rate = "number?", pitch = "number?" }

[[hand.actions]]
id = "speak_ssml"
name = "朗读 SSML"
description = "使用 SSML 标记朗读文本(支持更精细控制)"
params = { ssml = "string", voice = "string?" }

[[hand.actions]]
id = "pause"
name = "暂停播放"
description = "暂停当前语音播放"
params = {}

[[hand.actions]]
id = "resume"
name = "继续播放"
description = "继续暂停的语音播放"
params = {}

[[hand.actions]]
id = "stop"
name = "停止播放"
description = "停止当前语音播放"
params = {}

[[hand.actions]]
id = "list_voices"
name = "列出可用语音"
description = "获取可用的语音列表"
params = { language = "string?" }

[[hand.actions]]
id = "set_voice"
name = "设置默认语音"
description = "更改默认语音设置"
params = { voice = "string", language = "string?" }