release(v0.2.0): streaming, MCP protocol, Browser Hand, security enhancements

## Major Features

### Streaming Response System
- Implement LlmDriver trait with `stream()` method returning async Stream
- Add SSE parsing for Anthropic and OpenAI API streaming
- Integrate Tauri event system for frontend streaming (`stream:chunk` events)
- Add StreamChunk types: Delta, ToolStart, ToolEnd, Complete, Error

### MCP Protocol Implementation
- Add MCP JSON-RPC 2.0 types (mcp_types.rs)
- Implement stdio-based MCP transport (mcp_transport.rs)
- Support tool discovery, execution, and resource operations

### Browser Hand Implementation
- Complete browser automation with Playwright-style actions
- Support Navigate, Click, Type, Scrape, Screenshot, Wait actions
- Add educational Hands: Whiteboard, Slideshow, Speech, Quiz

### Security Enhancements
- Implement command whitelist/blacklist for shell_exec tool
- Add SSRF protection with private IP blocking
- Create security.toml configuration file

## Test Improvements
- Fix test import paths (security-utils, setup)
- Fix vi.mock hoisting issues with vi.hoisted()
- Update test expectations for validateUrl and sanitizeFilename
- Add getUnsupportedLocalGatewayStatus mock

## Documentation Updates
- Update architecture documentation
- Improve configuration reference
- Add quick-start guide updates

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-03-24 03:24:24 +08:00
parent e49ba4460b
commit 3ff08faa56
78 changed files with 29575 additions and 1682 deletions

127
hands/speech.HAND.toml Normal file
View File

@@ -0,0 +1,127 @@
# Speech Hand - text-to-speech capability pack
#
# ZCLAW Hand configuration.
# Provides text-to-speech (TTS) output with multiple voices and languages.

[hand]
name = "speech"
version = "1.0.0"
description = "语音合成能力包 - 将文本转换为自然语音输出"
author = "ZCLAW Team"
type = "media"
requires_approval = false
# Per-invocation timeout, in seconds (presumably wall-clock — confirm against the Hand runner).
timeout = 120
# Maximum number of simultaneously running invocations of this Hand.
max_concurrent = 3
tags = ["speech", "tts", "voice", "audio", "education", "accessibility"]
[hand.config]
# TTS provider: browser, azure, openai, elevenlabs, local
provider = "browser"
# Default voice identifier (provider-specific; "default" lets the provider pick).
default_voice = "default"
# Default speaking rate (0.5 - 2.0)
default_rate = 1.0
# Default pitch (0.5 - 2.0)
default_pitch = 1.0
# Default volume (0 - 1.0)
default_volume = 1.0
# Default language code (BCP 47 style, e.g. "zh-CN")
default_language = "zh-CN"
# Cache synthesized audio to avoid re-synthesizing identical text.
cache_audio = true

# Azure TTS settings (only read when provider = "azure").
# Keys below are commented-out examples; uncomment and fill in to enable.
[hand.config.azure]
# voice_name = "zh-CN-XiaoxiaoNeural"
# region = "eastasia"

# OpenAI TTS settings (only read when provider = "openai").
[hand.config.openai]
# model = "tts-1"
# voice = "alloy"

# Browser TTS settings (only read when provider = "browser").
[hand.config.browser]
# Use the operating system's default voice when no mapping applies.
use_system_voice = true
# Language-code -> voice-name mapping. zh-CN / en-US are valid bare keys,
# so no quoting is needed; the parsed table is identical.
voice_mapping = { zh-CN = "Microsoft Huihui", en-US = "Microsoft David" }
[hand.triggers]
# This Hand can be invoked manually; no scheduled or webhook activation.
manual = true
schedule = false
webhook = false

# Chat-intent trigger: fires when a user message matches the regex below.
# Pattern mixes Chinese and English TTS keywords.
[[hand.triggers.events]]
type = "chat.intent"
pattern = "朗读|念|说|播放语音|speak|read|say|tts"
priority = 5
[hand.permissions]
# Capabilities this Hand requires at runtime.
requires = [
    "speech.synthesize",
    "speech.play",
    "speech.stop",
]
# Minimum role allowed to invoke this Hand.
roles = ["operator.read"]
# Throttle: at most `max_requests` invocations per `window_seconds`
# (presumably a rolling window — confirm against the rate limiter implementation).
[hand.rate_limit]
max_requests = 100
window_seconds = 3600
# Audit logging policy for this Hand.
[hand.audit]
log_inputs = true
log_outputs = false # audio output is not logged
retention_days = 3
# Speech action definitions.
# In `params`, a trailing "?" on the type (e.g. "string?") marks the
# parameter as optional; types without "?" are required.

[[hand.actions]]
id = "speak"
name = "朗读文本"
description = "将文本转换为语音并播放"
params = { text = "string", voice = "string?", rate = "number?", pitch = "number?" }

[[hand.actions]]
id = "speak_ssml"
name = "朗读 SSML"
description = "使用 SSML 标记朗读文本(支持更精细控制)"
params = { ssml = "string", voice = "string?" }

[[hand.actions]]
id = "pause"
name = "暂停播放"
description = "暂停当前语音播放"
params = {}

[[hand.actions]]
id = "resume"
name = "继续播放"
description = "继续暂停的语音播放"
params = {}

[[hand.actions]]
id = "stop"
name = "停止播放"
description = "停止当前语音播放"
params = {}

[[hand.actions]]
id = "list_voices"
name = "列出可用语音"
description = "获取可用的语音列表"
params = { language = "string?" }

[[hand.actions]]
id = "set_voice"
name = "设置默认语音"
description = "更改默认语音设置"
params = { voice = "string", language = "string?" }