feat: Batch 5-9 — GrowthIntegration桥接、验证补全、死代码清理、Pipeline模板、Speech/Twitter真实实现
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

Batch 5 (P0): GrowthIntegration 接入 Tauri
- Kernel 新增 set_viking()/set_extraction_driver() 桥接 SqliteStorage
- 中间件链共享存储,MemoryExtractor 接入 LLM 驱动

Batch 6 (P1): 输入验证 + Heartbeat
- Relay 验证补全(stream 兼容检查、API key 格式校验)
- UUID 类型校验、SessionId 错误返回
- Heartbeat 默认开启 + 首次聊天自动初始化

Batch 7 (P2): 死代码清理
- zclaw-channels 整体移除(317 行)
- multi-agent 特性门控、admin 方法标注

Batch 8 (P2): Pipeline 模板
- PipelineMetadata 新增 annotations 字段
- pipeline_templates 命令 + 2 个示例模板
- fallback driver base_url 修复(doubao/qwen/deepseek 端点)

Batch 9 (P1): SpeechHand/TwitterHand 真实实现
- SpeechHand: tts_method 字段 + Browser TTS 前端集成 (Web Speech API)
- TwitterHand: 12 个 action 全部替换为 Twitter API v2 真实 HTTP 调用
- chatStore/useAutomationEvents 双路径 TTS 触发
This commit is contained in:
iven
2026-03-30 09:24:50 +08:00
parent 5595083b96
commit 13c0b18bbc
39 changed files with 1155 additions and 507 deletions

View File

@@ -35,7 +35,7 @@ import {
// === Default Config ===
const DEFAULT_HEARTBEAT_CONFIG: HeartbeatConfigType = {
enabled: false,
enabled: true,
interval_minutes: 30,
quiet_hours_start: null,
quiet_hours_end: null,

View File

@@ -12,6 +12,7 @@ import { useHandStore } from '../store/handStore';
import { useWorkflowStore } from '../store/workflowStore';
import { useChatStore } from '../store/chatStore';
import type { GatewayClient } from '../lib/gateway-client';
import { speechSynth } from '../lib/speech-synth';
// === Event Types ===
@@ -161,6 +162,23 @@ export function useAutomationEvents(
handResult: eventData.hand_result,
runId: eventData.run_id,
});
// Trigger browser TTS for SpeechHand results
if (eventData.hand_name === 'speech' && eventData.hand_result && typeof eventData.hand_result === 'object') {
const res = eventData.hand_result as Record<string, unknown>;
if (res.tts_method === 'browser' && typeof res.text === 'string' && res.text) {
speechSynth.speak({
text: res.text,
voice: typeof res.voice === 'string' ? res.voice : undefined,
language: typeof res.language === 'string' ? res.language : undefined,
rate: typeof res.rate === 'number' ? res.rate : undefined,
pitch: typeof res.pitch === 'number' ? res.pitch : undefined,
volume: typeof res.volume === 'number' ? res.volume : undefined,
}).catch((err: unknown) => {
console.warn('[useAutomationEvents] Browser TTS failed:', err);
});
}
}
}
// Handle error status

View File

@@ -920,6 +920,12 @@ export class SaaSClient {
return this.request('GET', '/api/v1/config/pull' + qs);
}
// ==========================================================================
// Admin Panel API — Reserved for future admin UI (Next.js admin dashboard)
// These methods are not called by the desktop app but are kept as thin API
// wrappers for when the admin panel is built.
// ==========================================================================
// --- Provider Management (Admin) ---
/** List all providers */

View File

@@ -0,0 +1,195 @@
/**
* Speech Synthesis Service — Browser TTS via Web Speech API
*
* Provides text-to-speech playback using the browser's native SpeechSynthesis API.
* Zero external dependencies, works offline, supports Chinese and English voices.
*
* Architecture:
* - SpeechHand (Rust) returns tts_method + text + voice config
* - This service handles Browser TTS playback in the webview
* - OpenAI/Azure TTS is handled via backend API calls
*/
/**
 * Options accepted by `SpeechSynthService.speak()`.
 * Shape mirrors the voice config fields read off the SpeechHand result
 * (`text`, `voice`, `language`, `rate`, `pitch`, `volume`).
 */
export interface SpeechSynthOptions {
  /** Text to speak aloud (required). */
  text: string;
  /** Voice name or voiceURI; omitted or 'default' triggers auto-selection. */
  voice?: string;
  /** Language tag such as 'zh-CN'; speak() defaults to 'zh-CN' when omitted. */
  language?: string;
  /** Utterance rate; speak() defaults to 1.0. */
  rate?: number;
  /** Utterance pitch; speak() defaults to 1.0. */
  pitch?: number;
  /** Utterance volume; speak() defaults to 1.0. */
  volume?: number;
}
/**
 * Snapshot of the synthesizer state pushed to subscribers on every change.
 */
export interface SpeechSynthState {
  /** True while the engine is speaking (from `SpeechSynthesis.speaking`). */
  playing: boolean;
  /** True while speech is paused (from `SpeechSynthesis.paused`). */
  paused: boolean;
  /** Text of the utterance currently held by the service, or null when idle. */
  currentText: string | null;
  /** Cached voice list (voices may load asynchronously after construction). */
  voices: SpeechSynthesisVoice[];
}
type SpeechEventCallback = (state: SpeechSynthState) => void;
/**
 * Browser-side TTS driver wrapping the Web Speech API.
 *
 * Holds the `window.speechSynthesis` handle, caches the voice list (voices
 * often load asynchronously), and notifies subscribers on state changes.
 * Every method degrades gracefully when the API is unavailable (e.g. a
 * non-browser environment): queries return empty/false and `speak()` rejects.
 */
class SpeechSynthService {
  /** Null when the Web Speech API is unavailable in this environment. */
  private synth: SpeechSynthesis | null = null;
  /** Utterance currently owned by this service, if any. */
  private currentUtterance: SpeechSynthesisUtterance | null = null;
  /** State-change subscribers. */
  private listeners: Set<SpeechEventCallback> = new Set();
  /** Cached result of `getVoices()`, refreshed on `voiceschanged`. */
  private cachedVoices: SpeechSynthesisVoice[] = [];

  constructor() {
    if (typeof window !== 'undefined' && window.speechSynthesis) {
      this.synth = window.speechSynthesis;
      this.loadVoices();
      // Voices may load asynchronously; refresh the cache when they arrive.
      this.synth.onvoiceschanged = () => this.loadVoices();
    }
  }

  /** Refresh the voice cache and notify subscribers. */
  private loadVoices() {
    if (!this.synth) return;
    this.cachedVoices = this.synth.getVoices();
    this.notify();
  }

  /** Push the current state snapshot to all subscribers. */
  private notify() {
    const state = this.getState();
    this.listeners.forEach(cb => cb(state));
  }

  /**
   * Subscribe to state changes.
   * @returns An unsubscribe function.
   */
  subscribe(callback: SpeechEventCallback): () => void {
    this.listeners.add(callback);
    return () => this.listeners.delete(callback);
  }

  /** Get the current state snapshot. */
  getState(): SpeechSynthState {
    return {
      playing: this.synth?.speaking ?? false,
      paused: this.synth?.paused ?? false,
      currentText: this.currentUtterance?.text ?? null,
      voices: this.cachedVoices,
    };
  }

  /** Whether the Web Speech API is available in this environment. */
  isAvailable(): boolean {
    return this.synth != null;
  }

  /**
   * Get available voices, optionally filtered by language.
   * Filtering matches on the primary subtag only ('zh-CN' matches 'zh-TW').
   */
  getVoices(language?: string): SpeechSynthesisVoice[] {
    if (!language) return this.cachedVoices;
    const langPrefix = language.split('-')[0].toLowerCase();
    return this.cachedVoices.filter(v =>
      v.lang.toLowerCase().startsWith(langPrefix)
    );
  }

  /**
   * Speak text with the given options. Cancels any ongoing speech first.
   *
   * @returns A promise that resolves when playback ends (or is cancelled by
   *   `stop()`), and rejects when the API is unavailable or the engine
   *   reports a genuine error.
   */
  speak(options: SpeechSynthOptions): Promise<void> {
    return new Promise((resolve, reject) => {
      if (!this.synth) {
        reject(new Error('Speech synthesis not available'));
        return;
      }
      // Cancel any ongoing speech so utterances never queue up.
      this.stop();
      const utterance = new SpeechSynthesisUtterance(options.text);
      this.currentUtterance = utterance;
      // Set language
      utterance.lang = options.language ?? 'zh-CN';
      // Set voice if specified
      if (options.voice && options.voice !== 'default') {
        const voice = this.cachedVoices.find(v =>
          v.name === options.voice || v.voiceURI === options.voice
        );
        if (voice) utterance.voice = voice;
      } else {
        // Auto-select best voice for the language
        this.selectBestVoice(utterance, options.language ?? 'zh-CN');
      }
      // Set parameters
      utterance.rate = options.rate ?? 1.0;
      utterance.pitch = options.pitch ?? 1.0;
      utterance.volume = options.volume ?? 1.0;
      utterance.onstart = () => {
        this.notify();
      };
      utterance.onend = () => {
        this.currentUtterance = null;
        this.notify();
        resolve();
      };
      utterance.onerror = (event) => {
        this.currentUtterance = null;
        this.notify();
        // Cancellation is not a real error (it happens on stop()). Browsers
        // report it as "canceled" (queued, not yet started) OR "interrupted"
        // (cancelled mid-speech) — treat both as a normal resolution so a
        // stop()/re-speak() cycle doesn't reject the previous speak() promise.
        if (event.error !== 'canceled' && event.error !== 'interrupted') {
          reject(new Error(`Speech error: ${event.error}`));
        } else {
          resolve();
        }
      };
      this.synth.speak(utterance);
    });
  }

  /** Pause current speech. */
  pause() {
    this.synth?.pause();
    this.notify();
  }

  /** Resume paused speech. */
  resume() {
    this.synth?.resume();
    this.notify();
  }

  /** Stop current speech and clear the tracked utterance. */
  stop() {
    this.synth?.cancel();
    this.currentUtterance = null;
    this.notify();
  }

  /**
   * Auto-select the best cached voice for a language, by preference:
   * "Neural"/"Enhanced"/"Premium" name > local (offline) voice > first match.
   * Leaves `utterance.voice` untouched when no voice matches the language.
   */
  private selectBestVoice(utterance: SpeechSynthesisUtterance, language: string) {
    const langPrefix = language.split('-')[0].toLowerCase();
    const candidates = this.cachedVoices.filter(v =>
      v.lang.toLowerCase().startsWith(langPrefix)
    );
    if (candidates.length === 0) return;
    // Prefer voices with "Neural" or "Enhanced" in name (higher quality)
    const neural = candidates.find(v =>
      v.name.includes('Neural') || v.name.includes('Enhanced') || v.name.includes('Premium')
    );
    if (neural) {
      utterance.voice = neural;
      return;
    }
    // Prefer local voices (work offline)
    const local = candidates.find(v => v.localService);
    if (local) {
      utterance.voice = local;
      return;
    }
    // Fall back to first matching voice
    utterance.voice = candidates[0];
  }
}
// Module-level singleton shared by all consumers (both chatStore and
// useAutomationEvents import this instance), so there is a single
// utterance/subscriber registry per webview.
export const speechSynth = new SpeechSynthService();

View File

@@ -8,6 +8,7 @@ import { getSkillDiscovery } from '../lib/skill-discovery';
import { useOfflineStore, isOffline } from './offlineStore';
import { useConnectionStore } from './connectionStore';
import { createLogger } from '../lib/logger';
import { speechSynth } from '../lib/speech-synth';
import { generateRandomString } from '../lib/crypto-utils';
const log = createLogger('ChatStore');
@@ -461,6 +462,24 @@ export const useChatStore = create<ChatState>()(
handResult: result,
};
set((state) => ({ messages: [...state.messages, handMsg] }));
// Trigger browser TTS when SpeechHand completes with browser method
if (name === 'speech' && status === 'completed' && result && typeof result === 'object') {
const res = result as Record<string, unknown>;
if (res.tts_method === 'browser' && typeof res.text === 'string' && res.text) {
speechSynth.speak({
text: res.text as string,
voice: (res.voice as string) || undefined,
language: (res.language as string) || undefined,
rate: typeof res.rate === 'number' ? res.rate : undefined,
pitch: typeof res.pitch === 'number' ? res.pitch : undefined,
volume: typeof res.volume === 'number' ? res.volume : undefined,
}).catch((err: unknown) => {
const logger = createLogger('speech-synth');
logger.warn('Browser TTS failed', { error: String(err) });
});
}
}
},
onComplete: (inputTokens?: number, outputTokens?: number) => {
const state = get();