refactor(desktop): ChatStore structured split + IDB persistence + stream cancel

Split monolithic chatStore.ts (908 lines) into 4 focused stores: - chatStore.ts: facade layer, owns messages[], backward-compatible selectors - conversationStore.ts: conversation CRUD, agent switching, IndexedDB persistence - streamStore.ts: streaming orchestration, chat mode, suggestions - messageStore.ts: token tracking Key fixes from 3-round deep audit: - C1: Fix Rust serde camelCase vs TS snake_case mismatch (toolStart/toolEnd/iterationStart) - C2: Fix IDB async rehydration race with persist.hasHydrated() subscribe - C3: Add sessionKey to partialize to survive page refresh - H3: Fix IDB migration retry on failure (don't set migrated=true in catch) - M3: Fix ToolCallStep deduplication (toolStart creates, toolEnd updates) - M-NEW-2: Clear sessionKey on cancelStream Also adds: - Rust backend stream cancellation via AtomicBool + cancel_stream command - IndexedDB storage adapter with one-time localStorage migration - HMR cleanup for cross-store subscriptions
2026-04-03 00:24:16 +08:00
parent da438ad868
commit 0a04b260a4
22 changed files with 1269 additions and 767 deletions
--- a/desktop/src-tauri/src/kernel_commands/a2a.rs
+++ b/desktop/src-tauri/src/kernel_commands/a2a.rs
@@ -12,6 +12,7 @@ use super::KernelState;

 #[cfg(feature = "multi-agent")]
 /// Send a direct A2A message from one agent to another
+// @connected
 #[tauri::command]
 pub async fn agent_a2a_send(
    state: State<'_, KernelState>,
@@ -44,6 +45,7 @@ pub async fn agent_a2a_send(

 /// Broadcast a message from one agent to all other agents
 #[cfg(feature = "multi-agent")]
+// @connected
 #[tauri::command]
 pub async fn agent_a2a_broadcast(
    state: State<'_, KernelState>,
@@ -65,6 +67,7 @@ pub async fn agent_a2a_broadcast(

 /// Discover agents with a specific capability
 #[cfg(feature = "multi-agent")]
+// @connected
 #[tauri::command]
 pub async fn agent_a2a_discover(
    state: State<'_, KernelState>,
@@ -86,6 +89,7 @@ pub async fn agent_a2a_discover(

 /// Delegate a task to another agent and wait for response
 #[cfg(feature = "multi-agent")]
+// @connected
 #[tauri::command]
 pub async fn agent_a2a_delegate_task(
    state: State<'_, KernelState>,
--- a/desktop/src-tauri/src/kernel_commands/agent.rs
+++ b/desktop/src-tauri/src/kernel_commands/agent.rs
@@ -65,6 +65,7 @@ pub struct AgentUpdateRequest {
 // ---------------------------------------------------------------------------

 /// Create a new agent
+// @connected
 #[tauri::command]
 pub async fn agent_create(
    state: State<'_, KernelState>,
@@ -103,6 +104,7 @@ pub async fn agent_create(
 }

 /// List all agents
+// @connected
 #[tauri::command]
 pub async fn agent_list(
    state: State<'_, KernelState>,
@@ -116,6 +118,7 @@ pub async fn agent_list(
 }

 /// Get agent info
+// @connected
 #[tauri::command]
 pub async fn agent_get(
    state: State<'_, KernelState>,
@@ -135,6 +138,7 @@ pub async fn agent_get(
 }

 /// Delete an agent
+// @connected
 #[tauri::command]
 pub async fn agent_delete(
    state: State<'_, KernelState>,
@@ -156,6 +160,7 @@ pub async fn agent_delete(
 }

 /// Update an agent's configuration
+// @connected
 #[tauri::command]
 pub async fn agent_update(
    state: State<'_, KernelState>,
@@ -209,6 +214,7 @@ pub async fn agent_update(
 }

 /// Export an agent configuration as JSON
+// @reserved: 暂无前端集成
 #[tauri::command]
 pub async fn agent_export(
    state: State<'_, KernelState>,
@@ -231,6 +237,7 @@ pub async fn agent_export(
 }

 /// Import an agent from JSON configuration
+// @reserved: 暂无前端集成
 #[tauri::command]
 pub async fn agent_import(
    state: State<'_, KernelState>,
--- a/desktop/src-tauri/src/kernel_commands/approval.rs
+++ b/desktop/src-tauri/src/kernel_commands/approval.rs
@@ -25,6 +25,7 @@ pub struct ApprovalResponse {
 }

 /// List pending approvals
+// @reserved: 暂无前端集成
 #[tauri::command]
 pub async fn approval_list(
    state: State<'_, KernelState>,
@@ -48,6 +49,7 @@ pub async fn approval_list(
 /// When approved, the kernel's `respond_to_approval` internally spawns the Hand
 /// execution.  We additionally emit Tauri events so the frontend can track when
 /// the execution finishes, since the kernel layer has no access to the AppHandle.
+// @connected
 #[tauri::command]
 pub async fn approval_respond(
    app: AppHandle,
--- a/desktop/src-tauri/src/kernel_commands/chat.rs
+++ b/desktop/src-tauri/src/kernel_commands/chat.rs
@@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
 use tauri::{AppHandle, Emitter, State};
 use zclaw_types::AgentId;

-use super::{validate_agent_id, KernelState, SessionStreamGuard};
+use super::{validate_agent_id, KernelState, SessionStreamGuard, StreamCancelFlags};
 use crate::intelligence::validation::validate_string_length;

 // ---------------------------------------------------------------------------
@@ -67,6 +67,7 @@ pub struct StreamChatRequest {
 // ---------------------------------------------------------------------------

 /// Send a message to an agent
+// @connected
 #[tauri::command]
 pub async fn agent_chat(
    state: State<'_, KernelState>,
@@ -99,6 +100,7 @@ pub async fn agent_chat(
 /// This command initiates a streaming chat session. Events are emitted
 /// via Tauri's event system with the name "stream:chunk" and include
 /// the session_id for routing.
+// @connected
 #[tauri::command]
 pub async fn agent_chat_stream(
    app: AppHandle,
@@ -107,6 +109,7 @@ pub async fn agent_chat_stream(
    heartbeat_state: State<'_, crate::intelligence::HeartbeatEngineState>,
    reflection_state: State<'_, crate::intelligence::ReflectionEngineState>,
    stream_guard: State<'_, SessionStreamGuard>,
+    cancel_flags: State<'_, StreamCancelFlags>,
    request: StreamChatRequest,
 ) -> Result<(), String> {
    validate_agent_id(&request.agent_id)?;
@@ -136,6 +139,21 @@ pub async fn agent_chat_stream(
        return Err(format!("Session {} already has an active stream", session_id));
    }

+    // Prepare cleanup resources for error paths (before spawn takes ownership)
+    let err_cleanup_guard = stream_guard.inner().clone();
+    let err_cleanup_cancel = cancel_flags.inner().clone();
+    let err_cleanup_session_id = session_id.clone();
+    let err_cleanup_flag = Arc::clone(&*session_active);
+
+    // Register cancellation flag for this session
+    let cancel_flag = cancel_flags
+        .entry(session_id.clone())
+        .or_insert_with(|| Arc::new(std::sync::atomic::AtomicBool::new(false)));
+    // Ensure flag is reset (in case of stale entry from a previous stream)
+    cancel_flag.store(false, std::sync::atomic::Ordering::SeqCst);
+    let cancel_clone = Arc::clone(&*cancel_flag);
+    let cancel_flags_map: StreamCancelFlags = cancel_flags.inner().clone();
+
    // AUTO-INIT HEARTBEAT
    {
        let mut engines = heartbeat_state.lock().await;
@@ -160,7 +178,13 @@ pub async fn agent_chat_stream(
    let (mut rx, llm_driver) = {
        let kernel_lock = state.lock().await;
        let kernel = kernel_lock.as_ref()
-            .ok_or_else(|| "Kernel not initialized. Call kernel_init first.".to_string())?;
+            .ok_or_else(|| {
+                // Cleanup on error: release guard + cancel flag
+                err_cleanup_flag.store(false, std::sync::atomic::Ordering::SeqCst);
+                err_cleanup_guard.remove(&err_cleanup_session_id);
+                err_cleanup_cancel.remove(&err_cleanup_session_id);
+                "Kernel not initialized. Call kernel_init first.".to_string()
+            })?;

        let driver = Some(kernel.driver());

@@ -172,6 +196,10 @@ pub async fn agent_chat_stream(
            match uuid::Uuid::parse_str(&session_id) {
                Ok(uuid) => Some(zclaw_types::SessionId::from_uuid(uuid)),
                Err(e) => {
+                    // Cleanup on error
+                    err_cleanup_flag.store(false, std::sync::atomic::Ordering::SeqCst);
+                    err_cleanup_guard.remove(&err_cleanup_session_id);
+                    err_cleanup_cancel.remove(&err_cleanup_session_id);
                    return Err(format!(
                        "Invalid session_id '{}': {}. Cannot reuse conversation context.",
                        session_id, e
@@ -194,13 +222,22 @@ pub async fn agent_chat_stream(
            Some(chat_mode_config),
        )
            .await
-            .map_err(|e| format!("Failed to start streaming: {}", e))?;
+            .map_err(|e| {
+                // Cleanup on error
+                err_cleanup_flag.store(false, std::sync::atomic::Ordering::SeqCst);
+                err_cleanup_guard.remove(&err_cleanup_session_id);
+                err_cleanup_cancel.remove(&err_cleanup_session_id);
+                format!("Failed to start streaming: {}", e)
+            })?;
        (rx, driver)
    };

    let hb_state = heartbeat_state.inner().clone();
    let rf_state = reflection_state.inner().clone();

+    // Clone the guard map for cleanup in the spawned task
+    let guard_map: SessionStreamGuard = stream_guard.inner().clone();
+
    // Spawn a task to process stream events.
    // The session_active flag is cleared when task completes.
    let guard_clone = Arc::clone(&*session_active);
@@ -212,6 +249,16 @@ pub async fn agent_chat_stream(
        let stream_timeout = tokio::time::Duration::from_secs(300);

        loop {
+            // Check cancellation flag before each recv
+            if cancel_clone.load(std::sync::atomic::Ordering::SeqCst) {
+                tracing::info!("[agent_chat_stream] Stream cancelled for session: {}", session_id);
+                let _ = app.emit("stream:chunk", serde_json::json!({
+                    "sessionId": session_id,
+                    "event": StreamChatEvent::Error { message: "已取消".to_string() }
+                }));
+                break;
+            }
+
            match tokio::time::timeout(stream_timeout, rx.recv()).await {
                Ok(Some(event)) => {
                    let stream_event = match &event {
@@ -300,9 +347,37 @@ pub async fn agent_chat_stream(

        tracing::debug!("[agent_chat_stream] Stream processing ended for session: {}", session_id);

-        // Release session lock
-        guard_clone.store(false, std::sync::atomic::Ordering::SeqCst);
+        // Release session lock and clean up DashMap entries to prevent memory leaks.
+        // Use compare_exchange to only remove if we still own the flag (guards against
+        // a new stream for the same session_id starting after we broke out of the loop).
+        if guard_clone.compare_exchange(true, false, std::sync::atomic::Ordering::SeqCst, std::sync::atomic::Ordering::SeqCst).is_ok() {
+            guard_map.remove(&session_id);
+        }
+        // Clean up cancellation flag (always safe — cancel is session-scoped)
+        cancel_flags_map.remove(&session_id);
    });

    Ok(())
 }
+
+/// Request cancellation of the active stream for a given session.
+///
+/// Sets the session's cancellation flag; the streaming task checks it before
+/// each receive, then emits an error event and cleans up. Returns `Ok(())`
+/// even when no flag is registered for the session (treated as a no-op).
+// @connected
+#[tauri::command]
+pub async fn cancel_stream(
+    cancel_flags: State<'_, StreamCancelFlags>,
+    session_id: String,
+) -> Result<(), String> {
+    if let Some(flag) = cancel_flags.get(&session_id) {
+        flag.store(true, std::sync::atomic::Ordering::SeqCst);
+        tracing::info!("[cancel_stream] Cancel requested for session: {}", session_id);
+        Ok(())
+    } else {
+        // No flag registered for this session: treated as a no-op, not an error
+        tracing::debug!("[cancel_stream] No active stream for session: {}", session_id);
+        Ok(())
+    }
+}
--- a/desktop/src-tauri/src/kernel_commands/hand.rs
+++ b/desktop/src-tauri/src/kernel_commands/hand.rs
@@ -110,6 +110,7 @@ impl From<zclaw_hands::HandResult> for HandResult {
 ///
 /// Returns hands from the Kernel's HandRegistry.
 /// Hands are registered during kernel initialization.
+// @connected
 #[tauri::command]
 pub async fn hand_list(
    state: State<'_, KernelState>,
@@ -128,6 +129,7 @@ pub async fn hand_list(
 /// Executes a hand with the given ID and input.
 /// If the hand has `needs_approval = true`, creates a pending approval instead.
 /// Returns the hand result as JSON, or a pending status with approval ID.
+// @connected
 #[tauri::command]
 pub async fn hand_execute(
    state: State<'_, KernelState>,
@@ -190,6 +192,7 @@ pub async fn hand_execute(
 /// When approved, the kernel's `respond_to_approval` internally spawns the Hand
 /// execution.  We additionally emit Tauri events so the frontend can track when
 /// the execution finishes.
+// @connected
 #[tauri::command]
 pub async fn hand_approve(
    app: AppHandle,
@@ -292,6 +295,7 @@ pub async fn hand_approve(
 }

 /// Cancel a hand execution
+// @connected
 #[tauri::command]
 pub async fn hand_cancel(
    state: State<'_, KernelState>,
@@ -330,6 +334,7 @@ pub async fn hand_cancel(
 // ============================================================

 /// Get detailed info for a single hand
+// @connected
 #[tauri::command]
 pub async fn hand_get(
    state: State<'_, KernelState>,
@@ -348,6 +353,7 @@ pub async fn hand_get(
 }

 /// Get status of a specific hand run
+// @connected
 #[tauri::command]
 pub async fn hand_run_status(
    state: State<'_, KernelState>,
@@ -375,6 +381,7 @@ pub async fn hand_run_status(
 }

 /// List run history for a hand (or all hands)
+// @connected
 #[tauri::command]
 pub async fn hand_run_list(
    state: State<'_, KernelState>,
@@ -409,6 +416,7 @@ pub async fn hand_run_list(
 }

 /// Cancel a running hand execution
+// @reserved: 暂无前端集成
 #[tauri::command]
 pub async fn hand_run_cancel(
    state: State<'_, KernelState>,
--- a/desktop/src-tauri/src/kernel_commands/lifecycle.rs
+++ b/desktop/src-tauri/src/kernel_commands/lifecycle.rs
@@ -54,6 +54,7 @@ pub struct KernelStatusResponse {
 ///
 /// If kernel already exists with the same config, returns existing status.
 /// If config changed, reboots kernel with new config.
+// @connected
 #[tauri::command]
 pub async fn kernel_init(
    state: State<'_, KernelState>,
@@ -202,6 +203,7 @@ pub async fn kernel_init(
 }

 /// Get kernel status
+// @connected
 #[tauri::command]
 pub async fn kernel_status(
    state: State<'_, KernelState>,
@@ -227,6 +229,7 @@ pub async fn kernel_status(
 }

 /// Shutdown the kernel
+// @reserved: 暂无前端集成
 #[tauri::command]
 pub async fn kernel_shutdown(
    state: State<'_, KernelState>,
@@ -254,6 +257,7 @@ pub async fn kernel_shutdown(
 ///
 /// Writes relevant config values (agent, llm categories) to the TOML config file.
 /// The changes take effect on the next Kernel restart.
+// @connected
 #[tauri::command]
 pub async fn kernel_apply_saas_config(
    configs: Vec<SaasConfigItem>,
--- a/desktop/src-tauri/src/kernel_commands/mod.rs
+++ b/desktop/src-tauri/src/kernel_commands/mod.rs
@@ -36,6 +36,11 @@ pub type SchedulerState = Arc<Mutex<Option<zclaw_kernel::scheduler::SchedulerSer
 /// The `spawn`ed task resets the flag on completion/error.
 pub type SessionStreamGuard = Arc<dashmap::DashMap<String, Arc<std::sync::atomic::AtomicBool>>>;

+/// Per-session stream cancellation flags, keyed by session_id.
+/// The `cancel_stream` command sets a session's flag to `true`; the task
+/// spawned by `agent_chat_stream` checks it at the top of each loop iteration.
+pub type StreamCancelFlags = Arc<dashmap::DashMap<String, Arc<std::sync::atomic::AtomicBool>>>;
+
 // ---------------------------------------------------------------------------
 // Shared validation helpers
 // ---------------------------------------------------------------------------
--- a/desktop/src-tauri/src/kernel_commands/scheduled_task.rs
+++ b/desktop/src-tauri/src/kernel_commands/scheduled_task.rs
@@ -47,6 +47,7 @@ pub struct ScheduledTaskResponse {
 ///
 /// Tasks are automatically executed by the SchedulerService which checks
 /// every 60 seconds for due triggers.
+// @reserved: 暂无前端集成
 #[tauri::command]
 pub async fn scheduled_task_create(
    state: State<'_, KernelState>,
@@ -94,6 +95,7 @@ pub async fn scheduled_task_create(
 }

 /// List all scheduled tasks (kernel triggers of Schedule type)
+// @connected
 #[tauri::command]
 pub async fn scheduled_task_list(
    state: State<'_, KernelState>,
--- a/desktop/src-tauri/src/kernel_commands/skill.rs
+++ b/desktop/src-tauri/src/kernel_commands/skill.rs
@@ -53,6 +53,7 @@ impl From<zclaw_skills::SkillManifest> for SkillInfoResponse {
 ///
 /// Returns skills from the Kernel's SkillRegistry.
 /// Skills are loaded from the skills/ directory during kernel initialization.
+// @connected
 #[tauri::command]
 pub async fn skill_list(
    state: State<'_, KernelState>,
@@ -74,6 +75,7 @@ pub async fn skill_list(
 ///
 /// Re-scans the skills directory for new or updated skills.
 /// Optionally accepts a custom directory path to scan.
+// @connected
 #[tauri::command]
 pub async fn skill_refresh(
    state: State<'_, KernelState>,
@@ -124,6 +126,7 @@ pub struct UpdateSkillRequest {
 }

 /// Create a new skill in the skills directory
+// @connected
 #[tauri::command]
 pub async fn skill_create(
    state: State<'_, KernelState>,
@@ -170,6 +173,7 @@ pub async fn skill_create(
 }

 /// Update an existing skill
+// @connected
 #[tauri::command]
 pub async fn skill_update(
    state: State<'_, KernelState>,
@@ -214,6 +218,7 @@ pub async fn skill_update(
 }

 /// Delete a skill
+// @connected
 #[tauri::command]
 pub async fn skill_delete(
    state: State<'_, KernelState>,
@@ -286,6 +291,7 @@ impl From<zclaw_skills::SkillResult> for SkillResult {
 ///
 /// Executes a skill with the given ID and input.
 /// Returns the skill result as JSON.
+// @connected
 #[tauri::command]
 pub async fn skill_execute(
    state: State<'_, KernelState>,
--- a/desktop/src-tauri/src/kernel_commands/trigger.rs
+++ b/desktop/src-tauri/src/kernel_commands/trigger.rs
@@ -96,6 +96,7 @@ impl From<zclaw_kernel::trigger_manager::TriggerEntry> for TriggerResponse {
 }

 /// List all triggers
+// @connected
 #[tauri::command]
 pub async fn trigger_list(
    state: State<'_, KernelState>,
@@ -109,6 +110,7 @@ pub async fn trigger_list(
 }

 /// Get a specific trigger
+// @connected
 #[tauri::command]
 pub async fn trigger_get(
    state: State<'_, KernelState>,
@@ -125,6 +127,7 @@ pub async fn trigger_get(
 }

 /// Create a new trigger
+// @connected
 #[tauri::command]
 pub async fn trigger_create(
    state: State<'_, KernelState>,
@@ -179,6 +182,7 @@ pub async fn trigger_create(
 }

 /// Update a trigger
+// @connected
 #[tauri::command]
 pub async fn trigger_update(
    state: State<'_, KernelState>,
@@ -205,6 +209,7 @@ pub async fn trigger_update(
 }

 /// Delete a trigger
+// @connected
 #[tauri::command]
 pub async fn trigger_delete(
    state: State<'_, KernelState>,
@@ -222,6 +227,7 @@ pub async fn trigger_delete(
 }

 /// Execute a trigger manually
+// @reserved: 暂无前端集成
 #[tauri::command]
 pub async fn trigger_execute(
    state: State<'_, KernelState>,
--- a/desktop/src-tauri/src/lib.rs
+++ b/desktop/src-tauri/src/lib.rs
@@ -117,6 +117,7 @@ pub fn run() {
        .manage(kernel_state)
        .manage(scheduler_state)
        .manage(kernel_commands::SessionStreamGuard::default())
+        .manage(kernel_commands::StreamCancelFlags::default())
        .manage(pipeline_state)
        .manage(classroom_state)
        .manage(classroom_chat_state)
@@ -136,6 +137,7 @@ pub fn run() {
            kernel_commands::agent::agent_import,
            kernel_commands::chat::agent_chat,
            kernel_commands::chat::agent_chat_stream,
+            kernel_commands::chat::cancel_stream,
            // Skills commands (dynamic discovery)
            kernel_commands::skill::skill_list,
            kernel_commands::skill::skill_refresh,