feat(kernel): persist agent runtime state across restarts
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
- Schema: migrations now execute ALTER TABLE ADD COLUMN for state/message_count
- MemoryStore: add update_agent_runtime() and list_agents_with_runtime()
- Registry: add register_with_runtime() to accept persisted state/message_count
- Kernel boot: restore agents with their persisted state (not always Running)
- Kernel shutdown: persist all agent states/message_counts before terminating

Agents that were suspended stay suspended after restart. Message counts survive restarts instead of resetting to 0.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
//! Agent CRUD operations
|
||||
|
||||
use zclaw_types::{AgentConfig, AgentId, AgentInfo, Event, Result};
|
||||
use zclaw_types::{AgentConfig, AgentId, AgentInfo, AgentState, Event, Result};
|
||||
|
||||
#[cfg(feature = "multi-agent")]
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -12,7 +12,7 @@ mod a2a;
|
||||
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::{broadcast, Mutex};
|
||||
use zclaw_types::{Event, Result};
|
||||
use zclaw_types::{Event, Result, AgentState};
|
||||
|
||||
#[cfg(feature = "multi-agent")]
|
||||
use zclaw_types::AgentId;
|
||||
@@ -114,10 +114,21 @@ impl Kernel {
|
||||
// Initialize Growth system — shared VikingAdapter for memory storage
|
||||
let viking = Arc::new(zclaw_runtime::VikingAdapter::in_memory());
|
||||
|
||||
// Restore persisted agents
|
||||
let persisted = memory.list_agents().await?;
|
||||
for agent in persisted {
|
||||
registry.register(agent);
|
||||
// Restore persisted agents with their runtime state
|
||||
let persisted = memory.list_agents_with_runtime().await?;
|
||||
for (agent, state_str, msg_count) in persisted {
|
||||
let state = match state_str.as_str() {
|
||||
"running" => AgentState::Running,
|
||||
"suspended" => AgentState::Suspended,
|
||||
_ => AgentState::Terminated,
|
||||
};
|
||||
// Only auto-resume agents that were running; suspended/terminated stay as-is
|
||||
let restored_state = if state == AgentState::Running {
|
||||
AgentState::Running
|
||||
} else {
|
||||
state
|
||||
};
|
||||
registry.register_with_runtime(agent, restored_state, msg_count);
|
||||
}
|
||||
|
||||
// Initialize A2A router for multi-agent support
|
||||
@@ -287,6 +298,23 @@ impl Kernel {
|
||||
|
||||
/// Shutdown the kernel
|
||||
pub async fn shutdown(&self) -> Result<()> {
|
||||
// Persist all agent runtime states before shutdown
|
||||
let agents = self.registry.list();
|
||||
for info in &agents {
|
||||
let state_str = match info.state {
|
||||
AgentState::Running => "running",
|
||||
AgentState::Suspended => "suspended",
|
||||
AgentState::Terminated => "terminated",
|
||||
};
|
||||
if let Err(e) = self.memory
|
||||
.update_agent_runtime(&info.id, state_str, info.message_count as u64)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("[Kernel] Failed to persist agent {} state: {}", info.id, e);
|
||||
}
|
||||
}
|
||||
tracing::info!("[Kernel] Persisted runtime state for {} agents", agents.len());
|
||||
|
||||
self.events.publish(Event::KernelShutdown);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user