fix(heartbeat,skills): 健康快照降级处理 + 技能加载重试
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
P1-3: health_snapshot 在 heartbeat engine 未初始化时不再报错，而是返回 pending 状态快照，避免 HealthPanel 竞态报错。 P1-1: loadSkillsCatalog 新增 Path C 延迟重试（最多 2 次，间隔 1.5s/3s），解决 kernel 初始化未完成时 skills 返回空数组的问题。
This commit is contained in:
@@ -47,9 +47,30 @@ pub async fn health_snapshot(
|
||||
) -> Result<HealthSnapshot, String> {
|
||||
let engines = heartbeat_state.lock().await;
|
||||
|
||||
let engine = engines
|
||||
.get(&agent_id)
|
||||
.ok_or_else(|| format!("Heartbeat engine not initialized for agent: {}", agent_id))?;
|
||||
// If heartbeat engine not yet initialized, return a graceful "pending" snapshot
|
||||
// instead of erroring — this avoids race conditions when HealthPanel mounts
|
||||
// before the heartbeat bootstrap sequence completes.
|
||||
let engine = match engines.get(&agent_id) {
|
||||
Some(e) => e,
|
||||
None => {
|
||||
tracing::debug!("[health_snapshot] Engine not initialized for {}, returning pending snapshot", agent_id);
|
||||
return Ok(HealthSnapshot {
|
||||
timestamp: chrono::Utc::now().to_rfc3339(),
|
||||
intelligence: IntelligenceHealth {
|
||||
engine_running: false,
|
||||
config: HeartbeatConfig::default(),
|
||||
last_tick: None,
|
||||
alert_count_24h: 0,
|
||||
total_checks: 5,
|
||||
},
|
||||
memory: MemoryHealth {
|
||||
total_entries: 0,
|
||||
storage_size_bytes: 0,
|
||||
last_extraction: None,
|
||||
},
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let engine_running = engine.is_running().await;
|
||||
let config = engine.get_config().await;
|
||||
|
||||
@@ -189,7 +189,7 @@ export interface ConfigActionsSlice {
|
||||
description?: string;
|
||||
enabled?: boolean;
|
||||
}) => Promise<ScheduledTask | undefined>;
|
||||
loadSkillsCatalog: () => Promise<void>;
|
||||
loadSkillsCatalog: (retryCount?: number) => Promise<void>;
|
||||
getSkill: (id: string) => Promise<SkillInfo | undefined>;
|
||||
createSkill: (skill: {
|
||||
name: string;
|
||||
@@ -449,7 +449,7 @@ export const useConfigStore = create<ConfigStateSlice & ConfigActionsSlice>((set
|
||||
|
||||
// === Skill Actions ===
|
||||
|
||||
loadSkillsCatalog: async () => {
|
||||
loadSkillsCatalog: async (retryCount = 0) => {
|
||||
const client = get().client;
|
||||
|
||||
// Path A: via injected client (KernelClient or GatewayClient)
|
||||
@@ -494,10 +494,19 @@ export const useConfigStore = create<ConfigStateSlice & ConfigActionsSlice>((set
|
||||
source: ((s.source as string) || 'builtin') as 'builtin' | 'extra',
|
||||
path: s.path as string | undefined,
|
||||
})) });
|
||||
return;
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn('[configStore] skill_list direct invoke also failed:', err);
|
||||
}
|
||||
|
||||
// Path C: delayed retry — kernel may still be initializing
|
||||
if (retryCount < 2) {
|
||||
const delay = (retryCount + 1) * 1500; // 1.5s, 3s
|
||||
console.log(`[configStore] Skills empty, retrying in ${delay}ms (attempt ${retryCount + 1}/2)`);
|
||||
await new Promise((r) => setTimeout(r, delay));
|
||||
return get().loadSkillsCatalog(retryCount + 1);
|
||||
}
|
||||
},
|
||||
|
||||
getSkill: async (id: string) => {
|
||||
|
||||
Reference in New Issue
Block a user