fix(heartbeat,skills): 健康快照降级处理 + 技能加载重试
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

P1-3: health_snapshot 在 heartbeat engine 未初始化时不再报错,
而是返回 pending 状态快照,避免 HealthPanel 竞态报错。

P1-1: loadSkillsCatalog 新增 Path C 延迟重试(最多 2 次,间隔依次为
1.5s、3s),解决 kernel 初始化未完成时 skills 返回空数组的问题。
This commit is contained in:
iven
2026-04-19 13:27:25 +08:00
parent 4ee587d070
commit 0bd50aad8c
2 changed files with 35 additions and 5 deletions

View File

@@ -47,9 +47,30 @@ pub async fn health_snapshot(
) -> Result<HealthSnapshot, String> { ) -> Result<HealthSnapshot, String> {
let engines = heartbeat_state.lock().await; let engines = heartbeat_state.lock().await;
let engine = engines // If heartbeat engine not yet initialized, return a graceful "pending" snapshot
.get(&agent_id) // instead of erroring — this avoids race conditions when HealthPanel mounts
.ok_or_else(|| format!("Heartbeat engine not initialized for agent: {}", agent_id))?; // before the heartbeat bootstrap sequence completes.
let engine = match engines.get(&agent_id) {
Some(e) => e,
None => {
tracing::debug!("[health_snapshot] Engine not initialized for {}, returning pending snapshot", agent_id);
return Ok(HealthSnapshot {
timestamp: chrono::Utc::now().to_rfc3339(),
intelligence: IntelligenceHealth {
engine_running: false,
config: HeartbeatConfig::default(),
last_tick: None,
alert_count_24h: 0,
total_checks: 5,
},
memory: MemoryHealth {
total_entries: 0,
storage_size_bytes: 0,
last_extraction: None,
},
});
}
};
let engine_running = engine.is_running().await; let engine_running = engine.is_running().await;
let config = engine.get_config().await; let config = engine.get_config().await;

View File

@@ -189,7 +189,7 @@ export interface ConfigActionsSlice {
description?: string; description?: string;
enabled?: boolean; enabled?: boolean;
}) => Promise<ScheduledTask | undefined>; }) => Promise<ScheduledTask | undefined>;
loadSkillsCatalog: () => Promise<void>; loadSkillsCatalog: (retryCount?: number) => Promise<void>;
getSkill: (id: string) => Promise<SkillInfo | undefined>; getSkill: (id: string) => Promise<SkillInfo | undefined>;
createSkill: (skill: { createSkill: (skill: {
name: string; name: string;
@@ -449,7 +449,7 @@ export const useConfigStore = create<ConfigStateSlice & ConfigActionsSlice>((set
// === Skill Actions === // === Skill Actions ===
loadSkillsCatalog: async () => { loadSkillsCatalog: async (retryCount = 0) => {
const client = get().client; const client = get().client;
// Path A: via injected client (KernelClient or GatewayClient) // Path A: via injected client (KernelClient or GatewayClient)
@@ -494,10 +494,19 @@ export const useConfigStore = create<ConfigStateSlice & ConfigActionsSlice>((set
source: ((s.source as string) || 'builtin') as 'builtin' | 'extra', source: ((s.source as string) || 'builtin') as 'builtin' | 'extra',
path: s.path as string | undefined, path: s.path as string | undefined,
})) }); })) });
return;
} }
} catch (err) { } catch (err) {
console.warn('[configStore] skill_list direct invoke also failed:', err); console.warn('[configStore] skill_list direct invoke also failed:', err);
} }
// Path C: delayed retry — kernel may still be initializing
if (retryCount < 2) {
const delay = (retryCount + 1) * 1500; // 1.5s, 3s
console.log(`[configStore] Skills empty, retrying in ${delay}ms (attempt ${retryCount + 1}/2)`);
await new Promise((r) => setTimeout(r, delay));
return get().loadSkillsCatalog(retryCount + 1);
}
}, },
getSkill: async (id: string) => { getSkill: async (id: string) => {