refactor(desktop): split kernel_commands/pipeline_commands into modules, add SaaS client libs and gateway modules
Split monolithic kernel_commands.rs (2185 lines) and pipeline_commands.rs (1391 lines) into focused sub-modules under kernel_commands/ and pipeline_commands/ directories. Add gateway module (commands, config, io, runtime), health_check, and 15 new TypeScript client libraries for SaaS relay, auth, admin, telemetry, and kernel sub-systems (a2a, agent, chat, hands, skills, triggers). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
296
desktop/src-tauri/src/health_check.rs
Normal file
296
desktop/src-tauri/src/health_check.rs
Normal file
@@ -0,0 +1,296 @@
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use std::net::{TcpStream, ToSocketAddrs};
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
use tauri::AppHandle;
|
||||
|
||||
use crate::gateway::io::{parse_json_output, read_gateway_status, run_zclaw, LocalGatewayStatus};
|
||||
use crate::gateway::runtime::{resolve_zclaw_runtime, ZCLAW_DEFAULT_PORT};
|
||||
|
||||
/// Health status enum
///
/// Overall verdict of a health check. Serialized to lowercase strings
/// ("healthy" / "unhealthy") for the frontend via serde.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "lowercase")]
pub(crate) enum HealthStatus {
    Healthy,
    Unhealthy,
}
|
||||
|
||||
/// Port check result
///
/// Outcome of a single TCP connect probe, serialized in camelCase for the
/// frontend.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct PortCheckResult {
    // Port that was probed.
    port: u16,
    // True when a TCP connection succeeded within the timeout.
    accessible: bool,
    // Connect latency in milliseconds; None when the probe failed.
    latency_ms: Option<u64>,
    // Human-readable failure reason; None on success.
    error: Option<String>,
}
|
||||
|
||||
/// Process health details
///
/// Best-effort snapshot of the gateway listener process; every field is
/// optional because the process may not exist or detailed metrics may be
/// unavailable.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct ProcessHealthDetails {
    // PID of the first listener process, if any.
    pid: Option<u32>,
    // Process name (currently always "zclaw" when a listener exists).
    name: Option<String>,
    // Service status string as reported by the gateway status command.
    status: Option<String>,
    // Process uptime in seconds, when reported.
    uptime_seconds: Option<u64>,
    // CPU usage percentage, when the process list reports it.
    cpu_percent: Option<f64>,
    // Resident memory in megabytes, when the process list reports it.
    memory_mb: Option<f64>,
}
|
||||
|
||||
/// Health check response
///
/// Aggregated result returned by `zclaw_health_check`: overall status plus
/// the individual probe details that produced it.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct HealthCheckResponse {
    // Overall verdict.
    status: HealthStatus,
    // Details about the listener process (fields None when unavailable).
    process: ProcessHealthDetails,
    // Result of the TCP port probe.
    port_check: PortCheckResult,
    // Unix timestamp (seconds) at which the check ran; 0 if the clock
    // predates the epoch.
    last_check_timestamp: u64,
    // Names of the checks that were actually executed.
    checks_performed: Vec<String>,
    // Human-readable problems discovered; may be non-empty even when healthy.
    issues: Vec<String>,
    // Where the ZCLAW runtime was resolved from, if known.
    runtime_source: Option<String>,
}
|
||||
|
||||
/// Check if a TCP port is accessible
|
||||
fn check_port_accessibility(host: &str, port: u16, timeout_ms: u64) -> PortCheckResult {
|
||||
let addr = format!("{}:{}", host, port);
|
||||
|
||||
// Resolve the address
|
||||
let socket_addr = match addr.to_socket_addrs() {
|
||||
Ok(mut addrs) => addrs.next(),
|
||||
Err(e) => {
|
||||
return PortCheckResult {
|
||||
port,
|
||||
accessible: false,
|
||||
latency_ms: None,
|
||||
error: Some(format!("Failed to resolve address: {}", e)),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
let Some(socket_addr) = socket_addr else {
|
||||
return PortCheckResult {
|
||||
port,
|
||||
accessible: false,
|
||||
latency_ms: None,
|
||||
error: Some("Failed to resolve address".to_string()),
|
||||
};
|
||||
};
|
||||
|
||||
// Try to connect with timeout
|
||||
let start = Instant::now();
|
||||
|
||||
// Use a simple TCP connect with timeout simulation
|
||||
let result = TcpStream::connect_timeout(&socket_addr, Duration::from_millis(timeout_ms));
|
||||
|
||||
match result {
|
||||
Ok(_) => {
|
||||
let latency = start.elapsed().as_millis() as u64;
|
||||
PortCheckResult {
|
||||
port,
|
||||
accessible: true,
|
||||
latency_ms: Some(latency),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
Err(e) => PortCheckResult {
|
||||
port,
|
||||
accessible: false,
|
||||
latency_ms: None,
|
||||
error: Some(format!("Connection failed: {}", e)),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Get process uptime from status command
|
||||
fn get_process_uptime(status: &LocalGatewayStatus) -> Option<u64> {
|
||||
// Try to extract uptime from raw status data
|
||||
status
|
||||
.raw
|
||||
.get("process")
|
||||
.and_then(|p| p.get("uptimeSeconds"))
|
||||
.and_then(Value::as_u64)
|
||||
}
|
||||
|
||||
/// Perform comprehensive health check on ZCLAW Kernel
|
||||
#[tauri::command]
|
||||
pub fn zclaw_health_check(
|
||||
app: AppHandle,
|
||||
port: Option<u16>,
|
||||
timeout_ms: Option<u64>,
|
||||
) -> Result<HealthCheckResponse, String> {
|
||||
let check_port = port.unwrap_or(ZCLAW_DEFAULT_PORT);
|
||||
let timeout = timeout_ms.unwrap_or(3000);
|
||||
let mut checks_performed = Vec::new();
|
||||
let mut issues = Vec::new();
|
||||
|
||||
// Get current timestamp
|
||||
let last_check_timestamp = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|d| d.as_secs())
|
||||
.unwrap_or(0);
|
||||
|
||||
// 1. Check if ZCLAW CLI is available
|
||||
let runtime = resolve_zclaw_runtime(&app);
|
||||
let cli_available = runtime.executable.is_file();
|
||||
|
||||
if !cli_available {
|
||||
return Ok(HealthCheckResponse {
|
||||
status: HealthStatus::Unhealthy,
|
||||
process: ProcessHealthDetails {
|
||||
pid: None,
|
||||
name: None,
|
||||
status: None,
|
||||
uptime_seconds: None,
|
||||
cpu_percent: None,
|
||||
memory_mb: None,
|
||||
},
|
||||
port_check: PortCheckResult {
|
||||
port: check_port,
|
||||
accessible: false,
|
||||
latency_ms: None,
|
||||
error: Some("ZCLAW CLI not available".to_string()),
|
||||
},
|
||||
last_check_timestamp,
|
||||
checks_performed: vec!["cli_availability".to_string()],
|
||||
issues: vec![format!(
|
||||
"ZCLAW runtime not found at: {}",
|
||||
runtime.display_path.display()
|
||||
)],
|
||||
runtime_source: Some(runtime.source),
|
||||
});
|
||||
}
|
||||
checks_performed.push("cli_availability".to_string());
|
||||
|
||||
// 2. Get gateway status
|
||||
let gateway_status = read_gateway_status(&app)?;
|
||||
checks_performed.push("gateway_status".to_string());
|
||||
|
||||
// Check for configuration issues
|
||||
if !gateway_status.config_ok {
|
||||
issues.push("Gateway configuration has issues".to_string());
|
||||
}
|
||||
|
||||
// 3. Check port accessibility
|
||||
let port_check = check_port_accessibility("127.0.0.1", check_port, timeout);
|
||||
checks_performed.push("port_accessibility".to_string());
|
||||
|
||||
if !port_check.accessible {
|
||||
issues.push(format!(
|
||||
"Port {} is not accessible: {}",
|
||||
check_port,
|
||||
port_check.error.as_deref().unwrap_or("unknown error")
|
||||
));
|
||||
}
|
||||
|
||||
// 4. Extract process information
|
||||
let process_health = if !gateway_status.listener_pids.is_empty() {
|
||||
// Get the first listener PID
|
||||
let pid = gateway_status.listener_pids[0];
|
||||
|
||||
// Try to get detailed process info from process list
|
||||
let process_info = run_zclaw(&app, &["process", "list", "--json"])
|
||||
.ok()
|
||||
.and_then(|result| parse_json_output(&result.stdout).ok())
|
||||
.and_then(|json| json.get("processes").and_then(Value::as_array).cloned());
|
||||
|
||||
let (cpu, memory, uptime) = if let Some(ref processes) = process_info {
|
||||
let matching = processes
|
||||
.iter()
|
||||
.find(|p| p.get("pid").and_then(Value::as_u64) == Some(pid as u64));
|
||||
|
||||
matching.map_or((None, None, None), |p| {
|
||||
(
|
||||
p.get("cpuPercent").and_then(Value::as_f64),
|
||||
p.get("memoryMb").and_then(Value::as_f64),
|
||||
p.get("uptimeSeconds").and_then(Value::as_u64),
|
||||
)
|
||||
})
|
||||
} else {
|
||||
(None, None, get_process_uptime(&gateway_status))
|
||||
};
|
||||
|
||||
ProcessHealthDetails {
|
||||
pid: Some(pid),
|
||||
name: Some("zclaw".to_string()),
|
||||
status: Some(
|
||||
gateway_status
|
||||
.service_status
|
||||
.clone()
|
||||
.unwrap_or_else(|| "running".to_string()),
|
||||
),
|
||||
uptime_seconds: uptime,
|
||||
cpu_percent: cpu,
|
||||
memory_mb: memory,
|
||||
}
|
||||
} else {
|
||||
ProcessHealthDetails {
|
||||
pid: None,
|
||||
name: None,
|
||||
status: gateway_status.service_status.clone(),
|
||||
uptime_seconds: None,
|
||||
cpu_percent: None,
|
||||
memory_mb: None,
|
||||
}
|
||||
};
|
||||
|
||||
// Check if process is running but no listeners
|
||||
if gateway_status.service_status.as_deref() == Some("running")
|
||||
&& gateway_status.listener_pids.is_empty()
|
||||
{
|
||||
issues.push("Service reports running but no listener processes found".to_string());
|
||||
}
|
||||
|
||||
// 5. Determine overall health status
|
||||
let status = if !cli_available {
|
||||
HealthStatus::Unhealthy
|
||||
} else if !port_check.accessible {
|
||||
HealthStatus::Unhealthy
|
||||
} else if gateway_status.listener_pids.is_empty() {
|
||||
HealthStatus::Unhealthy
|
||||
} else if !issues.is_empty() {
|
||||
// Has some issues but core functionality is working
|
||||
HealthStatus::Healthy
|
||||
} else {
|
||||
HealthStatus::Healthy
|
||||
};
|
||||
|
||||
Ok(HealthCheckResponse {
|
||||
status,
|
||||
process: process_health,
|
||||
port_check,
|
||||
last_check_timestamp,
|
||||
checks_performed,
|
||||
issues,
|
||||
runtime_source: Some(runtime.source),
|
||||
})
|
||||
}
|
||||
|
||||
/// Quick ping to check if ZCLAW is alive (lightweight check)
|
||||
#[tauri::command]
|
||||
pub fn zclaw_ping(app: AppHandle) -> Result<bool, String> {
|
||||
let port_check = check_port_accessibility("127.0.0.1", ZCLAW_DEFAULT_PORT, 1000);
|
||||
|
||||
if port_check.accessible {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
// Fallback: check via status command
|
||||
match run_zclaw(&app, &["gateway", "status", "--json", "--no-probe"]) {
|
||||
Ok(result) => {
|
||||
if let Ok(status) = parse_json_output(&result.stdout) {
|
||||
// Check if there are any listener PIDs
|
||||
let has_listeners = status
|
||||
.get("port")
|
||||
.and_then(|p| p.get("listeners"))
|
||||
.and_then(Value::as_array)
|
||||
.map(|arr| !arr.is_empty())
|
||||
.unwrap_or(false);
|
||||
|
||||
Ok(has_listeners)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
Err(_) => Ok(false),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user