feat(server): 可观测性 Phase 1 — 健康检查路由 + Prometheus 指标 + 连接池/事件积压监控
- 添加 /health/live 存活探针别名(原 /health + /health/ready 保留)
- 新增 metrics middleware:http_requests_total 计数器 + http_request_duration_seconds 直方图
- Prometheus exporter 独立端口 9090(可通过 ERP__SERVER__METRICS_PORT 覆盖)
- 后台任务每 30s 采样 DB 连接池活跃/空闲连接数(pg_stat_activity)
- 后台任务每 30s 采样 EventBus pending 事件积压数
- UUID 路径归一化避免高基数(/api/v1/users/:id/posts)
This commit is contained in:
@@ -432,6 +432,9 @@ async fn main() -> anyhow::Result<()> {
|
||||
// Start event cleanup (archive old published events + purge processed_events)
|
||||
tasks::start_event_cleanup(db.clone());
|
||||
|
||||
// Start DB connection pool metrics sampling (every 30s)
|
||||
tasks::start_pool_metrics(db.clone());
|
||||
|
||||
// Start timeout checker (scan overdue tasks every 60s)
|
||||
erp_workflow::WorkflowModule::start_timeout_checker(db.clone(), event_bus.clone());
|
||||
tracing::info!("Timeout checker started");
|
||||
@@ -611,8 +614,13 @@ async fn main() -> anyhow::Result<()> {
|
||||
let app = Router::new()
|
||||
.nest("/api/v1", unthrottled_routes.merge(public_routes).merge(protected_routes))
|
||||
.nest("/uploads", uploads_router)
|
||||
.layer(axum::middleware::from_fn(middleware::metrics::metrics_middleware))
|
||||
.layer(cors);
|
||||
|
||||
// Start Prometheus metrics exporter on a separate port
|
||||
let metrics_port = state.config.server.metrics_port;
|
||||
middleware::metrics::start_metrics_server(metrics_port);
|
||||
|
||||
let addr = format!("{}:{}", host, port);
|
||||
let listener = tokio::net::TcpListener::bind(&addr).await?;
|
||||
tracing::info!(addr = %addr, "Server listening");
|
||||
|
||||
Reference in New Issue
Block a user