feat(server): 可观测性 Phase 1 — 健康检查路由 + Prometheus 指标 + 连接池/事件积压监控
Some checks failed
CI / rust-check (push) Has been cancelled
CI / rust-test (push) Has been cancelled
CI / frontend-build (push) Has been cancelled
CI / security-audit (push) Has been cancelled

- 添加 /health/live 存活探针别名(原 /health + /health/ready 保留)
- 新增 metrics middleware:http_requests_total 计数器 + http_request_duration_seconds 直方图
- Prometheus exporter 独立端口 9090(可通过 ERP__SERVER__METRICS_PORT 覆盖)
- 后台任务每 30s 采样 DB 连接池活跃/空闲连接数(pg_stat_activity)
- 后台任务每 30s 采样 EventBus pending 事件积压数
- UUID 路径归一化避免高基数(/api/v1/users/:id/posts)
This commit is contained in:
iven
2026-04-28 20:39:11 +08:00
parent f99892ee16
commit 5ab8bf8479
9 changed files with 402 additions and 17 deletions

View File

@@ -51,3 +51,69 @@ async fn run_cleanup(db: &sea_orm::DatabaseConnection) -> Result<(), sea_orm::Db
Ok(())
}
/// 启动 DB 连接池 + EventBus 积压指标采样任务。
///
/// 每 30 秒采样一次并导出为 Prometheus gauge
/// - `db_pool_connections_active` — 当前活跃连接数
/// - `db_pool_connections_idle` — 当前空闲连接数
/// - `eventbus_pending_total` — pending 状态的领域事件数
pub fn start_pool_metrics(db: sea_orm::DatabaseConnection) {
tokio::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(30));
loop {
interval.tick().await;
sample_pool_metrics(&db).await;
sample_eventbus_backlog(&db).await;
}
});
tracing::info!("DB 连接池 + EventBus 积压指标采样已启动(每 30 秒采样一次)");
}
async fn sample_pool_metrics(db: &sea_orm::DatabaseConnection) {
use sea_orm::FromQueryResult;
#[derive(FromQueryResult)]
struct CountRow {
cnt: i64,
}
// 通过 pg_stat_activity 查询当前连接数
let stmt = sea_orm::Statement::from_string(
sea_orm::DatabaseBackend::Postgres,
"SELECT COUNT(*)::bigint AS cnt FROM pg_stat_activity WHERE state = 'active'".to_string(),
);
if let Ok(Some(row)) = CountRow::find_by_statement(stmt).one(db).await {
metrics::gauge!("db_pool_connections_active").set(row.cnt as f64);
}
let stmt = sea_orm::Statement::from_string(
sea_orm::DatabaseBackend::Postgres,
"SELECT COUNT(*)::bigint AS cnt FROM pg_stat_activity WHERE state = 'idle'".to_string(),
);
if let Ok(Some(row)) = CountRow::find_by_statement(stmt).one(db).await {
metrics::gauge!("db_pool_connections_idle").set(row.cnt as f64);
}
}
async fn sample_eventbus_backlog(db: &sea_orm::DatabaseConnection) {
use sea_orm::FromQueryResult;
#[derive(FromQueryResult)]
struct CountRow {
cnt: i64,
}
let stmt = sea_orm::Statement::from_string(
sea_orm::DatabaseBackend::Postgres,
"SELECT COUNT(*)::bigint AS cnt FROM domain_events WHERE status = 'pending'".to_string(),
);
match CountRow::find_by_statement(stmt).one(db).await {
Ok(Some(row)) => {
metrics::gauge!("eventbus_pending_total").set(row.cnt as f64);
}
_ => {
tracing::debug!("EventBus 积压采样:无法获取 pending 事件数");
}
}
}