feat(server): 可观测性 Phase 1 — 健康检查路由 + Prometheus 指标 + 连接池/事件积压监控
- 添加 /health/live 存活探针别名(原 /health + /health/ready 保留)
- 新增 metrics middleware:http_requests_total 计数器 + http_request_duration_seconds 直方图
- Prometheus exporter 独立端口 9090(可通过 ERP__SERVER__METRICS_PORT 覆盖)
- 后台任务每 30s 采样 DB 连接池活跃/空闲连接数(pg_stat_activity)
- 后台任务每 30s 采样 EventBus pending 事件积压数
- UUID 路径归一化避免高基数(/api/v1/users/:id/posts)
This commit is contained in:
@@ -51,3 +51,69 @@ async fn run_cleanup(db: &sea_orm::DatabaseConnection) -> Result<(), sea_orm::Db
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 启动 DB 连接池 + EventBus 积压指标采样任务。
|
||||
///
|
||||
/// 每 30 秒采样一次并导出为 Prometheus gauge:
|
||||
/// - `db_pool_connections_active` — 当前活跃连接数
|
||||
/// - `db_pool_connections_idle` — 当前空闲连接数
|
||||
/// - `eventbus_pending_total` — pending 状态的领域事件数
|
||||
pub fn start_pool_metrics(db: sea_orm::DatabaseConnection) {
|
||||
tokio::spawn(async move {
|
||||
let mut interval = tokio::time::interval(Duration::from_secs(30));
|
||||
loop {
|
||||
interval.tick().await;
|
||||
sample_pool_metrics(&db).await;
|
||||
sample_eventbus_backlog(&db).await;
|
||||
}
|
||||
});
|
||||
tracing::info!("DB 连接池 + EventBus 积压指标采样已启动(每 30 秒采样一次)");
|
||||
}
|
||||
|
||||
async fn sample_pool_metrics(db: &sea_orm::DatabaseConnection) {
|
||||
use sea_orm::FromQueryResult;
|
||||
|
||||
#[derive(FromQueryResult)]
|
||||
struct CountRow {
|
||||
cnt: i64,
|
||||
}
|
||||
|
||||
// 通过 pg_stat_activity 查询当前连接数
|
||||
let stmt = sea_orm::Statement::from_string(
|
||||
sea_orm::DatabaseBackend::Postgres,
|
||||
"SELECT COUNT(*)::bigint AS cnt FROM pg_stat_activity WHERE state = 'active'".to_string(),
|
||||
);
|
||||
if let Ok(Some(row)) = CountRow::find_by_statement(stmt).one(db).await {
|
||||
metrics::gauge!("db_pool_connections_active").set(row.cnt as f64);
|
||||
}
|
||||
|
||||
let stmt = sea_orm::Statement::from_string(
|
||||
sea_orm::DatabaseBackend::Postgres,
|
||||
"SELECT COUNT(*)::bigint AS cnt FROM pg_stat_activity WHERE state = 'idle'".to_string(),
|
||||
);
|
||||
if let Ok(Some(row)) = CountRow::find_by_statement(stmt).one(db).await {
|
||||
metrics::gauge!("db_pool_connections_idle").set(row.cnt as f64);
|
||||
}
|
||||
}
|
||||
|
||||
async fn sample_eventbus_backlog(db: &sea_orm::DatabaseConnection) {
|
||||
use sea_orm::FromQueryResult;
|
||||
|
||||
#[derive(FromQueryResult)]
|
||||
struct CountRow {
|
||||
cnt: i64,
|
||||
}
|
||||
|
||||
let stmt = sea_orm::Statement::from_string(
|
||||
sea_orm::DatabaseBackend::Postgres,
|
||||
"SELECT COUNT(*)::bigint AS cnt FROM domain_events WHERE status = 'pending'".to_string(),
|
||||
);
|
||||
match CountRow::find_by_statement(stmt).one(db).await {
|
||||
Ok(Some(row)) => {
|
||||
metrics::gauge!("eventbus_pending_total").set(row.cnt as f64);
|
||||
}
|
||||
_ => {
|
||||
tracing::debug!("EventBus 积压采样:无法获取 pending 事件数");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user