Files
hms/crates/erp-server/src/tasks.rs
iven 5ab8bf8479
Some checks failed
CI / rust-check (push) Has been cancelled
CI / rust-test (push) Has been cancelled
CI / frontend-build (push) Has been cancelled
CI / security-audit (push) Has been cancelled
feat(server): 可观测性 Phase 1 — 健康检查路由 + Prometheus 指标 + 连接池/事件积压监控
- 添加 /health/live 存活探针别名(原 /health + /health/ready 保留)
- 新增 metrics middleware:http_requests_total 计数器 + http_request_duration_seconds 直方图
- Prometheus exporter 独立端口 9090(可通过 ERP__SERVER__METRICS_PORT 覆盖)
- 后台任务每 30s 采样 DB 连接池活跃/空闲连接数(pg_stat_activity)
- 后台任务每 30s 采样 EventBus pending 事件积压数
- UUID 路径归一化避免高基数(/api/v1/users/:id/posts)
2026-04-28 20:39:11 +08:00

120 lines
3.9 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use std::time::Duration;
/// 启动事件清理后台任务。
///
/// 每日执行一次:
/// - 调用 `cleanup_old_published_events()` 归档 >90 天的已发布事件
/// - 调用 `cleanup_old_processed_events()` 清理 >7 天的去重记录
pub fn start_event_cleanup(db: sea_orm::DatabaseConnection) {
tokio::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(86400));
loop {
interval.tick().await;
if let Err(e) = run_cleanup(&db).await {
tracing::warn!(error = %e, "事件清理任务执行失败");
}
}
});
tracing::info!("事件清理任务已启动(每 24 小时执行一次)");
}
async fn run_cleanup(db: &sea_orm::DatabaseConnection) -> Result<(), sea_orm::DbErr> {
use sea_orm::ConnectionTrait;
// 归档 >90 天的已发布事件
match db
.execute_unprepared("SELECT cleanup_old_published_events(90, 1000)")
.await
{
Ok(result) => {
tracing::info!(
rows_affected = result.rows_affected(),
"已发布事件归档完成"
);
}
Err(e) => tracing::warn!(error = %e, "已发布事件归档失败"),
}
// 清理 >7 天的去重记录
match db
.execute_unprepared("SELECT cleanup_old_processed_events(7, 1000)")
.await
{
Ok(result) => {
tracing::info!(
rows_affected = result.rows_affected(),
"去重记录清理完成"
);
}
Err(e) => tracing::warn!(error = %e, "去重记录清理失败"),
}
Ok(())
}
/// 启动 DB 连接池 + EventBus 积压指标采样任务。
///
/// 每 30 秒采样一次并导出为 Prometheus gauge
/// - `db_pool_connections_active` — 当前活跃连接数
/// - `db_pool_connections_idle` — 当前空闲连接数
/// - `eventbus_pending_total` — pending 状态的领域事件数
pub fn start_pool_metrics(db: sea_orm::DatabaseConnection) {
tokio::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(30));
loop {
interval.tick().await;
sample_pool_metrics(&db).await;
sample_eventbus_backlog(&db).await;
}
});
tracing::info!("DB 连接池 + EventBus 积压指标采样已启动(每 30 秒采样一次)");
}
async fn sample_pool_metrics(db: &sea_orm::DatabaseConnection) {
use sea_orm::FromQueryResult;
#[derive(FromQueryResult)]
struct CountRow {
cnt: i64,
}
// 通过 pg_stat_activity 查询当前连接数
let stmt = sea_orm::Statement::from_string(
sea_orm::DatabaseBackend::Postgres,
"SELECT COUNT(*)::bigint AS cnt FROM pg_stat_activity WHERE state = 'active'".to_string(),
);
if let Ok(Some(row)) = CountRow::find_by_statement(stmt).one(db).await {
metrics::gauge!("db_pool_connections_active").set(row.cnt as f64);
}
let stmt = sea_orm::Statement::from_string(
sea_orm::DatabaseBackend::Postgres,
"SELECT COUNT(*)::bigint AS cnt FROM pg_stat_activity WHERE state = 'idle'".to_string(),
);
if let Ok(Some(row)) = CountRow::find_by_statement(stmt).one(db).await {
metrics::gauge!("db_pool_connections_idle").set(row.cnt as f64);
}
}
async fn sample_eventbus_backlog(db: &sea_orm::DatabaseConnection) {
use sea_orm::FromQueryResult;
#[derive(FromQueryResult)]
struct CountRow {
cnt: i64,
}
let stmt = sea_orm::Statement::from_string(
sea_orm::DatabaseBackend::Postgres,
"SELECT COUNT(*)::bigint AS cnt FROM domain_events WHERE status = 'pending'".to_string(),
);
match CountRow::find_by_statement(stmt).one(db).await {
Ok(Some(row)) => {
metrics::gauge!("eventbus_pending_total").set(row.cnt as f64);
}
_ => {
tracing::debug!("EventBus 积压采样:无法获取 pending 事件数");
}
}
}