feat(server): 可观测性 Phase 1 — 健康检查路由 + Prometheus 指标 + 连接池/事件积压监控
Some checks failed
CI / rust-check (push) Has been cancelled
CI / rust-test (push) Has been cancelled
CI / frontend-build (push) Has been cancelled
CI / security-audit (push) Has been cancelled

- 添加 /health/live 存活探针别名(原 /health + /health/ready 保留)
- 新增 metrics middleware:http_requests_total 计数器 + http_request_duration_seconds 直方图
- Prometheus exporter 独立端口 9090(可通过 ERP__SERVER__METRICS_PORT 覆盖)
- 后台任务每 30s 采样 DB 连接池活跃/空闲连接数(pg_stat_activity)
- 后台任务每 30s 采样 EventBus pending 事件积压数
- UUID 路径归一化避免高基数(/api/v1/users/:id/posts)
This commit is contained in:
iven
2026-04-28 20:39:11 +08:00
parent f99892ee16
commit 5ab8bf8479
9 changed files with 402 additions and 17 deletions

View File

@@ -0,0 +1,122 @@
use axum::extract::Request;
use axum::http::Method;
use axum::middleware::Next;
use axum::response::{IntoResponse, Response};
use metrics::{counter, histogram};
use std::time::Instant;
/// HTTP 请求指标中间件。
///
/// 记录两个 Prometheus 指标:
/// - `http_requests_total` — 计数器,标签: method, path, status
/// - `http_request_duration_seconds` — 直方图,标签: method, path, status
pub async fn metrics_middleware(req: Request, next: Next) -> Response {
let method = method_label(req.method());
let path = path_label(req.uri().path());
let start = Instant::now();
let resp = next.run(req).await;
let elapsed = start.elapsed();
let status = resp.status().as_u16().to_string();
let labels = [
("method", method.clone()),
("path", path.clone()),
("status", status.clone()),
];
counter!("http_requests_total", &labels).increment(1);
histogram!("http_request_duration_seconds", &labels).record(elapsed.as_secs_f64());
resp
}
fn method_label(method: &Method) -> String {
method.as_str().to_owned()
}
/// 归一化路径:将 UUID 段替换为 `:id`,避免高基数。
fn path_label(path: &str) -> String {
let parts: Vec<&str> = path
.split('/')
.filter(|s| !s.is_empty())
.map(|s| if looks_like_uuid(s) { ":id" } else { s })
.collect();
if parts.is_empty() {
"/".to_string()
} else {
format!("/{}", parts.join("/"))
}
}
fn looks_like_uuid(s: &str) -> bool {
s.len() == 36
&& s.chars().filter(|c| *c == '-').count() == 4
&& s.chars().all(|c| c.is_ascii_hexdigit() || c == '-')
}
/// 在独立端口启动 Prometheus exporter。
pub fn start_metrics_server(port: u16) {
let builder = metrics_exporter_prometheus::PrometheusBuilder::new();
let recorder = builder.build_recorder();
let handle = recorder.handle();
if let Err(e) = metrics::set_global_recorder(recorder) {
tracing::error!(error = %e, "Failed to install Prometheus recorder");
return;
}
tokio::spawn(async move {
let app = axum::Router::new()
.route(
"/metrics",
axum::routing::get(move || {
let handle = handle.clone();
async move {
let body = handle.render();
axum::response::IntoResponse::into_response(
([(axum::http::header::CONTENT_TYPE, "text/plain; version=0.0.4")], body),
)
}
}),
)
.fallback(|| async { axum::http::StatusCode::NOT_FOUND.into_response() as Response });
let addr = format!("0.0.0.0:{port}");
match tokio::net::TcpListener::bind(&addr).await {
Ok(listener) => {
tracing::info!(addr = %addr, "Prometheus metrics server listening");
if let Err(e) = axum::serve(listener, app).await {
tracing::error!(error = %e, "Metrics server error");
}
}
Err(e) => {
tracing::error!(error = %e, addr = %addr, "Failed to bind metrics server");
}
}
});
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn path_label_normalizes_uuids() {
assert_eq!(path_label("/api/v1/users"), "/api/v1/users");
assert_eq!(
path_label("/api/v1/users/01234567-89ab-cdef-0123-456789abcdef/posts"),
"/api/v1/users/:id/posts"
);
assert_eq!(path_label("/"), "/");
assert_eq!(path_label(""), "/");
}
#[test]
fn is_uuid_checks_format() {
assert!(looks_like_uuid("01234567-89ab-cdef-0123-456789abcdef"));
assert!(!looks_like_uuid("not-a-uuid"));
assert!(!looks_like_uuid("short"));
}
}

View File

@@ -1,2 +1,3 @@
pub mod metrics;
pub mod rate_limit;
pub mod tenant_rls;