diff --git a/Cargo.lock b/Cargo.lock index 1d32208..cd19d73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -288,6 +288,28 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "aws-lc-rs" +version = "1.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + [[package]] name = "axum" version = "0.8.8" @@ -555,7 +577,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8144c22e24bbcf26ade86cb6501a0916c46b7e4787abdb0045a467eb1645a1d" dependencies = [ "ambient-authority", - "rand", + "rand 0.8.5", ] [[package]] @@ -681,6 +703,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + [[package]] name = "cobs" version = "0.3.0" @@ -1056,7 +1087,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", - "rand_core", + "rand_core 0.6.4", "typenum", ] @@ -1330,6 +1361,12 @@ dependencies = [ "dtoa", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "either" version = "1.15.0" @@ -1453,7 +1490,7 @@ dependencies = [ "dashmap", "hex", "hmac", - "rand", + "rand 0.8.5", "sea-orm", "serde", "serde_json", @@ -1663,6 +1700,8 @@ dependencies = [ "erp-points", "erp-server-migration", "erp-workflow", + "metrics", + "metrics-exporter-prometheus", "moka", "redis", "sea-orm", @@ -1842,6 +1881,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "funty" version = "2.0.0" @@ -2286,6 +2331,7 @@ dependencies = [ "hyper", "hyper-util", "rustls", + "rustls-native-certs", "tokio", "tokio-rustls", "tower-service", @@ -2478,8 +2524,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af1955a75fa080c677d3972822ec4bad316169ab1cfc6c257a942c2265dbe5fe" dependencies = [ "bitmaps", - "rand_core", - "rand_xoshiro", + "rand_core 0.6.4", + "rand_xoshiro 0.6.0", "sized-chunks", "typenum", "version_check", @@ -2856,6 +2902,53 @@ dependencies = [ "autocfg", ] +[[package]] +name = "metrics" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8" +dependencies = [ + "ahash 0.8.12", + "portable-atomic", +] + +[[package]] +name = "metrics-exporter-prometheus" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034" +dependencies = [ + "base64 0.22.1", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "indexmap", + "ipnet", + "metrics", + "metrics-util", + "quanta", + "thiserror 1.0.69", + "tokio", + "tracing", +] + +[[package]] +name = "metrics-util" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8496cc523d1f94c1385dd8f0f0c2c480b2b8aeccb5b7e4485ad6365523ae376" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown 0.15.5", + "metrics", + "quanta", + "rand 0.9.4", + "rand_xoshiro 0.7.0", + "sketches-ddsketch", +] + [[package]] name = "mime" version = "0.3.17" @@ -3009,7 +3102,7 @@ dependencies = [ "num-integer", "num-iter", "num-traits", - "rand", + "rand 0.8.5", "smallvec", "zeroize", ] @@ -3218,7 +3311,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166" dependencies = [ "base64ct", - "rand_core", + "rand_core 0.6.4", "subtle", ] @@ -3342,7 +3435,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -3572,6 +3665,21 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "quanta" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi", + "web-sys", + "winapi", +] + [[package]] name = "quote" version = "1.0.45" @@ -3606,8 +3714,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", ] [[package]] @@ -3617,7 +3735,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", ] [[package]] @@ -3629,13 +3757,40 @@ dependencies = [ "getrandom 0.2.17", ] +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + [[package]] name = "rand_xoshiro" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" dependencies = [ - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_xoshiro" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" +dependencies = [ + "rand_core 0.9.5", +] + +[[package]] +name = "raw-cpuid" +version = "11.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" +dependencies = [ + "bitflags", ] [[package]] @@ -3876,7 +4031,7 @@ dependencies = [ "num-traits", "pkcs1", "pkcs8", - "rand_core", + "rand_core 0.6.4", "signature", "spki", "subtle", @@ -3903,7 +4058,7 @@ dependencies = [ "borsh", "bytes", "num-traits", - "rand", + "rand 0.8.5", "rkyv", "serde", "serde_json", @@ -3982,6 +4137,7 @@ version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ + "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", @@ -3990,6 +4146,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -4005,6 +4173,7 @@ version = "0.103.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20a6af516fea4b20eccceaf166e8aa666ac996208e8a644ce3ef5aa783bc7cd4" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -4398,7 +4567,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ "digest", - "rand_core", + "rand_core 0.6.4", ] [[package]] @@ -4441,6 +4610,12 @@ dependencies = [ "typenum", ] +[[package]] +name = "sketches-ddsketch" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" + [[package]] name = "slab" version = "0.4.12" @@ -4618,7 +4793,7 @@ dependencies = [ "memchr", "once_cell", "percent-encoding", - "rand", + "rand 0.8.5", "rsa", "rust_decimal", "serde", @@ -4662,7 +4837,7 @@ dependencies = [ "memchr", "num-bigint", "once_cell", - "rand", + "rand 0.8.5", "rust_decimal", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 77584f6..1e630c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -119,3 +119,7 @@ handlebars = "6" # HTML sanitization ammonia = "4" + +# Metrics +metrics = "0.24" +metrics-exporter-prometheus = "0.16" diff --git a/crates/erp-server/Cargo.toml b/crates/erp-server/Cargo.toml index 5a1eb3f..2cd9dae 100644 --- a/crates/erp-server/Cargo.toml +++ b/crates/erp-server/Cargo.toml @@ -36,6 +36,8 @@ anyhow.workspace = true uuid.workspace = true chrono.workspace = true moka = { version = "0.12", features = ["sync"] } +metrics.workspace = true +metrics-exporter-prometheus.workspace = true [dev-dependencies] erp-auth = { workspace = true } diff --git a/crates/erp-server/src/config.rs b/crates/erp-server/src/config.rs index 3db265d..03fc655 100644 --- a/crates/erp-server/src/config.rs +++ b/crates/erp-server/src/config.rs @@ -20,6 +20,12 @@ pub struct AppConfig { pub struct ServerConfig { pub host: String, pub port: u16, + #[serde(default = "default_metrics_port")] + pub metrics_port: u16, +} + +fn default_metrics_port() -> u16 { + 9090 } #[derive(Debug, Clone, Deserialize)] diff --git a/crates/erp-server/src/handlers/health.rs b/crates/erp-server/src/handlers/health.rs index b1e85db..a223e0c 100644 --- a/crates/erp-server/src/handlers/health.rs +++ b/crates/erp-server/src/handlers/health.rs @@ -130,5 +130,6 @@ async fn check_redis(client: &redis::Client) -> ComponentStatus { pub fn health_check_router() -> Router { Router::new() .route("/health", get(health_check)) + .route("/health/live", get(health_check)) .route("/health/ready", get(readiness_check)) } diff --git a/crates/erp-server/src/main.rs b/crates/erp-server/src/main.rs index 9fe3d09..573e80d 100644 --- a/crates/erp-server/src/main.rs +++ b/crates/erp-server/src/main.rs @@ -432,6 +432,9 @@ async fn main() -> anyhow::Result<()> { // Start event cleanup (archive old published events + purge processed_events) tasks::start_event_cleanup(db.clone()); + // Start DB connection pool metrics sampling (every 30s) + tasks::start_pool_metrics(db.clone()); + // Start timeout checker (scan overdue tasks every 60s) erp_workflow::WorkflowModule::start_timeout_checker(db.clone(), event_bus.clone()); tracing::info!("Timeout checker started"); @@ -611,8 +614,13 @@ async fn main() -> anyhow::Result<()> { let app = Router::new() .nest("/api/v1", unthrottled_routes.merge(public_routes).merge(protected_routes)) .nest("/uploads", uploads_router) + .layer(axum::middleware::from_fn(middleware::metrics::metrics_middleware)) .layer(cors); + // Start Prometheus metrics exporter on a separate port + let metrics_port = state.config.server.metrics_port; + middleware::metrics::start_metrics_server(metrics_port); + let addr = format!("{}:{}", host, port); let listener = tokio::net::TcpListener::bind(&addr).await?; tracing::info!(addr = %addr, "Server listening"); diff --git a/crates/erp-server/src/middleware/metrics.rs b/crates/erp-server/src/middleware/metrics.rs new file mode 100644 index 0000000..3adab1b --- /dev/null +++ b/crates/erp-server/src/middleware/metrics.rs @@ -0,0 +1,122 @@ +use axum::extract::Request; +use axum::http::Method; +use axum::middleware::Next; +use axum::response::{IntoResponse, Response}; +use metrics::{counter, histogram}; +use std::time::Instant; + +/// HTTP 请求指标中间件。 +/// +/// 记录两个 Prometheus 指标: +/// - `http_requests_total` — 计数器,标签: method, path, status +/// - `http_request_duration_seconds` — 直方图,标签: method, path, status +pub async fn metrics_middleware(req: Request, next: Next) -> Response { + let method = method_label(req.method()); + let path = path_label(req.uri().path()); + + let start = Instant::now(); + let resp = next.run(req).await; + let elapsed = start.elapsed(); + + let status = resp.status().as_u16().to_string(); + + let labels = [ + ("method", method.clone()), + ("path", path.clone()), + ("status", status.clone()), + ]; + + counter!("http_requests_total", &labels).increment(1); + histogram!("http_request_duration_seconds", &labels).record(elapsed.as_secs_f64()); + + resp +} + +fn method_label(method: &Method) -> String { + method.as_str().to_owned() +} + +/// 归一化路径:将 UUID 段替换为 `:id`,避免高基数。 +fn path_label(path: &str) -> String { + let parts: Vec<&str> = path + .split('/') + .filter(|s| !s.is_empty()) + .map(|s| if looks_like_uuid(s) { ":id" } else { s }) + .collect(); + if parts.is_empty() { + "/".to_string() + } else { + format!("/{}", parts.join("/")) + } +} + +fn looks_like_uuid(s: &str) -> bool { + s.len() == 36 + && s.chars().filter(|c| *c == '-').count() == 4 + && s.chars().all(|c| c.is_ascii_hexdigit() || c == '-') +} + +/// 在独立端口启动 Prometheus exporter。 +pub fn start_metrics_server(port: u16) { + let builder = metrics_exporter_prometheus::PrometheusBuilder::new(); + let recorder = builder.build_recorder(); + let handle = recorder.handle(); + + if let Err(e) = metrics::set_global_recorder(recorder) { + tracing::error!(error = %e, "Failed to install Prometheus recorder"); + return; + } + + tokio::spawn(async move { + let app = axum::Router::new() + .route( + "/metrics", + axum::routing::get(move || { + let handle = handle.clone(); + async move { + let body = handle.render(); + axum::response::IntoResponse::into_response( + ([(axum::http::header::CONTENT_TYPE, "text/plain; version=0.0.4")], body), + ) + } + }), + ) + .fallback(|| async { axum::http::StatusCode::NOT_FOUND.into_response() as Response }); + + let addr = format!("0.0.0.0:{port}"); + match tokio::net::TcpListener::bind(&addr).await { + Ok(listener) => { + tracing::info!(addr = %addr, "Prometheus metrics server listening"); + if let Err(e) = axum::serve(listener, app).await { + tracing::error!(error = %e, "Metrics server error"); + } + } + Err(e) => { + tracing::error!(error = %e, addr = %addr, "Failed to bind metrics server"); + } + } + }); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn path_label_normalizes_uuids() { + assert_eq!(path_label("/api/v1/users"), "/api/v1/users"); + assert_eq!( + path_label("/api/v1/users/01234567-89ab-cdef-0123-456789abcdef/posts"), + "/api/v1/users/:id/posts" + ); + assert_eq!(path_label("/"), "/"); + assert_eq!(path_label(""), "/"); + } + + #[test] + fn is_uuid_checks_format() { + assert!(looks_like_uuid("01234567-89ab-cdef-0123-456789abcdef")); + assert!(!looks_like_uuid("not-a-uuid")); + assert!(!looks_like_uuid("short")); + } +} diff --git a/crates/erp-server/src/middleware/mod.rs b/crates/erp-server/src/middleware/mod.rs index 9b7d07e..3fea4da 100644 --- a/crates/erp-server/src/middleware/mod.rs +++ b/crates/erp-server/src/middleware/mod.rs @@ -1,2 +1,3 @@ +pub mod metrics; pub mod rate_limit; pub mod tenant_rls; diff --git a/crates/erp-server/src/tasks.rs b/crates/erp-server/src/tasks.rs index b3105d2..d9cf993 100644 --- a/crates/erp-server/src/tasks.rs +++ b/crates/erp-server/src/tasks.rs @@ -51,3 +51,69 @@ async fn run_cleanup(db: &sea_orm::DatabaseConnection) -> Result<(), sea_orm::Db Ok(()) } + +/// 启动 DB 连接池 + EventBus 积压指标采样任务。 +/// +/// 每 30 秒采样一次并导出为 Prometheus gauge: +/// - `db_pool_connections_active` — 当前活跃连接数 +/// - `db_pool_connections_idle` — 当前空闲连接数 +/// - `eventbus_pending_total` — pending 状态的领域事件数 +pub fn start_pool_metrics(db: sea_orm::DatabaseConnection) { + tokio::spawn(async move { + let mut interval = tokio::time::interval(Duration::from_secs(30)); + loop { + interval.tick().await; + sample_pool_metrics(&db).await; + sample_eventbus_backlog(&db).await; + } + }); + tracing::info!("DB 连接池 + EventBus 积压指标采样已启动(每 30 秒采样一次)"); +} + +async fn sample_pool_metrics(db: &sea_orm::DatabaseConnection) { + use sea_orm::FromQueryResult; + + #[derive(FromQueryResult)] + struct CountRow { + cnt: i64, + } + + // 通过 pg_stat_activity 查询当前连接数 + let stmt = sea_orm::Statement::from_string( + sea_orm::DatabaseBackend::Postgres, + "SELECT COUNT(*)::bigint AS cnt FROM pg_stat_activity WHERE state = 'active'".to_string(), + ); + if let Ok(Some(row)) = CountRow::find_by_statement(stmt).one(db).await { + metrics::gauge!("db_pool_connections_active").set(row.cnt as f64); + } + + let stmt = sea_orm::Statement::from_string( + sea_orm::DatabaseBackend::Postgres, + "SELECT COUNT(*)::bigint AS cnt FROM pg_stat_activity WHERE state = 'idle'".to_string(), + ); + if let Ok(Some(row)) = CountRow::find_by_statement(stmt).one(db).await { + metrics::gauge!("db_pool_connections_idle").set(row.cnt as f64); + } +} + +async fn sample_eventbus_backlog(db: &sea_orm::DatabaseConnection) { + use sea_orm::FromQueryResult; + + #[derive(FromQueryResult)] + struct CountRow { + cnt: i64, + } + + let stmt = sea_orm::Statement::from_string( + sea_orm::DatabaseBackend::Postgres, + "SELECT COUNT(*)::bigint AS cnt FROM domain_events WHERE status = 'pending'".to_string(), + ); + match CountRow::find_by_statement(stmt).one(db).await { + Ok(Some(row)) => { + metrics::gauge!("eventbus_pending_total").set(row.cnt as f64); + } + _ => { + tracing::debug!("EventBus 积压采样:无法获取 pending 事件数"); + } + } +}