diff --git a/crates/erp-health/src/service/mod.rs b/crates/erp-health/src/service/mod.rs index 318c102..2183fb6 100644 --- a/crates/erp-health/src/service/mod.rs +++ b/crates/erp-health/src/service/mod.rs @@ -23,4 +23,5 @@ pub mod points_service; pub mod seed; pub mod stats_service; pub mod trend_service; +pub mod trend_stats; pub mod validation; diff --git a/crates/erp-health/src/service/trend_stats.rs b/crates/erp-health/src/service/trend_stats.rs new file mode 100644 index 0000000..47c858e --- /dev/null +++ b/crates/erp-health/src/service/trend_stats.rs @@ -0,0 +1,456 @@ +//! 统计计算模块 — 线性回归、移动平均、异常检测 +//! +//! 提供纯函数实现,不依赖数据库连接,方便单元测试。 + +use chrono::NaiveDate; +use serde::{Deserialize, Serialize}; +use std::fmt; + +// --------------------------------------------------------------------------- +// 公共类型 +// --------------------------------------------------------------------------- + +/// 趋势方向枚举 +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum TrendDirection { + Rising, + Falling, + Stable, +} + +impl fmt::Display for TrendDirection { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TrendDirection::Rising => write!(f, "rising"), + TrendDirection::Falling => write!(f, "falling"), + TrendDirection::Stable => write!(f, "stable"), + } + } +} + +/// 线性回归结果 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RegressionResult { + /// 回归斜率 + pub slope: f64, + /// 回归截距 + pub intercept: f64, + /// 决定系数 R^2 + pub r_squared: f64, + /// 趋势方向(基于 slope 判断) + pub direction: TrendDirection, + /// 日变化量(等于 slope) + pub daily_change: f64, + /// 周期内总变化量(slope * 数据点跨度天数) + pub period_change: f64, +} + +/// 异常数据点 +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AnomalyPoint { + /// 异常日期 + pub date: NaiveDate, + /// 实际值 + pub value: f64, + /// 均值 + pub mean: f64, + /// 标准差 + pub std_dev: f64, + /// 偏离程度(偏离几个标准差,带正负号) + pub deviation: f64, +} + +// --------------------------------------------------------------------------- +// 公共函数 +// --------------------------------------------------------------------------- + +/// 最小二乘法线性回归 +/// +/// 输入按日期排序的 (日期, 值) 序列,返回回归分析结果。 +/// 当数据不足 2 个点时返回 None。 +pub fn compute_linear_regression(data: &[(NaiveDate, f64)]) -> Option { + if data.len() < 2 { + return None; + } + + let n = data.len() as f64; + // 将日期转换为相对于第一个日期的天数作为 x 值 + let base_date = data[0].0; + let xs: Vec = data + .iter() + .map(|(d, _)| (*d - base_date).num_days() as f64) + .collect(); + let ys: Vec = data.iter().map(|(_, v)| *v).collect(); + + let sum_x: f64 = xs.iter().sum(); + let sum_y: f64 = ys.iter().sum(); + let sum_xy: f64 = xs.iter().zip(ys.iter()).map(|(x, y)| x * y).sum(); + let sum_x2: f64 = xs.iter().map(|x| x * x).sum(); + + let denominator = n * sum_x2 - sum_x * sum_x; + let slope = (n * sum_xy - sum_x * sum_y) / denominator; + let intercept = (sum_y - slope * sum_x) / n; + + // 计算 R^2 + let mean_y = sum_y / n; + let ss_tot: f64 = ys.iter().map(|y| (y - mean_y).powi(2)).sum(); + let r_squared = if ss_tot > 0.0 { + let ss_res: f64 = ys + .iter() + .zip(xs.iter()) + .map(|(y, x)| (y - (intercept + slope * x)).powi(2)) + .sum(); + 1.0 - ss_res / ss_tot + } else { + // 所有 y 值相同,完美拟合 + 1.0 + }; + + // 方向判断:slope 接近 0 视为稳定 + let direction = if slope.abs() < 1e-10 { + TrendDirection::Stable + } else if slope > 0.0 { + TrendDirection::Rising + } else { + TrendDirection::Falling + }; + + // period_change = slope * (最后一个 x - 第一个 x) + let x_span = xs[xs.len() - 1] - xs[0]; + let period_change = slope * x_span; + + Some(RegressionResult { + slope, + intercept, + r_squared, + direction, + daily_change: slope, + period_change, + }) +} + +/// 移动平均 +/// +/// 对数值序列计算指定窗口大小的简单移动平均。 +/// 前 (window - 1) 个位置输出 NaN,之后每个位置输出前 window 个值的平均。 +/// 当 values 为空或 window 为 0 时返回空 Vec。 +pub fn compute_moving_average(values: &[f64], window: usize) -> Vec { + if values.is_empty() || window == 0 { + return Vec::new(); + } + + let len = values.len(); + let mut result = Vec::with_capacity(len); + + for i in 0..len { + if i + 1 < window { + // 前 (window - 1) 个位置无法计算完整窗口 + result.push(f64::NAN); + } else { + let sum: f64 = values[i + 1 - window..=i].iter().sum(); + result.push(sum / window as f64); + } + } + + result +} + +/// 均值 +/- N 标准差异常检测 +/// +/// 对按日期排序的 (日期, 值) 序列,找出偏离均值超过 std_threshold 倍标准差的数据点。 +/// 当数据不足 2 个点时返回空 Vec。 +pub fn detect_anomalies(data: &[(NaiveDate, f64)], std_threshold: f64) -> Vec { + if data.len() < 3 { + return Vec::new(); + } + + let values: Vec = data.iter().map(|(_, v)| *v).collect(); + let n = values.len() as f64; + let mean: f64 = values.iter().sum::() / n; + let variance: f64 = values.iter().map(|v| (v - mean).powi(2)).sum::() / n; + let std_dev = variance.sqrt(); + + // 标准差为 0 时所有值相同,无异常 + if std_dev < 1e-10 { + return Vec::new(); + } + + let mut anomalies = Vec::new(); + + for (date, value) in data { + let deviation = (value - mean) / std_dev; + if deviation.abs() > std_threshold { + anomalies.push(AnomalyPoint { + date: *date, + value: *value, + mean, + std_dev, + deviation, + }); + } + } + + anomalies +} + +// --------------------------------------------------------------------------- +// 单元测试 +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + /// 辅助函数:创建 NaiveDate + fn d(year: i32, month: u32, day: u32) -> NaiveDate { + NaiveDate::from_ymd_opt(year, month, day).unwrap() + } + + // ======================================================================= + // compute_linear_regression 测试 + // ======================================================================= + + #[test] + fn linear_regression_空数据返回_none() { + let data: Vec<(NaiveDate, f64)> = vec![]; + assert!(compute_linear_regression(&data).is_none()); + } + + #[test] + fn linear_regression_单点返回_none() { + let data = vec![(d(2026, 1, 1), 100.0)]; + assert!(compute_linear_regression(&data).is_none()); + } + + #[test] + fn linear_regression_完全水平线() { + let data = vec![ + (d(2026, 1, 1), 100.0), + (d(2026, 1, 2), 100.0), + (d(2026, 1, 3), 100.0), + (d(2026, 1, 4), 100.0), + (d(2026, 1, 5), 100.0), + ]; + let result = compute_linear_regression(&data).unwrap(); + assert_eq!(result.direction, TrendDirection::Stable); + assert!((result.slope).abs() < 1e-10); + assert!((result.intercept - 100.0).abs() < 1e-10); + assert!((result.r_squared - 1.0).abs() < 1e-10); + assert!((result.daily_change).abs() < 1e-10); + assert!((result.period_change).abs() < 1e-10); + } + + #[test] + fn linear_regression_严格递增() { + let data = vec![ + (d(2026, 1, 1), 10.0), + (d(2026, 1, 2), 20.0), + (d(2026, 1, 3), 30.0), + (d(2026, 1, 4), 40.0), + (d(2026, 1, 5), 50.0), + ]; + let result = compute_linear_regression(&data).unwrap(); + assert_eq!(result.direction, TrendDirection::Rising); + assert!((result.slope - 10.0).abs() < 1e-10); + assert!((result.r_squared - 1.0).abs() < 1e-10); + assert!((result.period_change - 40.0).abs() < 1e-10); + } + + #[test] + fn linear_regression_严格递减() { + let data = vec![ + (d(2026, 1, 1), 50.0), + (d(2026, 1, 2), 40.0), + (d(2026, 1, 3), 30.0), + (d(2026, 1, 4), 20.0), + (d(2026, 1, 5), 10.0), + ]; + let result = compute_linear_regression(&data).unwrap(); + assert_eq!(result.direction, TrendDirection::Falling); + assert!((result.slope - (-10.0)).abs() < 1e-10); + assert!((result.r_squared - 1.0).abs() < 1e-10); + assert!((result.period_change - (-40.0)).abs() < 1e-10); + } + + #[test] + fn linear_regression_含噪声但总体上升趋势() { + // 上升趋势但带噪声 + let data = vec![ + (d(2026, 1, 1), 95.0), + (d(2026, 1, 2), 102.0), + (d(2026, 1, 3), 98.0), + (d(2026, 1, 4), 108.0), + (d(2026, 1, 5), 105.0), + (d(2026, 1, 6), 112.0), + (d(2026, 1, 7), 110.0), + ]; + let result = compute_linear_regression(&data).unwrap(); + assert_eq!(result.direction, TrendDirection::Rising); + assert!(result.slope > 0.0); + // R^2 应该小于 1(有噪声) + assert!(result.r_squared < 1.0); + assert!(result.r_squared > 0.0); + } + + #[test] + fn linear_regression_period_change_等于_slope_乘天数跨度() { + let data = vec![ + (d(2026, 1, 1), 100.0), + (d(2026, 1, 3), 106.0), // 间隔 2 天 + ]; + let result = compute_linear_regression(&data).unwrap(); + // slope = (106 - 100) / 2 = 3.0 + assert!((result.slope - 3.0).abs() < 1e-10); + // period_change = slope * 2 = 6.0 + assert!((result.period_change - 6.0).abs() < 1e-10); + } + + // ======================================================================= + // compute_moving_average 测试 + // ======================================================================= + + #[test] + fn moving_average_空输入返回空() { + assert!(compute_moving_average(&[], 3).is_empty()); + } + + #[test] + fn moving_average_window_为0返回空() { + assert!(compute_moving_average(&[1.0, 2.0, 3.0], 0).is_empty()); + } + + #[test] + fn moving_average_window_大于数据长度返回全nan() { + let result = compute_moving_average(&[1.0, 2.0], 5); + assert_eq!(result.len(), 2); + assert!(result[0].is_nan()); + assert!(result[1].is_nan()); + } + + #[test] + fn moving_average_window_3基本用例() { + let values = vec![1.0, 2.0, 3.0, 4.0, 5.0]; + let result = compute_moving_average(&values, 3); + assert_eq!(result.len(), 5); + assert!(result[0].is_nan()); + assert!(result[1].is_nan()); + assert!((result[2] - 2.0).abs() < 1e-10); // (1+2+3)/3 + assert!((result[3] - 3.0).abs() < 1e-10); // (2+3+4)/3 + assert!((result[4] - 4.0).abs() < 1e-10); // (3+4+5)/3 + } + + #[test] + fn moving_average_window_1等于原数据() { + let values = vec![10.0, 20.0, 30.0]; + let result = compute_moving_average(&values, 1); + assert_eq!(result.len(), 3); + assert!((result[0] - 10.0).abs() < 1e-10); + assert!((result[1] - 20.0).abs() < 1e-10); + assert!((result[2] - 30.0).abs() < 1e-10); + } + + #[test] + fn moving_average_window_等于数据长度() { + let values = vec![10.0, 20.0, 30.0]; + let result = compute_moving_average(&values, 3); + assert_eq!(result.len(), 3); + assert!(result[0].is_nan()); + assert!(result[1].is_nan()); + assert!((result[2] - 20.0).abs() < 1e-10); // (10+20+30)/3 + } + + // ======================================================================= + // detect_anomalies 测试 + // ======================================================================= + + #[test] + fn detect_anomalies_空数据返回空() { + let data: Vec<(NaiveDate, f64)> = vec![]; + assert!(detect_anomalies(&data, 2.0).is_empty()); + } + + #[test] + fn detect_anomalies_单点返回空() { + let data = vec![(d(2026, 1, 1), 100.0)]; + assert!(detect_anomalies(&data, 2.0).is_empty()); + } + + #[test] + fn detect_anomalies_两点返回空() { + let data = vec![(d(2026, 1, 1), 100.0), (d(2026, 1, 2), 200.0)]; + assert!(detect_anomalies(&data, 2.0).is_empty()); + } + + #[test] + fn detect_anomalies_正常数据无异常() { + // 所有数据在 98-102 范围内 + let data: Vec<(NaiveDate, f64)> = (0..10) + .map(|i| (d(2026, 1, 1 + i as u32), 100.0 + (i as f64 * 0.2 - 1.0))) + .collect(); + let anomalies = detect_anomalies(&data, 2.0); + assert!(anomalies.is_empty()); + } + + #[test] + fn detect_anomalies_检测到单个异常点() { + let data = vec![ + (d(2026, 1, 1), 100.0), + (d(2026, 1, 2), 101.0), + (d(2026, 1, 3), 99.0), + (d(2026, 1, 4), 150.0), // 异常:偏离很大 + (d(2026, 1, 5), 100.0), + (d(2026, 1, 6), 101.0), + (d(2026, 1, 7), 99.0), + ]; + let anomalies = detect_anomalies(&data, 2.0); + assert_eq!(anomalies.len(), 1); + assert_eq!(anomalies[0].date, d(2026, 1, 4)); + assert!((anomalies[0].value - 150.0).abs() < 1e-10); + assert!(anomalies[0].deviation > 2.0); + } + + #[test] + fn detect_anomalies_低阈值检测到更多异常() { + let data = vec![ + (d(2026, 1, 1), 100.0), + (d(2026, 1, 2), 110.0), // 偏离略高 + (d(2026, 1, 3), 90.0), // 偏离略低 + (d(2026, 1, 4), 100.0), + (d(2026, 1, 5), 105.0), + ]; + // 阈值 1.0 应该比阈值 2.0 检测到更多异常 + let anomalies_1 = detect_anomalies(&data, 1.0); + let anomalies_2 = detect_anomalies(&data, 2.0); + assert!(anomalies_1.len() >= anomalies_2.len()); + } + + #[test] + fn detect_anomalies_所有相同值无异常() { + let data: Vec<(NaiveDate, f64)> = (0..5) + .map(|i| (d(2026, 1, 1 + i as u32), 100.0)) + .collect(); + let anomalies = detect_anomalies(&data, 2.0); + assert!(anomalies.is_empty()); + } + + #[test] + fn detect_anomalies_异常点包含完整信息() { + let data = vec![ + (d(2026, 1, 1), 100.0), + (d(2026, 1, 2), 100.0), + (d(2026, 1, 3), 200.0), // 明显异常 + (d(2026, 1, 4), 100.0), + (d(2026, 1, 5), 100.0), + ]; + let anomalies = detect_anomalies(&data, 1.5); + assert_eq!(anomalies.len(), 1); + let a = &anomalies[0]; + assert_eq!(a.date, d(2026, 1, 3)); + assert!((a.value - 200.0).abs() < 1e-10); + // mean = (100+100+200+100+100)/5 = 120 + assert!((a.mean - 120.0).abs() < 1e-10); + assert!(a.std_dev > 0.0); + assert!(a.deviation > 1.5); + } +}