feat(health): 统计计算模块 — 线性回归、移动平均、异常检测
新增 trend_stats.rs 纯函数模块,提供三个统计计算能力:
- compute_linear_regression: 最小二乘法线性回归,返回 slope/intercept/R^2/方向/日变化/周期变化
- compute_moving_average: 简单移动平均,支持任意窗口大小
- detect_anomalies: 均值 +/- N 标准差异常检测

包含 21 个单元测试,覆盖边界条件和正常用例。
This commit is contained in:
@@ -23,4 +23,5 @@ pub mod points_service;
|
||||
pub mod seed;
|
||||
pub mod stats_service;
|
||||
pub mod trend_service;
|
||||
pub mod trend_stats;
|
||||
pub mod validation;
|
||||
|
||||
456
crates/erp-health/src/service/trend_stats.rs
Normal file
456
crates/erp-health/src/service/trend_stats.rs
Normal file
@@ -0,0 +1,456 @@
|
||||
//! 统计计算模块 — 线性回归、移动平均、异常检测
|
||||
//!
|
||||
//! 提供纯函数实现,不依赖数据库连接,方便单元测试。
|
||||
|
||||
use chrono::NaiveDate;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 公共类型
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// 趋势方向枚举
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum TrendDirection {
|
||||
Rising,
|
||||
Falling,
|
||||
Stable,
|
||||
}
|
||||
|
||||
impl fmt::Display for TrendDirection {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
TrendDirection::Rising => write!(f, "rising"),
|
||||
TrendDirection::Falling => write!(f, "falling"),
|
||||
TrendDirection::Stable => write!(f, "stable"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// 线性回归结果
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RegressionResult {
|
||||
/// 回归斜率
|
||||
pub slope: f64,
|
||||
/// 回归截距
|
||||
pub intercept: f64,
|
||||
/// 决定系数 R^2
|
||||
pub r_squared: f64,
|
||||
/// 趋势方向(基于 slope 判断)
|
||||
pub direction: TrendDirection,
|
||||
/// 日变化量(等于 slope)
|
||||
pub daily_change: f64,
|
||||
/// 周期内总变化量(slope * 数据点跨度天数)
|
||||
pub period_change: f64,
|
||||
}
|
||||
|
||||
/// 异常数据点
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AnomalyPoint {
|
||||
/// 异常日期
|
||||
pub date: NaiveDate,
|
||||
/// 实际值
|
||||
pub value: f64,
|
||||
/// 均值
|
||||
pub mean: f64,
|
||||
/// 标准差
|
||||
pub std_dev: f64,
|
||||
/// 偏离程度(偏离几个标准差,带正负号)
|
||||
pub deviation: f64,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 公共函数
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// 最小二乘法线性回归
|
||||
///
|
||||
/// 输入按日期排序的 (日期, 值) 序列,返回回归分析结果。
|
||||
/// 当数据不足 2 个点时返回 None。
|
||||
pub fn compute_linear_regression(data: &[(NaiveDate, f64)]) -> Option<RegressionResult> {
|
||||
if data.len() < 2 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let n = data.len() as f64;
|
||||
// 将日期转换为相对于第一个日期的天数作为 x 值
|
||||
let base_date = data[0].0;
|
||||
let xs: Vec<f64> = data
|
||||
.iter()
|
||||
.map(|(d, _)| (*d - base_date).num_days() as f64)
|
||||
.collect();
|
||||
let ys: Vec<f64> = data.iter().map(|(_, v)| *v).collect();
|
||||
|
||||
let sum_x: f64 = xs.iter().sum();
|
||||
let sum_y: f64 = ys.iter().sum();
|
||||
let sum_xy: f64 = xs.iter().zip(ys.iter()).map(|(x, y)| x * y).sum();
|
||||
let sum_x2: f64 = xs.iter().map(|x| x * x).sum();
|
||||
|
||||
let denominator = n * sum_x2 - sum_x * sum_x;
|
||||
let slope = (n * sum_xy - sum_x * sum_y) / denominator;
|
||||
let intercept = (sum_y - slope * sum_x) / n;
|
||||
|
||||
// 计算 R^2
|
||||
let mean_y = sum_y / n;
|
||||
let ss_tot: f64 = ys.iter().map(|y| (y - mean_y).powi(2)).sum();
|
||||
let r_squared = if ss_tot > 0.0 {
|
||||
let ss_res: f64 = ys
|
||||
.iter()
|
||||
.zip(xs.iter())
|
||||
.map(|(y, x)| (y - (intercept + slope * x)).powi(2))
|
||||
.sum();
|
||||
1.0 - ss_res / ss_tot
|
||||
} else {
|
||||
// 所有 y 值相同,完美拟合
|
||||
1.0
|
||||
};
|
||||
|
||||
// 方向判断:slope 接近 0 视为稳定
|
||||
let direction = if slope.abs() < 1e-10 {
|
||||
TrendDirection::Stable
|
||||
} else if slope > 0.0 {
|
||||
TrendDirection::Rising
|
||||
} else {
|
||||
TrendDirection::Falling
|
||||
};
|
||||
|
||||
// period_change = slope * (最后一个 x - 第一个 x)
|
||||
let x_span = xs[xs.len() - 1] - xs[0];
|
||||
let period_change = slope * x_span;
|
||||
|
||||
Some(RegressionResult {
|
||||
slope,
|
||||
intercept,
|
||||
r_squared,
|
||||
direction,
|
||||
daily_change: slope,
|
||||
period_change,
|
||||
})
|
||||
}
|
||||
|
||||
/// Simple moving average over a numeric series.
///
/// The output has the same length as `values`. The first `window - 1`
/// positions cannot cover a full window and hold `NaN`; every later position
/// holds the mean of the `window` values ending at that position.
///
/// Returns an empty `Vec` when `values` is empty or `window` is 0.
pub fn compute_moving_average(values: &[f64], window: usize) -> Vec<f64> {
    if window == 0 || values.is_empty() {
        return Vec::new();
    }

    // Number of leading slots without a complete window (capped at the input
    // length so an oversized window yields an all-NaN result).
    let warm_up = (window - 1).min(values.len());
    let divisor = window as f64;

    let mut averages = Vec::with_capacity(values.len());
    averages.resize(warm_up, f64::NAN);
    averages.extend(
        values
            .windows(window)
            .map(|chunk| chunk.iter().sum::<f64>() / divisor),
    );
    averages
}
|
||||
|
||||
/// 均值 +/- N 标准差异常检测
|
||||
///
|
||||
/// 对按日期排序的 (日期, 值) 序列,找出偏离均值超过 std_threshold 倍标准差的数据点。
|
||||
/// 当数据不足 2 个点时返回空 Vec。
|
||||
pub fn detect_anomalies(data: &[(NaiveDate, f64)], std_threshold: f64) -> Vec<AnomalyPoint> {
|
||||
if data.len() < 3 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let values: Vec<f64> = data.iter().map(|(_, v)| *v).collect();
|
||||
let n = values.len() as f64;
|
||||
let mean: f64 = values.iter().sum::<f64>() / n;
|
||||
let variance: f64 = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / n;
|
||||
let std_dev = variance.sqrt();
|
||||
|
||||
// 标准差为 0 时所有值相同,无异常
|
||||
if std_dev < 1e-10 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut anomalies = Vec::new();
|
||||
|
||||
for (date, value) in data {
|
||||
let deviation = (value - mean) / std_dev;
|
||||
if deviation.abs() > std_threshold {
|
||||
anomalies.push(AnomalyPoint {
|
||||
date: *date,
|
||||
value: *value,
|
||||
mean,
|
||||
std_dev,
|
||||
deviation,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
anomalies
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 单元测试
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing floating-point results.
    const EPS: f64 = 1e-10;

    /// Test helper: builds a `NaiveDate`, panicking on an invalid calendar date.
    fn d(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Test helper: asserts that `actual` is within `EPS` of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < EPS,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    // =======================================================================
    // compute_linear_regression
    // =======================================================================

    #[test]
    fn linear_regression_空数据返回_none() {
        let data: Vec<(NaiveDate, f64)> = vec![];
        assert!(compute_linear_regression(&data).is_none());
    }

    #[test]
    fn linear_regression_单点返回_none() {
        let data = vec![(d(2026, 1, 1), 100.0)];
        assert!(compute_linear_regression(&data).is_none());
    }

    #[test]
    fn linear_regression_完全水平线() {
        let data = vec![
            (d(2026, 1, 1), 100.0),
            (d(2026, 1, 2), 100.0),
            (d(2026, 1, 3), 100.0),
            (d(2026, 1, 4), 100.0),
            (d(2026, 1, 5), 100.0),
        ];
        let result = compute_linear_regression(&data).unwrap();
        assert_eq!(result.direction, TrendDirection::Stable);
        assert_close(result.slope, 0.0);
        assert_close(result.intercept, 100.0);
        assert_close(result.r_squared, 1.0);
        assert_close(result.daily_change, 0.0);
        assert_close(result.period_change, 0.0);
    }

    #[test]
    fn linear_regression_严格递增() {
        let data = vec![
            (d(2026, 1, 1), 10.0),
            (d(2026, 1, 2), 20.0),
            (d(2026, 1, 3), 30.0),
            (d(2026, 1, 4), 40.0),
            (d(2026, 1, 5), 50.0),
        ];
        let result = compute_linear_regression(&data).unwrap();
        assert_eq!(result.direction, TrendDirection::Rising);
        assert_close(result.slope, 10.0);
        assert_close(result.r_squared, 1.0);
        assert_close(result.period_change, 40.0);
    }

    #[test]
    fn linear_regression_严格递减() {
        let data = vec![
            (d(2026, 1, 1), 50.0),
            (d(2026, 1, 2), 40.0),
            (d(2026, 1, 3), 30.0),
            (d(2026, 1, 4), 20.0),
            (d(2026, 1, 5), 10.0),
        ];
        let result = compute_linear_regression(&data).unwrap();
        assert_eq!(result.direction, TrendDirection::Falling);
        assert_close(result.slope, -10.0);
        assert_close(result.r_squared, 1.0);
        assert_close(result.period_change, -40.0);
    }

    #[test]
    fn linear_regression_含噪声但总体上升趋势() {
        // Upward trend with noise on top.
        let data = vec![
            (d(2026, 1, 1), 95.0),
            (d(2026, 1, 2), 102.0),
            (d(2026, 1, 3), 98.0),
            (d(2026, 1, 4), 108.0),
            (d(2026, 1, 5), 105.0),
            (d(2026, 1, 6), 112.0),
            (d(2026, 1, 7), 110.0),
        ];
        let result = compute_linear_regression(&data).unwrap();
        assert_eq!(result.direction, TrendDirection::Rising);
        assert!(result.slope > 0.0);
        // Noise keeps the fit from being perfect, so R^2 lies strictly between 0 and 1.
        assert!(result.r_squared < 1.0);
        assert!(result.r_squared > 0.0);
    }

    #[test]
    fn linear_regression_period_change_等于_slope_乘天数跨度() {
        let data = vec![
            (d(2026, 1, 1), 100.0),
            (d(2026, 1, 3), 106.0), // 2 days apart
        ];
        let result = compute_linear_regression(&data).unwrap();
        // slope = (106 - 100) / 2 = 3.0
        assert_close(result.slope, 3.0);
        // period_change = slope * 2 = 6.0
        assert_close(result.period_change, 6.0);
    }

    // =======================================================================
    // compute_moving_average
    // =======================================================================

    #[test]
    fn moving_average_空输入返回空() {
        assert!(compute_moving_average(&[], 3).is_empty());
    }

    #[test]
    fn moving_average_window_为0返回空() {
        assert!(compute_moving_average(&[1.0, 2.0, 3.0], 0).is_empty());
    }

    #[test]
    fn moving_average_window_大于数据长度返回全nan() {
        let result = compute_moving_average(&[1.0, 2.0], 5);
        assert_eq!(result.len(), 2);
        assert!(result[0].is_nan());
        assert!(result[1].is_nan());
    }

    #[test]
    fn moving_average_window_3基本用例() {
        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let result = compute_moving_average(&values, 3);
        assert_eq!(result.len(), 5);
        assert!(result[0].is_nan());
        assert!(result[1].is_nan());
        assert_close(result[2], 2.0); // (1+2+3)/3
        assert_close(result[3], 3.0); // (2+3+4)/3
        assert_close(result[4], 4.0); // (3+4+5)/3
    }

    #[test]
    fn moving_average_window_1等于原数据() {
        let values = vec![10.0, 20.0, 30.0];
        let result = compute_moving_average(&values, 1);
        assert_eq!(result.len(), 3);
        assert_close(result[0], 10.0);
        assert_close(result[1], 20.0);
        assert_close(result[2], 30.0);
    }

    #[test]
    fn moving_average_window_等于数据长度() {
        let values = vec![10.0, 20.0, 30.0];
        let result = compute_moving_average(&values, 3);
        assert_eq!(result.len(), 3);
        assert!(result[0].is_nan());
        assert!(result[1].is_nan());
        assert_close(result[2], 20.0); // (10+20+30)/3
    }

    // =======================================================================
    // detect_anomalies
    // =======================================================================

    #[test]
    fn detect_anomalies_空数据返回空() {
        let data: Vec<(NaiveDate, f64)> = vec![];
        assert!(detect_anomalies(&data, 2.0).is_empty());
    }

    #[test]
    fn detect_anomalies_单点返回空() {
        let data = vec![(d(2026, 1, 1), 100.0)];
        assert!(detect_anomalies(&data, 2.0).is_empty());
    }

    #[test]
    fn detect_anomalies_两点返回空() {
        let data = vec![(d(2026, 1, 1), 100.0), (d(2026, 1, 2), 200.0)];
        assert!(detect_anomalies(&data, 2.0).is_empty());
    }

    #[test]
    fn detect_anomalies_正常数据无异常() {
        // Evenly spread values from 99.0 to 100.8: nothing exceeds 2 sigma.
        let data: Vec<(NaiveDate, f64)> = (0..10)
            .map(|i| (d(2026, 1, 1 + i as u32), 100.0 + (i as f64 * 0.2 - 1.0)))
            .collect();
        let anomalies = detect_anomalies(&data, 2.0);
        assert!(anomalies.is_empty());
    }

    #[test]
    fn detect_anomalies_检测到单个异常点() {
        let data = vec![
            (d(2026, 1, 1), 100.0),
            (d(2026, 1, 2), 101.0),
            (d(2026, 1, 3), 99.0),
            (d(2026, 1, 4), 150.0), // outlier: far above the rest
            (d(2026, 1, 5), 100.0),
            (d(2026, 1, 6), 101.0),
            (d(2026, 1, 7), 99.0),
        ];
        let anomalies = detect_anomalies(&data, 2.0);
        assert_eq!(anomalies.len(), 1);
        assert_eq!(anomalies[0].date, d(2026, 1, 4));
        assert_close(anomalies[0].value, 150.0);
        assert!(anomalies[0].deviation > 2.0);
    }

    #[test]
    fn detect_anomalies_低阈值检测到更多异常() {
        // mean = 101, population variance = 44, sigma ~= 6.63
        let data = vec![
            (d(2026, 1, 1), 100.0),
            (d(2026, 1, 2), 110.0), // z ~= +1.36
            (d(2026, 1, 3), 90.0),  // z ~= -1.66
            (d(2026, 1, 4), 100.0),
            (d(2026, 1, 5), 105.0),
        ];
        let anomalies_1 = detect_anomalies(&data, 1.0);
        let anomalies_2 = detect_anomalies(&data, 2.0);

        // Threshold 1.0 flags both excursions, threshold 2.0 flags none.
        assert_eq!(anomalies_1.len(), 2);
        assert!(anomalies_2.is_empty());
        assert!(anomalies_1.len() > anomalies_2.len());

        // Anomalies come back in input order and keep the sign of the deviation.
        assert_eq!(anomalies_1[0].date, d(2026, 1, 2));
        assert!(anomalies_1[0].deviation > 1.0);
        assert_eq!(anomalies_1[1].date, d(2026, 1, 3));
        assert!(anomalies_1[1].deviation < -1.0);
    }

    #[test]
    fn detect_anomalies_所有相同值无异常() {
        let data: Vec<(NaiveDate, f64)> = (0..5)
            .map(|i| (d(2026, 1, 1 + i as u32), 100.0))
            .collect();
        let anomalies = detect_anomalies(&data, 2.0);
        assert!(anomalies.is_empty());
    }

    #[test]
    fn detect_anomalies_异常点包含完整信息() {
        let data = vec![
            (d(2026, 1, 1), 100.0),
            (d(2026, 1, 2), 100.0),
            (d(2026, 1, 3), 200.0), // obvious outlier
            (d(2026, 1, 4), 100.0),
            (d(2026, 1, 5), 100.0),
        ];
        let anomalies = detect_anomalies(&data, 1.5);
        assert_eq!(anomalies.len(), 1);
        let a = &anomalies[0];
        assert_eq!(a.date, d(2026, 1, 3));
        assert_close(a.value, 200.0);
        // mean = (100+100+200+100+100)/5 = 120
        assert_close(a.mean, 120.0);
        // population variance = (4*20^2 + 80^2)/5 = 1600, so sigma = 40
        assert_close(a.std_dev, 40.0);
        // deviation = (200 - 120) / 40 = 2.0
        assert_close(a.deviation, 2.0);
    }
}
|
||||
Reference in New Issue
Block a user