feat(health): 统计计算模块 — 线性回归、移动平均、异常检测
Some checks failed
CI / rust-check (push) Has been cancelled
CI / rust-test (push) Has been cancelled
CI / frontend-build (push) Has been cancelled
CI / security-audit (push) Has been cancelled

新增 trend_stats.rs 纯函数模块,提供三个统计计算能力:
- compute_linear_regression: 最小二乘法线性回归,返回 slope/intercept/R^2/方向/日变化/周期变化
- compute_moving_average: 简单移动平均,支持任意窗口大小
- detect_anomalies: 均值 +/- N 标准差异常检测

包含 21 个单元测试,覆盖边界条件和正常用例。
This commit is contained in:
iven
2026-04-28 19:50:46 +08:00
parent 781e1191a5
commit 4745b1e824
2 changed files with 457 additions and 0 deletions

View File

@@ -23,4 +23,5 @@ pub mod points_service;
pub mod seed;
pub mod stats_service;
pub mod trend_service;
pub mod trend_stats;
pub mod validation;

View File

@@ -0,0 +1,456 @@
//! 统计计算模块 — 线性回归、移动平均、异常检测
//!
//! 提供纯函数实现,不依赖数据库连接,方便单元测试。
use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
use std::fmt;
// ---------------------------------------------------------------------------
// 公共类型
// ---------------------------------------------------------------------------
/// Direction of a trend.
///
/// Serialized by serde using lowercase variant names
/// (`"rising"`, `"falling"`, `"stable"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum TrendDirection {
// Values are trending upward.
Rising,
// Values are trending downward.
Falling,
// No meaningful upward or downward movement.
Stable,
}
/// Renders the direction as its lowercase English name
/// (`rising`, `falling` or `stable`).
impl fmt::Display for TrendDirection {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match self {
            TrendDirection::Rising => "rising",
            TrendDirection::Falling => "falling",
            TrendDirection::Stable => "stable",
        };
        f.write_str(label)
    }
}
/// Result of a linear regression.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegressionResult {
/// Regression slope.
pub slope: f64,
/// Regression intercept.
pub intercept: f64,
/// Coefficient of determination, R^2.
pub r_squared: f64,
/// Trend direction (decided from `slope`).
pub direction: TrendDirection,
/// Change per day (equal to `slope`).
pub daily_change: f64,
/// Total change over the period (`slope` * number of days spanned by the data points).
pub period_change: f64,
}
/// A single anomalous data point.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyPoint {
/// Date of the anomalous observation.
pub date: NaiveDate,
/// Observed value.
pub value: f64,
/// Mean of the series.
pub mean: f64,
/// Standard deviation of the series.
pub std_dev: f64,
/// Signed distance from the mean, expressed in standard deviations.
pub deviation: f64,
}
// ---------------------------------------------------------------------------
// 公共函数
// ---------------------------------------------------------------------------
/// Ordinary least-squares linear regression over a dated series.
///
/// Each date is converted to "whole days since the first entry's date" and used
/// as the x value; the paired number is the y value. The input is expected to be
/// sorted by date: `period_change` is computed from the first and last entries.
///
/// # Returns
///
/// `None` when
/// - fewer than 2 points are supplied, or
/// - every point carries the same date, so the x values have no spread and a
///   slope cannot be determined.
///
/// Otherwise `Some(RegressionResult)` where
/// - `slope` / `intercept` describe the fitted line `y = intercept + slope * x`,
/// - `r_squared` is `1 - SS_res / SS_tot` (reported as `1.0` when all y values
///   are identical),
/// - `direction` is `Stable` when `|slope| < 1e-10`, otherwise `Rising` or
///   `Falling` by the sign of `slope`,
/// - `daily_change` equals `slope`,
/// - `period_change` equals `slope` multiplied by the day span between the
///   first and last entries.
pub fn compute_linear_regression(data: &[(NaiveDate, f64)]) -> Option<RegressionResult> {
    if data.len() < 2 {
        return None;
    }
    let n = data.len() as f64;

    // Use days elapsed since the first date as the x coordinate.
    let base_date = data[0].0;
    let xs: Vec<f64> = data
        .iter()
        .map(|(d, _)| (*d - base_date).num_days() as f64)
        .collect();
    let ys: Vec<f64> = data.iter().map(|(_, v)| *v).collect();

    // With no spread in x the least-squares denominator is zero; dividing by it
    // would yield NaN slope/intercept (and a bogus `Falling` direction, since
    // every comparison against NaN is false). Day offsets are whole numbers, so
    // an exact equality check is reliable here.
    if xs.iter().all(|&x| x == xs[0]) {
        return None;
    }

    let sum_x: f64 = xs.iter().sum();
    let sum_y: f64 = ys.iter().sum();
    let sum_xy: f64 = xs.iter().zip(ys.iter()).map(|(x, y)| x * y).sum();
    let sum_x2: f64 = xs.iter().map(|x| x * x).sum();

    let denominator = n * sum_x2 - sum_x * sum_x;
    let slope = (n * sum_xy - sum_x * sum_y) / denominator;
    let intercept = (sum_y - slope * sum_x) / n;

    // R^2 = 1 - SS_res / SS_tot.
    let mean_y = sum_y / n;
    let ss_tot: f64 = ys.iter().map(|y| (y - mean_y).powi(2)).sum();
    let r_squared = if ss_tot > 0.0 {
        let ss_res: f64 = ys
            .iter()
            .zip(xs.iter())
            .map(|(y, x)| (y - (intercept + slope * x)).powi(2))
            .sum();
        1.0 - ss_res / ss_tot
    } else {
        // All y values are identical: the horizontal line fits perfectly.
        1.0
    };

    // A slope this close to zero is treated as "no trend".
    let direction = if slope.abs() < 1e-10 {
        TrendDirection::Stable
    } else if slope > 0.0 {
        TrendDirection::Rising
    } else {
        TrendDirection::Falling
    };

    // Total change across the span between the first and last entries.
    let x_span = xs[xs.len() - 1] - xs[0];
    let period_change = slope * x_span;

    Some(RegressionResult {
        slope,
        intercept,
        r_squared,
        direction,
        daily_change: slope,
        period_change,
    })
}
/// Simple moving average over a numeric series.
///
/// The output has the same length as `values`. The first `window - 1` positions
/// (or all positions, if the series is shorter than the window) hold `NaN`
/// because no full window is available yet; every later position holds the mean
/// of the `window` values ending at that position.
///
/// Returns an empty `Vec` when `values` is empty or `window` is 0.
pub fn compute_moving_average(values: &[f64], window: usize) -> Vec<f64> {
    if window == 0 || values.is_empty() {
        return Vec::new();
    }

    // Number of leading slots that cannot be filled with a complete window.
    let warmup = (window - 1).min(values.len());
    let divisor = window as f64;

    std::iter::repeat(f64::NAN)
        .take(warmup)
        .chain(
            values
                .windows(window)
                .map(|chunk| chunk.iter().sum::<f64>() / divisor),
        )
        .collect()
}
/// Mean +/- N standard deviations anomaly detection.
///
/// Computes the mean and the population standard deviation (divisor `n`) of all
/// values, then reports every point whose distance from the mean is strictly
/// greater than `std_threshold` standard deviations. Points are returned in
/// input order.
///
/// Returns an empty `Vec` when fewer than 3 points are supplied, or when the
/// standard deviation is below `1e-10` (all values effectively identical).
pub fn detect_anomalies(data: &[(NaiveDate, f64)], std_threshold: f64) -> Vec<AnomalyPoint> {
    if data.len() < 3 {
        return Vec::new();
    }

    let count = data.len() as f64;
    let mean = data.iter().map(|&(_, v)| v).sum::<f64>() / count;
    let variance = data
        .iter()
        .map(|&(_, v)| (v - mean).powi(2))
        .sum::<f64>()
        / count;
    let std_dev = variance.sqrt();

    // A (near-)zero spread means every value is the same: nothing can be an outlier.
    if std_dev < 1e-10 {
        return Vec::new();
    }

    data.iter()
        .filter_map(|&(date, value)| {
            // Signed z-score of this observation.
            let deviation = (value - mean) / std_dev;
            (deviation.abs() > std_threshold).then(|| AnomalyPoint {
                date,
                value,
                mean,
                std_dev,
                deviation,
            })
        })
        .collect()
}
// ---------------------------------------------------------------------------
// 单元测试
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
/// Helper: builds a `NaiveDate`, panicking if the calendar date is invalid.
fn d(year: i32, month: u32, day: u32) -> NaiveDate {
NaiveDate::from_ymd_opt(year, month, day).unwrap()
}
// =======================================================================
// compute_linear_regression tests
// =======================================================================
// Empty input yields None.
#[test]
fn linear_regression_空数据返回_none() {
let data: Vec<(NaiveDate, f64)> = vec![];
assert!(compute_linear_regression(&data).is_none());
}
// A single point yields None.
#[test]
fn linear_regression_单点返回_none() {
let data = vec![(d(2026, 1, 1), 100.0)];
assert!(compute_linear_regression(&data).is_none());
}
// A perfectly horizontal line: zero slope, Stable direction, R^2 of 1.
#[test]
fn linear_regression_完全水平线() {
let data = vec![
(d(2026, 1, 1), 100.0),
(d(2026, 1, 2), 100.0),
(d(2026, 1, 3), 100.0),
(d(2026, 1, 4), 100.0),
(d(2026, 1, 5), 100.0),
];
let result = compute_linear_regression(&data).unwrap();
assert_eq!(result.direction, TrendDirection::Stable);
assert!((result.slope).abs() < 1e-10);
assert!((result.intercept - 100.0).abs() < 1e-10);
assert!((result.r_squared - 1.0).abs() < 1e-10);
assert!((result.daily_change).abs() < 1e-10);
assert!((result.period_change).abs() < 1e-10);
}
// Strictly increasing data: +10 per day over a 4-day span.
#[test]
fn linear_regression_严格递增() {
let data = vec![
(d(2026, 1, 1), 10.0),
(d(2026, 1, 2), 20.0),
(d(2026, 1, 3), 30.0),
(d(2026, 1, 4), 40.0),
(d(2026, 1, 5), 50.0),
];
let result = compute_linear_regression(&data).unwrap();
assert_eq!(result.direction, TrendDirection::Rising);
assert!((result.slope - 10.0).abs() < 1e-10);
assert!((result.r_squared - 1.0).abs() < 1e-10);
assert!((result.period_change - 40.0).abs() < 1e-10);
}
// Strictly decreasing data: -10 per day over a 4-day span.
#[test]
fn linear_regression_严格递减() {
let data = vec![
(d(2026, 1, 1), 50.0),
(d(2026, 1, 2), 40.0),
(d(2026, 1, 3), 30.0),
(d(2026, 1, 4), 20.0),
(d(2026, 1, 5), 10.0),
];
let result = compute_linear_regression(&data).unwrap();
assert_eq!(result.direction, TrendDirection::Falling);
assert!((result.slope - (-10.0)).abs() < 1e-10);
assert!((result.r_squared - 1.0).abs() < 1e-10);
assert!((result.period_change - (-40.0)).abs() < 1e-10);
}
// Noisy data with an overall upward trend.
#[test]
fn linear_regression_含噪声但总体上升趋势() {
// Upward trend with noise mixed in.
let data = vec![
(d(2026, 1, 1), 95.0),
(d(2026, 1, 2), 102.0),
(d(2026, 1, 3), 98.0),
(d(2026, 1, 4), 108.0),
(d(2026, 1, 5), 105.0),
(d(2026, 1, 6), 112.0),
(d(2026, 1, 7), 110.0),
];
let result = compute_linear_regression(&data).unwrap();
assert_eq!(result.direction, TrendDirection::Rising);
assert!(result.slope > 0.0);
// R^2 should be below 1 (the data is noisy) but still positive.
assert!(result.r_squared < 1.0);
assert!(result.r_squared > 0.0);
}
// period_change equals slope multiplied by the day span.
#[test]
fn linear_regression_period_change_等于_slope_乘天数跨度() {
let data = vec![
(d(2026, 1, 1), 100.0),
(d(2026, 1, 3), 106.0), // 2-day gap
];
let result = compute_linear_regression(&data).unwrap();
// slope = (106 - 100) / 2 = 3.0
assert!((result.slope - 3.0).abs() < 1e-10);
// period_change = slope * 2 = 6.0
assert!((result.period_change - 6.0).abs() < 1e-10);
}
// =======================================================================
// compute_moving_average tests
// =======================================================================
// Empty input yields an empty result.
#[test]
fn moving_average_空输入返回空() {
assert!(compute_moving_average(&[], 3).is_empty());
}
// A window of 0 yields an empty result.
#[test]
fn moving_average_window_为0返回空() {
assert!(compute_moving_average(&[1.0, 2.0, 3.0], 0).is_empty());
}
// A window longer than the data yields all NaN.
#[test]
fn moving_average_window_大于数据长度返回全nan() {
let result = compute_moving_average(&[1.0, 2.0], 5);
assert_eq!(result.len(), 2);
assert!(result[0].is_nan());
assert!(result[1].is_nan());
}
// Basic case with a window of 3.
#[test]
fn moving_average_window_3基本用例() {
let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let result = compute_moving_average(&values, 3);
assert_eq!(result.len(), 5);
assert!(result[0].is_nan());
assert!(result[1].is_nan());
assert!((result[2] - 2.0).abs() < 1e-10); // (1+2+3)/3
assert!((result[3] - 3.0).abs() < 1e-10); // (2+3+4)/3
assert!((result[4] - 4.0).abs() < 1e-10); // (3+4+5)/3
}
// A window of 1 reproduces the input.
#[test]
fn moving_average_window_1等于原数据() {
let values = vec![10.0, 20.0, 30.0];
let result = compute_moving_average(&values, 1);
assert_eq!(result.len(), 3);
assert!((result[0] - 10.0).abs() < 1e-10);
assert!((result[1] - 20.0).abs() < 1e-10);
assert!((result[2] - 30.0).abs() < 1e-10);
}
// A window equal to the data length: only the last slot has a value.
#[test]
fn moving_average_window_等于数据长度() {
let values = vec![10.0, 20.0, 30.0];
let result = compute_moving_average(&values, 3);
assert_eq!(result.len(), 3);
assert!(result[0].is_nan());
assert!(result[1].is_nan());
assert!((result[2] - 20.0).abs() < 1e-10); // (10+20+30)/3
}
// =======================================================================
// detect_anomalies tests
// =======================================================================
// Empty input yields no anomalies.
#[test]
fn detect_anomalies_空数据返回空() {
let data: Vec<(NaiveDate, f64)> = vec![];
assert!(detect_anomalies(&data, 2.0).is_empty());
}
// A single point yields no anomalies.
#[test]
fn detect_anomalies_单点返回空() {
let data = vec![(d(2026, 1, 1), 100.0)];
assert!(detect_anomalies(&data, 2.0).is_empty());
}
// Two points yield no anomalies.
#[test]
fn detect_anomalies_两点返回空() {
let data = vec![(d(2026, 1, 1), 100.0), (d(2026, 1, 2), 200.0)];
assert!(detect_anomalies(&data, 2.0).is_empty());
}
// Well-behaved data produces no anomalies.
#[test]
fn detect_anomalies_正常数据无异常() {
// All values lie within the 98-102 range.
let data: Vec<(NaiveDate, f64)> = (0..10)
.map(|i| (d(2026, 1, 1 + i as u32), 100.0 + (i as f64 * 0.2 - 1.0)))
.collect();
let anomalies = detect_anomalies(&data, 2.0);
assert!(anomalies.is_empty());
}
// Exactly one outlier is detected.
#[test]
fn detect_anomalies_检测到单个异常点() {
let data = vec![
(d(2026, 1, 1), 100.0),
(d(2026, 1, 2), 101.0),
(d(2026, 1, 3), 99.0),
(d(2026, 1, 4), 150.0), // anomaly: far away from the other values
(d(2026, 1, 5), 100.0),
(d(2026, 1, 6), 101.0),
(d(2026, 1, 7), 99.0),
];
let anomalies = detect_anomalies(&data, 2.0);
assert_eq!(anomalies.len(), 1);
assert_eq!(anomalies[0].date, d(2026, 1, 4));
assert!((anomalies[0].value - 150.0).abs() < 1e-10);
assert!(anomalies[0].deviation > 2.0);
}
// A lower threshold detects at least as many anomalies.
#[test]
fn detect_anomalies_低阈值检测到更多异常() {
let data = vec![
(d(2026, 1, 1), 100.0),
(d(2026, 1, 2), 110.0), // slightly high
(d(2026, 1, 3), 90.0), // slightly low
(d(2026, 1, 4), 100.0),
(d(2026, 1, 5), 105.0),
];
// Threshold 1.0 must flag at least as many points as threshold 2.0.
let anomalies_1 = detect_anomalies(&data, 1.0);
let anomalies_2 = detect_anomalies(&data, 2.0);
assert!(anomalies_1.len() >= anomalies_2.len());
}
// Identical values produce no anomalies.
#[test]
fn detect_anomalies_所有相同值无异常() {
let data: Vec<(NaiveDate, f64)> = (0..5)
.map(|i| (d(2026, 1, 1 + i as u32), 100.0))
.collect();
let anomalies = detect_anomalies(&data, 2.0);
assert!(anomalies.is_empty());
}
// A reported anomaly carries date, value, mean, std_dev and deviation.
#[test]
fn detect_anomalies_异常点包含完整信息() {
let data = vec![
(d(2026, 1, 1), 100.0),
(d(2026, 1, 2), 100.0),
(d(2026, 1, 3), 200.0), // obvious anomaly
(d(2026, 1, 4), 100.0),
(d(2026, 1, 5), 100.0),
];
let anomalies = detect_anomalies(&data, 1.5);
assert_eq!(anomalies.len(), 1);
let a = &anomalies[0];
assert_eq!(a.date, d(2026, 1, 3));
assert!((a.value - 200.0).abs() < 1e-10);
// mean = (100+100+200+100+100)/5 = 120
assert!((a.mean - 120.0).abs() < 1e-10);
assert!(a.std_dev > 0.0);
assert!(a.deviation > 1.5);
}
}