Files
base/docker/prometheus/alerts.yml
iven 59856ac2fc feat: initialize ERP base platform (extracted from HMS)
- Stripped 11 business crates (health, ai, dialysis, plugins)
- Cleaned AppState, AppConfig, main.rs from business coupling
- Reduced migrations from 169 to 53 (base-only)
- Removed health_provider trait from erp-core
- Removed business integration tests
- Removed gateway rate limiting middleware
- Base capabilities: auth, RBAC, JWT, config, workflow, message, plugin, audit, crypto, RLS, multi-tenant

Cargo check: OK
Cargo test: OK
2026-05-31 20:35:57 +08:00

104 lines
2.9 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
groups:
# ── System-level alerts ──
# Process memory and CPU thresholds for the monitored service.
- name: system
  rules:
  # Warning tier: resident memory stays above 800000000 bytes for 5 minutes.
  - alert: HMSHighMemoryUsage
    expr: process_resident_memory_bytes > 800000000
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'HMS 内存使用超过 800MB'
      # Renders the sampled value through the `humanize` template function,
      # followed by a literal "B".
      description: '当前值: {{ $value | humanize }}B'

  # Critical tier: resident memory stays above 1000000000 bytes for 2 minutes.
  - alert: HMSHighMemoryCritical
    expr: process_resident_memory_bytes > 1000000000
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: 'HMS 内存使用超过 1GB危险'
      description: '当前值: {{ $value | humanize }}B'

  # CPU: the 5-minute rate of process_cpu_seconds_total (CPU-seconds consumed
  # per second) stays above 0.8 for 10 minutes.
  - alert: HMSHighCPU
    expr: rate(process_cpu_seconds_total[5m]) > 0.8
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: 'HMS CPU 使用率超过 80%'
# ── Application-level alerts ──
# HTTP error ratio, latency and target availability.
- name: application
  rules:
  # Share of 5xx responses among all requests, per job, over 5 minutes.
  # Both sides are aggregated with `sum by (job)` before dividing: without
  # aggregation the division matches every 5xx series against itself
  # (identical label sets), so the ratio is always 1 and the alert fires on
  # any 5xx traffic regardless of the real error percentage.
  - alert: HMSHighErrorRate
    expr: >-
      sum by (job) (rate(http_requests_total{status=~"5.."}[5m]))
      /
      sum by (job) (rate(http_requests_total[5m]))
      > 0.05
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: "API 5xx 错误率超过 5%"
      description: "当前错误率: {{ $value | humanizePercentage }}"

  # 95th-percentile request duration above 2 for 10 minutes. The quantile is
  # computed from the bucket rates without aggregation, i.e. separately for
  # each label combination the histogram exposes.
  - alert: HMSSlowResponses
    expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: "95% 请求响应时间超过 2 秒"

  # Scrape target with job="hms" has reported up == 0 for 2 minutes.
  # NOTE(review): the job label must match the scrape config — confirm it is
  # still "hms" for this deployment.
  - alert: HMSInstanceDown
    expr: up{job="hms"} == 0
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "HMS 服务不可达"
# ── Database alerts ──
# PostgreSQL connection count, replication lag and backup freshness.
- name: database
  rules:
  # pg_stat_activity_count above 80 for 5 minutes.
  # NOTE(review): if the exporter splits this metric by labels (e.g. database
  # or state), the threshold applies per series, not to the total — confirm.
  - alert: HMSPostgresConnectionsHigh
    expr: pg_stat_activity_count > 80
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'PostgreSQL 活跃连接数超过 80'

  # pg_replication_lag above 30 for 5 minutes (the summary states seconds).
  - alert: HMSPostgresReplicationLag
    expr: pg_replication_lag > 30
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: 'PostgreSQL 复制延迟超过 30 秒'

  # Age of the last backup (now minus hms_last_backup_timestamp) exceeds
  # 86400 * 2 seconds, i.e. 48 hours, and stays that way for 1 hour.
  # NOTE(review): the expression yields no series when the metric is absent,
  # so a backup job that never reported will not trigger this alert — confirm
  # whether that is acceptable.
  - alert: HMSBackupMissing
    expr: time() - hms_last_backup_timestamp > 86400 * 2
    for: 1h
    labels:
      severity: critical
    annotations:
      summary: '数据库备份超过 48 小时未执行'
# ── Redis alerts ──
# Redis memory pressure and availability.
- name: redis
  rules:
  # Used memory above 90% of the configured maximum for 5 minutes.
  # The denominator is filtered with `> 0` so that instances without a memory
  # limit (max bytes reported as 0) are skipped; otherwise the division by
  # zero yields +Inf and the alert would fire permanently.
  - alert: HMSRedisMemoryHigh
    expr: redis_memory_used_bytes / (redis_memory_max_bytes > 0) > 0.9
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "Redis 内存使用超过 90%"

  # redis_up has been 0 for 2 minutes.
  - alert: HMSRedisDown
    expr: redis_up == 0
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "Redis 服务不可达"