- Base platform from base.git (ERP base: auth, core, config, message, workflow, plugin)
- Created erp-diary module skeleton (lib.rs, dto.rs, error.rs, event.rs, state.rs)
- Integrated erp-diary into workspace and erp-server
- Added DiaryModule registration in main.rs
- Added DiaryState FromRef in state.rs
- Diary routes mounted (empty routes, ready for implementation)
- Product design spec v1.2 preserved in docs/
- Implementation plan preserved in plans/

Cargo check: OK
Cargo test: OK (78+ base tests passing)
104 lines
2.9 KiB
YAML
104 lines
2.9 KiB
YAML
groups:
# ── System-level alerts ──
# Process memory and CPU thresholds.
# NOTE(review): none of these expressions carries a job/instance selector, so
# they evaluate against every scraped target exposing the metric — confirm
# that is intended.
- name: system
  rules:
  # Warning tier: resident memory above 800,000,000 bytes for 5 minutes.
  - alert: HMSHighMemoryUsage
    expr: process_resident_memory_bytes > 800000000
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'HMS 内存使用超过 800MB'
      description: '当前值: {{ $value | humanize }}B'

  # Critical tier: resident memory above 1,000,000,000 bytes for 2 minutes.
  - alert: HMSHighMemoryCritical
    expr: process_resident_memory_bytes > 1000000000
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: 'HMS 内存使用超过 1GB(危险)'
      description: '当前值: {{ $value | humanize }}B'

  # rate() of CPU-seconds is CPU-seconds consumed per second, so 0.8 means
  # 80% of a single core, sustained for 10 minutes.
  - alert: HMSHighCPU
    expr: rate(process_cpu_seconds_total[5m]) > 0.8
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: 'HMS CPU 使用率超过 80%'

# ── Application-level alerts ──
# HTTP error ratio, latency and scrape-availability checks.
- name: application
  rules:
  # Fire when more than 5% of requests in the last 5 minutes returned 5xx.
  # Both sides are aggregated with sum by (job) on purpose: dividing the raw
  # vectors uses one-to-one matching on identical label sets, so every 5xx
  # series would be divided by itself (ratio 1) and the alert would trigger on
  # any sustained 5xx traffic instead of at the 5% threshold.
  - alert: HMSHighErrorRate
    expr: >-
      sum by (job) (rate(http_requests_total{status=~"5.."}[5m]))
      /
      sum by (job) (rate(http_requests_total[5m]))
      > 0.05
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: 'API 5xx 错误率超过 5%'
      description: '当前错误率: {{ $value | humanizePercentage }}'

  # 95th-percentile request duration above 2 seconds for 10 minutes.
  # NOTE(review): there is no aggregation before histogram_quantile, so the
  # quantile is computed separately for each label combination of the bucket
  # series — confirm per-series granularity is what is wanted.
  - alert: HMSSlowResponses
    expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: '95% 请求响应时间超过 2 秒'

  # The scrape target of job "hms" has reported up == 0 for 2 minutes.
  - alert: HMSInstanceDown
    expr: up{job="hms"} == 0
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: 'HMS 服务不可达'

# ── Database alerts ──
# PostgreSQL connection count, replication lag and backup freshness.
- name: database
  rules:
  # Any pg_stat_activity_count series above 80 for 5 minutes.
  # NOTE(review): the comparison is applied per series; if the exporter splits
  # this metric by database/state, the threshold is not a server-wide total —
  # verify against the exporter in use.
  - alert: HMSPostgresConnectionsHigh
    expr: pg_stat_activity_count > 80
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'PostgreSQL 活跃连接数超过 80'

  # Replication lag above 30 for 5 minutes (the summary states seconds).
  - alert: HMSPostgresReplicationLag
    expr: pg_replication_lag > 30
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: 'PostgreSQL 复制延迟超过 30 秒'

  # Last backup timestamp older than 2 days (86400 s * 2), held for 1 hour.
  # NOTE(review): if hms_last_backup_timestamp is never exported the
  # expression returns no series and this alert stays silent.
  - alert: HMSBackupMissing
    expr: time() - hms_last_backup_timestamp > 86400 * 2
    for: 1h
    labels:
      severity: critical
    annotations:
      summary: '数据库备份超过 48 小时未执行'

# ── Redis alerts ──
# Redis memory saturation and availability.
- name: redis
  rules:
  # Used memory above 90% of the configured limit for 5 minutes.
  # The `and ... > 0` guard skips instances whose limit metric is 0: dividing
  # by zero yields +Inf in PromQL, which would otherwise keep this alert
  # firing permanently. (Presumably 0 means "no maxmemory configured" —
  # confirm against the exporter in use.)
  - alert: HMSRedisMemoryHigh
    expr: >-
      (redis_memory_used_bytes / redis_memory_max_bytes > 0.9)
      and
      (redis_memory_max_bytes > 0)
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'Redis 内存使用超过 90%'

  # The exporter has reported redis_up == 0 for 2 minutes.
  - alert: HMSRedisDown
    expr: redis_up == 0
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: 'Redis 服务不可达'