Files
nj/docker/prometheus/alerts.yml
iven c539e6fd83 feat: initialize Nuanji (Warm Notes) project
- Base platform from base.git (ERP base: auth, core, config, message, workflow, plugin)
- Created erp-diary module skeleton (lib.rs, dto.rs, error.rs, event.rs, state.rs)
- Integrated erp-diary into workspace and erp-server
- Added DiaryModule registration in main.rs
- Added DiaryState FromRef in state.rs
- Diary routes mounted (empty routes, ready for implementation)
- Product design spec v1.2 preserved in docs/
- Implementation plan preserved in plans/

Cargo check: OK
Cargo test: OK (78+ base tests passing)
2026-05-31 20:52:19 +08:00

104 lines
2.9 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
groups:
# ── System-level alerts ──
# Process-level memory and CPU alerts. Every selector is scoped to
# job="hms" (the job label the instance-down rule in this file uses for the
# service) so that other scraped processes exposing the same process_*
# metric names cannot trigger these alerts.
- name: system
  rules:
  # Warning: resident memory above 800,000,000 bytes for 5 minutes.
  - alert: HMSHighMemoryUsage
    expr: process_resident_memory_bytes{job="hms"} > 800000000
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "HMS 内存使用超过 800MB"
      description: "当前值: {{ $value | humanize }}B"
  # Critical: resident memory above 1,000,000,000 bytes for 2 minutes.
  - alert: HMSHighMemoryCritical
    expr: process_resident_memory_bytes{job="hms"} > 1000000000
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "HMS 内存使用超过 1GB危险"
      description: "当前值: {{ $value | humanize }}B"
  # Warning: CPU-seconds consumed per second (5m rate) above 0.8 for
  # 10 minutes; the summary reports this threshold as 80%.
  - alert: HMSHighCPU
    expr: rate(process_cpu_seconds_total{job="hms"}[5m]) > 0.8
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: "HMS CPU 使用率超过 80%"
      description: "当前使用率: {{ $value | humanizePercentage }}"
# ── Application-level alerts ──
- name: application
  rules:
  # Critical: share of 5xx responses above 5% for 5 minutes.
  # Both sides are aggregated with sum() before dividing. Without the
  # aggregation the division matches series one-to-one on identical label
  # sets (including `status`), so each 5xx series would be divided by itself
  # and the ratio would be 1 whenever any 5xx traffic exists.
  # With no traffic at all the result is 0/0 = NaN, which does not satisfy
  # the comparison, so the alert stays silent.
  - alert: HMSHighErrorRate
    expr: >-
      sum(rate(http_requests_total{status=~"5.."}[5m]))
      /
      sum(rate(http_requests_total[5m]))
      > 0.05
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: "API 5xx 错误率超过 5%"
      description: "当前错误率: {{ $value | humanizePercentage }}"
  # Warning: 95th-percentile request duration above 2 seconds for 10 minutes.
  # The bucket rates are not aggregated, so one quantile is evaluated per
  # distinct label set of the histogram.
  - alert: HMSSlowResponses
    expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: "95% 请求响应时间超过 2 秒"
  # Critical: the scrape target of job "hms" has reported up == 0 for 2 minutes.
  - alert: HMSInstanceDown
    expr: up{job="hms"} == 0
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "HMS 服务不可达"
# ── Database alerts ──
- name: database
  rules:
  # Warning: pg_stat_activity_count stays above 80 for 5 minutes.
  - alert: HMSPostgresConnectionsHigh
    expr: 'pg_stat_activity_count > 80'
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'PostgreSQL 活跃连接数超过 80'
  # Critical: pg_replication_lag stays above 30 for 5 minutes
  # (the summary states the unit as seconds).
  - alert: HMSPostgresReplicationLag
    expr: 'pg_replication_lag > 30'
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: 'PostgreSQL 复制延迟超过 30 秒'
  # Critical: the age of hms_last_backup_timestamp exceeds 86400 * 2 seconds
  # (48 hours) and remains so for 1 hour.
  - alert: HMSBackupMissing
    expr: 'time() - hms_last_backup_timestamp > 86400 * 2'
    for: 1h
    labels:
      severity: critical
    annotations:
      summary: '数据库备份超过 48 小时未执行'
# ── Redis alerts ──
- name: redis
  rules:
  # Warning: used memory above 90% of the configured maximum for 5 minutes.
  # The denominator is filtered with `> 0` so that a series reporting a
  # maximum of 0 is dropped instead of producing a +Inf ratio that would
  # keep this alert firing permanently.
  # NOTE(review): presumably the exporter reports 0 when Redis has no
  # maxmemory limit configured; confirm against the exporter in use.
  - alert: HMSRedisMemoryHigh
    expr: redis_memory_used_bytes / (redis_memory_max_bytes > 0) > 0.9
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "Redis 内存使用超过 90%"
  # Critical: redis_up has been 0 for 2 minutes.
  - alert: HMSRedisDown
    expr: redis_up == 0
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "Redis 服务不可达"