PP-04 核实属实:11 条告警规则在 prometheus 加载但无 alertmanager(告警无通知出口),grafana provisioning 目录空,exporter 服务也未部署 ("配置齐全运行为零")。 MVP 打通告警链路 + 让 grafana 可用(不依赖 exporter,基于 app metrics): - docker-compose.production.yml 加 alertmanager 服务 + alertmanager_data 卷 - prometheus.yml 加 alerting 指向 alertmanager:9093 - alertmanager/config.yml 路由(SEV-1 critical 即时通知 + 分组) - grafana/provisioning/datasources 自动连 prometheus - grafana/provisioning/dashboards provider 就绪 待办(上线前):① alertmanager 占位 webhook 替换为真实渠道(钉钉/企微/邮件) ② 补 grafana dashboard JSON ③ 部署 postgres/redis/nginx exporter 让 prometheus 抓得到
38 lines
733 B
YAML
38 lines
733 B
YAML
# Server-wide defaults; individual scrape jobs may override them.
global:
  # How often targets are scraped.
  scrape_interval: 15s
  # How often alerting/recording rules are evaluated.
  evaluation_interval: 15s
|
|
|
|
# Rule files loaded by Prometheus (the alert rule definitions).
rule_files:
  - "alerts.yml"
|
|
|
|
# Destination for fired alerts. Without this stanza the loaded alert rules
# have no notification outlet.
alerting:
  alertmanagers:
    - static_configs:
        # Alertmanager instance, addressed by its service name and port.
        - targets: ["alertmanager:9093"]
|
|
|
|
# Scrape jobs. Each job attaches a static `service` label to its target.
scrape_configs:
  # Application metrics endpoint.
  - job_name: "hms"
    metrics_path: /metrics
    static_configs:
      - targets: ["app:9090"]
        labels:
          service: "hms-server"

  # NOTE: per the accompanying change notes, the postgres/redis/nginx
  # exporters are not deployed yet; the three jobs below cannot be scraped
  # until those exporter services exist.
  - job_name: "postgres"
    static_configs:
      - targets: ["postgres-exporter:9187"]
        labels:
          service: "postgresql"

  - job_name: "redis"
    static_configs:
      - targets: ["redis-exporter:9121"]
        labels:
          service: "redis"

  - job_name: "nginx"
    static_configs:
      - targets: ["nginx-exporter:9113"]
        labels:
          service: "nginx"
|