Files
hms/docker/grafana/provisioning/dashboards/json/hms-overview.json
iven ffbe5a797f feat(docker): PP-04 完善 — Grafana HMS 概览 dashboard + postgres/redis exporter + 渠道文档
延续 PP-04 MVP,补全可观测性闭环:
- grafana/provisioning/dashboards/json/hms-overview.json: HMS 概览 dashboard
  (服务状态/DB 连接池/EventBus 积压/内存 CPU/API 5xx 错误率,基于 app metrics)
- postgres-exporter + redis-exporter 服务: 之前 prometheus.yml 配了 target 但
  服务未部署(pg_stat_activity/redis_memory 等告警永不触发),现补齐
- alertmanager 启用 --config.expand-env: 支持渠道 token 用 ${VAR} 从 .env 注入
  (避免重蹈 PP-03 Redis 密码明文入 git 覆辙)
- alertmanager/README.md: 钉钉/企微/邮件渠道配置文档(上线前填)

nginx-exporter 跳过(alerts.yml 无 nginx 规则 + 需改 nginx.conf 配 stub_status)
2026-06-26 10:03:21 +08:00

116 lines
3.8 KiB
JSON

{
"uid": "hms-overview",
"title": "HMS 概览",
"tags": [
  "HMS",
  "overview"
],
"timezone": "browser",
"schemaVersion": 39,
"version": 1,
"refresh": "30s",
"time": {
  "from": "now-1h",
  "to": "now"
},
"panels": [
{
  "id": 1,
  "type": "stat",
  "title": "HMS 服务状态",
  "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
  "datasource": { "type": "prometheus", "uid": "prometheus" },
  "targets": [{ "refId": "A", "expr": "up{job=\"hms\"}", "legendFormat": "" }],
  "options": {
    "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
    "colorMode": "background"
  },
  "fieldConfig": {
    "defaults": {
      "mappings": [
        {
          "type": "value",
          "options": {
            "0": { "text": "DOWN", "color": "red", "index": 0 },
            "1": { "text": "UP", "color": "green", "index": 1 }
          }
        }
      ]
    },
    "overrides": []
  }
},
{
  "id": 2,
  "type": "stat",
  "title": "EventBus 积压 (pending)",
  "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
  "datasource": { "type": "prometheus", "uid": "prometheus" },
  "targets": [{ "refId": "A", "expr": "eventbus_pending_total", "legendFormat": "" }],
  "options": {
    "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
    "colorMode": "value"
  },
  "fieldConfig": {
    "defaults": {
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": null },
          { "color": "yellow", "value": 100 },
          { "color": "red", "value": 500 }
        ]
      }
    },
    "overrides": []
  }
},
{
  "id": 3,
  "type": "stat",
  "title": "API 5xx 错误率 (5m)",
  "gridPos": { "h": 4, "w": 12, "x": 12, "y": 0 },
  "datasource": { "type": "prometheus", "uid": "prometheus" },
  "targets": [
    {
      "refId": "A",
      "expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m])) / clamp_min(sum(rate(http_requests_total[5m])), 1e-9)",
      "legendFormat": "5xx ratio"
    }
  ],
  "options": {
    "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
    "colorMode": "background"
  },
  "fieldConfig": {
    "defaults": {
      "unit": "percentunit",
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": null },
          { "color": "yellow", "value": 0.01 },
          { "color": "red", "value": 0.05 }
        ]
      }
    },
    "overrides": []
  }
},
{
  "id": 4,
  "type": "timeseries",
  "title": "DB 连接池(活跃 / 空闲)",
  "gridPos": {
    "h": 8,
    "w": 12,
    "x": 0,
    "y": 4
  },
  "datasource": {
    "type": "prometheus",
    "uid": "prometheus"
  },
  "targets": [
    {
      "refId": "A",
      "expr": "db_pool_connections_active",
      "legendFormat": "活跃"
    },
    {
      "refId": "B",
      "expr": "db_pool_connections_idle",
      "legendFormat": "空闲"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "short"
    }
  }
},
{
  "id": 5,
  "type": "timeseries",
  "title": "进程内存 / CPU",
  "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
  "datasource": { "type": "prometheus", "uid": "prometheus" },
  "targets": [
    { "refId": "A", "expr": "process_resident_memory_bytes", "legendFormat": "内存 (bytes)" },
    { "refId": "B", "expr": "rate(process_cpu_seconds_total[5m])", "legendFormat": "CPU (cores)" }
  ],
  "fieldConfig": {
    "defaults": {},
    "overrides": [
      {
        "matcher": { "id": "byFrameRefID", "options": "A" },
        "properties": [
          { "id": "unit", "value": "bytes" }
        ]
      },
      {
        "matcher": { "id": "byFrameRefID", "options": "B" },
        "properties": [
          { "id": "unit", "value": "none" },
          { "id": "custom.axisPlacement", "value": "right" }
        ]
      }
    ]
  }
},
{
  "id": 6,
  "type": "timeseries",
  "title": "EventBus 积压趋势",
  "gridPos": {
    "h": 8,
    "w": 24,
    "x": 0,
    "y": 12
  },
  "datasource": {
    "type": "prometheus",
    "uid": "prometheus"
  },
  "targets": [
    {
      "refId": "A",
      "expr": "eventbus_pending_total",
      "legendFormat": "pending events"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "short"
    }
  }
}
]
}