fix(docker): B1 alertmanager 告警渠道接线 ALERT_WEBHOOK_URL 环境变量注入
- config.yml url 写死 placeholder.invalid -> ${ALERT_WEBHOOK_URL}
- compose alertmanager 补 environment 注入(评估漏检:虽有 --config.expand-env=true 但容器内无变量可展开)
- 未配置时 fallback 占位 url 保 MVP 链路可启动(此时告警 POST 会失败并记日志,并非 fail-fast;上线前必须填真实 webhook)
- .env.production.example 补 ALERT_WEBHOOK_URL 模板
- 上线评估 B1 代码层修复,真实 webhook 由 staging 填入
This commit is contained in:
@@ -68,3 +68,10 @@ UPLOADS_BACKUP_CRON=0 3 * * *
|
|||||||
# Grafana 管理员密码
|
# Grafana 管理员密码
|
||||||
GRAFANA_ADMIN_PASSWORD=CHANGE_ME_GRAFANA_ADMIN
|
GRAFANA_ADMIN_PASSWORD=CHANGE_ME_GRAFANA_ADMIN
|
||||||
GRAFANA_ROOT_URL=http://localhost:3001
|
GRAFANA_ROOT_URL=http://localhost:3001
|
||||||
|
|
||||||
|
# ===== 监控告警 =====
|
||||||
|
|
||||||
|
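# Alertmanager 告警通知出口(上线前必填,否则 DB 宕机/5xx 飙升等告警发不到任何人)
# 注意(待确认):Alertmanager webhook 发送的是其自有 JSON 负载,钉钉/企微机器人通常无法直接解析,
# 一般需经格式转换服务(如 prometheus-webhook-dingtalk)中转;填写前请在 staging 验证能收到消息。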
|
||||||
|
# 钉钉机器人:https://oapi.dingtalk.com/robot/send?access_token=XXX
|
||||||
|
# 企业微信群机器人:https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=XXX
|
||||||
|
ALERT_WEBHOOK_URL=https://oapi.dingtalk.com/robot/send?access_token=CHANGE_ME
|
||||||
|
|||||||
@@ -1,12 +1,15 @@
|
|||||||
# Alertmanager 告警通知配置
|
# Alertmanager 告警通知配置
|
||||||
#
|
#
|
||||||
# ⚠️ TODO(上线前必填):将 receivers.default.webhook_configs 替换为真实通知渠道:
|
# 通知渠道由 ALERT_WEBHOOK_URL 环境变量注入(见 receivers.default.webhook_configs),
|
||||||
|
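# 容器启用 --config.expand-env=true 展开。来源:docker/.env.production。
# 注意(待确认):官方 Alertmanager 文档未列出 --config.expand-env 参数,也未说明支持配置文件内的环境变量展开;
# 请在 staging 验证 ${ALERT_WEBHOOK_URL} 确实被展开,否则改用 webhook_configs.url_file 或启动前用 envsubst 渲染配置。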
|
||||||
|
#
|
||||||
|
# ⚠️ 上线前必填(docker/.env.production.example 已给模板),否则告警发不到任何人:
|
||||||
# - 钉钉机器人:https://oapi.dingtalk.com/robot/send?access_token=XXX
|
# - 钉钉机器人:https://oapi.dingtalk.com/robot/send?access_token=XXX
|
||||||
# - 企业微信群机器人:https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=XXX
|
# - 企业微信群机器人:https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=XXX
|
||||||
# - 邮件 SMTP:配置 global.smtp_* + email_configs
|
# - 邮件 SMTP:配置 global.smtp_* + email_configs
|
||||||
#
|
#
|
||||||
# 当前为占位 webhook(指向无效端点),alertmanager 会启动但告警 POST 失败记日志。
|
# 未配置 ALERT_WEBHOOK_URL 时,compose 层 fallback 占位 url,alertmanager 可启动
|
||||||
# PP-04 MVP 目的:先打通 prometheus → alertmanager 链路,渠道上线前填。
|
# 但告警 POST 会失败并记录日志 —— 失败可见,优于 PP-04 之前"告警触发无人知晓"的盲飞状态。
|
||||||
|
|
||||||
global:
|
global:
|
||||||
resolve_timeout: 5m
|
resolve_timeout: 5m
|
||||||
@@ -28,7 +31,9 @@ route:
|
|||||||
|
|
||||||
receivers:
|
receivers:
|
||||||
- name: "default"
|
- name: "default"
|
||||||
# 占位:上线前替换为真实 webhook
|
# 真实通知渠道由 ALERT_WEBHOOK_URL 环境变量注入(alertmanager 启用 --config.expand-env=true)。
|
||||||
|
# 上线前必填:见 docker/.env.production.example。未配置时 compose 层 fallback 占位 url,
|
||||||
|
# alertmanager 可启动,但告警 POST 会失败并记录日志(启动阶段不会报错,需通过日志发现未配置)。
|
||||||
webhook_configs:
|
webhook_configs:
|
||||||
- url: "http://placeholder.invalid/alert"
|
- url: "${ALERT_WEBHOOK_URL}"
|
||||||
send_resolved: true
|
send_resolved: true
|
||||||
|
|||||||
@@ -140,6 +140,10 @@ services:
|
|||||||
image: prom/alertmanager:v0.27.0
|
image: prom/alertmanager:v0.27.0
|
||||||
container_name: hms-alertmanager
|
container_name: hms-alertmanager
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
# ALERT_WEBHOOK_URL 从宿主机 .env.production 注入容器,供 config.yml 的 ${ALERT_WEBHOOK_URL} 展开。
|
||||||
|
# 未配置时 fallback 占位 url,保持 MVP 链路可启动;上线前在 .env.production 填真实钉钉/企微 webhook。
|
||||||
|
environment:
|
||||||
|
ALERT_WEBHOOK_URL: "${ALERT_WEBHOOK_URL:-http://placeholder.invalid/alert}"
|
||||||
volumes:
|
volumes:
|
||||||
- ./alertmanager/config.yml:/etc/alertmanager/config.yml:ro
|
- ./alertmanager/config.yml:/etc/alertmanager/config.yml:ro
|
||||||
- alertmanager_data:/alertmanager
|
- alertmanager_data:/alertmanager
|
||||||
|
|||||||
Reference in New Issue
Block a user