Files
hms/docker/docker-compose.production.yml
iven 984fca627b fix(docker): B1 alertmanager 告警渠道接线 ALERT_WEBHOOK_URL 环境变量注入
- config.yml url 写死 placeholder.invalid -> ${ALERT_WEBHOOK_URL}
- compose alertmanager 补 environment 注入(评估漏检:虽有 --config.expand-env=true 但容器内无变量可展开)
- 未配置时 fallback 占位 url 保 MVP 链路可启动(fail-fast 优于静默盲飞)
- .env.production.example 补 ALERT_WEBHOOK_URL 模板
- 上线评估 B1 代码层修复,真实 webhook 由 staging 填入
2026-06-26 15:18:43 +08:00

222 lines
6.3 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Production Docker Compose configuration (override file).
# Usage: docker compose -f docker/docker-compose.yml -f docker/docker-compose.production.yml up -d
services:
# ── Nginx reverse proxy + TLS termination ──
nginx:
  image: nginx:1.27-alpine
  container_name: hms-nginx
  restart: unless-stopped
  # The only service in this file that publishes ports on the host.
  ports:
    - "80:80"
    - "443:443"
  volumes:
    - ./nginx/nginx.conf:/etc/nginx/conf.d/default.conf:ro
    - ./nginx/ssl:/etc/nginx/ssl:ro
    - nginx_logs:/var/log/nginx
  # Start proxying only once the application reports healthy.
  depends_on:
    app:
      condition: service_healthy
  healthcheck:
    # Probe 127.0.0.1 instead of "localhost": on Alpine, BusyBox wget may
    # resolve "localhost" to ::1 first, and the check then fails with
    # "connection refused" whenever nginx only listens on IPv4.
    test: ["CMD", "wget", "--spider", "-q", "http://127.0.0.1:80"]
    interval: 30s
    timeout: 5s
    retries: 3
  deploy:
    resources:
      limits:
        cpus: "0.5"
        memory: 128M
  networks:
    - hms-internal
# ── HMS application server ──
app:
  container_name: hms-server
  build:
    dockerfile: Dockerfile
    context: ..
  restart: unless-stopped
  networks: [hms-internal]
  # `expose` declares container ports without publishing them on the host.
  expose: ["3000", "9090"]
  depends_on:
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy
  volumes:
    - app-uploads:/app/uploads
  env_file: .env.production
  # Connection URLs assembled from Compose-interpolated variables. Entries
  # under `environment` take precedence over the same keys in `env_file`.
  # POSTGRES_PASSWORD and REDIS_PASSWORD have no fallback: when unset,
  # Compose substitutes an empty string.
  environment:
    ERP__REDIS__URL: "redis://:${REDIS_PASSWORD}@redis:${REDIS_PORT:-6379}"
    ERP__DATABASE__URL: "postgres://${POSTGRES_USER:-erp}:${POSTGRES_PASSWORD}@postgres:${POSTGRES_PORT:-5432}/${POSTGRES_DB:-erp}"
  healthcheck:
    test:
      - CMD
      - curl
      - "-f"
      - http://localhost:3000/api/v1/health
    # Failures during the first 60s do not count towards `retries`.
    start_period: 60s
    interval: 30s
    timeout: 5s
    retries: 3
  deploy:
    resources:
      reservations:
        cpus: "0.5"
        memory: 256M
      limits:
        cpus: "2"
        memory: 1024M
# ── Daily automatic database backup (with encryption) ──
# NOTE(review): encryption is presumably performed by backup.sh using
# BACKUP_PASSPHRASE; that script is not visible here — confirm.
backup:
  image: postgres:16-alpine
  container_name: hms-backup
  restart: unless-stopped
  # List-form entrypoint so the script reaches `sh` exactly as written.
  # The previous string form wrapped $BACKUP_CRON in single quotes inside
  # `sh -c "..."`; Compose splits that string without a shell, so the single
  # quotes survived, the variable was never expanded, and the crontab
  # received the literal text "$BACKUP_CRON ..." which crond cannot parse.
  # `$$` is Compose's escape for a literal `$`; the double quotes let the
  # shell expand the variable while keeping the `*` fields from globbing.
  entrypoint:
    - sh
    - "-c"
    - |
      echo "$$BACKUP_CRON /usr/local/bin/backup.sh" > /etc/crontabs/root &&
      crond -f -l 2
  environment:
    PGHOST: postgres
    PGPORT: "${POSTGRES_PORT:-5432}"
    PGUSER: "${POSTGRES_USER:-erp}"
    PGDATABASE: "${POSTGRES_DB:-erp}"
    BACKUP_DIR: /backups
    KEEP_DAYS: "${BACKUP_KEEP_DAYS:-7}"
    # Cron schedule for the backup job; defaults to 02:00 every day.
    BACKUP_CRON: "${BACKUP_CRON:-0 2 * * *}"
    BACKUP_PASSPHRASE: "${BACKUP_PASSPHRASE:-}"
  volumes:
    - ./backup.sh:/usr/local/bin/backup.sh:ro
    - backup_data:/backups
  depends_on:
    postgres:
      condition: service_healthy
  networks:
    - hms-internal
# ── Uploads file backup (synced into a local volume on the host) ──
uploads-backup:
  image: alpine:3.20
  container_name: hms-uploads-backup
  restart: unless-stopped
  # Two fixes over the previous string-form entrypoint:
  #  1. The stock alpine image does not ship rsync, so it is installed at
  #     container start (requires outbound network access to the apk mirror).
  #  2. $UPLOADS_BACKUP_CRON was wrapped in single quotes inside `sh -c`, so
  #     it was written to the crontab unexpanded. It is now double-quoted:
  #     the shell expands it and the `*` fields are not globbed.
  # `$$` is Compose's escape for a literal `$`.
  entrypoint:
    - sh
    - "-c"
    - |
      apk add --no-cache rsync &&
      echo "$$UPLOADS_BACKUP_CRON rsync -a --delete /source/uploads/ /backup/uploads/" > /etc/crontabs/root &&
      crond -f -l 2
  environment:
    # Cron schedule for the sync job; defaults to 03:00 every day.
    UPLOADS_BACKUP_CRON: "${UPLOADS_BACKUP_CRON:-0 3 * * *}"
  volumes:
    # Source is mounted read-only; --delete only ever affects the backup copy.
    - app-uploads:/source/uploads:ro
    - uploads_backup_data:/backup/uploads
  networks:
    - hms-internal
# ── Prometheus monitoring ──
prometheus:
  container_name: hms-prometheus
  image: prom/prometheus:v3.1.0
  restart: unless-stopped
  networks: [hms-internal]
  expose: ["9090"]
  volumes:
    - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    - ./prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
    - prometheus_data:/prometheus
  # Retention is capped both by age (30 days) and by size (2GB).
  # --web.enable-lifecycle turns on the HTTP reload/quit endpoints.
  command:
    - '--config.file=/etc/prometheus/prometheus.yml'
    - '--storage.tsdb.retention.time=30d'
    - '--storage.tsdb.retention.size=2GB'
    - '--web.enable-lifecycle'
# ── Alertmanager: outbound alert notifications ──
# PP-04: previously 11 alert rules were loaded in Prometheus but there was no
# Alertmanager, so nobody was notified when an alert fired.
alertmanager:
  image: prom/alertmanager:v0.27.0
  container_name: hms-alertmanager
  restart: unless-stopped
  # ALERT_WEBHOOK_URL is interpolated by Compose and injected into the
  # container. When it is not configured, a placeholder URL keeps the MVP
  # pipeline startable; fill in the real DingTalk / WeCom webhook before
  # go-live.
  environment:
    ALERT_WEBHOOK_URL: "${ALERT_WEBHOOK_URL:-http://placeholder.invalid/alert}"
  volumes:
    - ./alertmanager/config.yml:/etc/alertmanager/config.yml:ro
    - alertmanager_data:/alertmanager
  # Upstream Alertmanager's documented flags do not include
  # --config.expand-env, and it does not expand environment variables in its
  # configuration file; passing the unknown flag aborts startup. This wrapper
  # therefore renders the ${ALERT_WEBHOOK_URL} placeholder from config.yml
  # into a private copy on the writable data volume and starts Alertmanager
  # against that copy. Characters that are special in a sed replacement
  # (&, | and backslash) are escaped first so any webhook URL is safe.
  # `$$` is Compose's escape for a literal `$`. The trailing "alertmanager"
  # item becomes $0 of the script, so `command` arrives as "$@".
  entrypoint:
    - /bin/sh
    - "-c"
    - |
      set -eu
      umask 077
      escaped_url=$$(printf '%s' "$$ALERT_WEBHOOK_URL" | sed 's/[&|\\]/\\&/g')
      sed "s|\$${ALERT_WEBHOOK_URL}|$${escaped_url}|g" /etc/alertmanager/config.yml > /alertmanager/config.rendered.yml
      exec /bin/alertmanager "$$@"
    - alertmanager
  command:
    - "--config.file=/alertmanager/config.rendered.yml"
    - "--storage.path=/alertmanager"
  expose:
    - "9093"
  networks:
    - hms-internal
# ── Grafana dashboards ──
grafana:
  container_name: hms-grafana
  image: grafana/grafana:11.4.0
  restart: unless-stopped
  networks: [hms-internal]
  expose: ["3000"]
  depends_on: [prometheus]
  volumes:
    - ./grafana/provisioning:/etc/grafana/provisioning:ro
    - grafana_data:/var/lib/grafana
  environment:
    GF_SERVER_ROOT_URL: '${GRAFANA_ROOT_URL:-http://localhost:3001}'
    GF_USERS_ALLOW_SIGN_UP: 'false'
    GF_SECURITY_ADMIN_USER: '${GRAFANA_ADMIN_USER:-admin}'
    # NOTE(review): when GRAFANA_ADMIN_PASSWORD is unset this passes an empty
    # value, which presumably leaves Grafana on its built-in default admin
    # password — confirm, and set a real value before go-live.
    GF_SECURITY_ADMIN_PASSWORD: '${GRAFANA_ADMIN_PASSWORD:-}'
# ── Prometheus exporters ──
# PP-04: previously prometheus.yml listed these targets but the exporter
# services were never deployed, so the related alerts could never fire.
postgres-exporter:
  container_name: hms-postgres-exporter
  image: prometheuscommunity/postgres-exporter:v0.15.0
  restart: unless-stopped
  networks: [hms-internal]
  expose: ["9187"]
  environment:
    # Connection string for the exporter, built from the same Compose
    # variables and fallbacks as the application's database URL.
    DATA_SOURCE_NAME: 'postgresql://${POSTGRES_USER:-erp}:${POSTGRES_PASSWORD}@postgres:${POSTGRES_PORT:-5432}/${POSTGRES_DB:-erp}?sslmode=disable'
# Redis metrics exporter scraped by Prometheus.
redis-exporter:
  image: oliver006/redis_exporter:v1.66.0
  container_name: hms-redis-exporter
  restart: unless-stopped
  environment:
    REDIS_ADDR: "redis://redis:${REDIS_PORT:-6379}"
    # Require the real password instead of falling back to the development
    # credential "erp_redis_dev": a dev default has no place in the production
    # file, and the app's ERP__REDIS__URL has no such fallback, so with
    # REDIS_PASSWORD unset the two services would have authenticated with
    # different passwords. `:?` makes Compose abort with this message when the
    # variable is unset or empty.
    REDIS_PASSWORD: "${REDIS_PASSWORD:?REDIS_PASSWORD must be set for production}"
  expose:
    - "9121"
  networks:
    - hms-internal
# Named volumes used by the services in this file; all use the local driver.
volumes:
  # Application uploads and their backup copy
  app-uploads: {driver: local}
  uploads_backup_data: {driver: local}
  # Database backups
  backup_data: {driver: local}
  # Reverse-proxy logs
  nginx_logs: {driver: local}
  # Monitoring stack state
  prometheus_data: {driver: local}
  alertmanager_data: {driver: local}
  grafana_data: {driver: local}
# Single bridge network that every service in this file attaches to.
networks:
  hms-internal: {driver: bridge}