Files
hms/docker/docker-compose.production.yml
iven ffbe5a797f feat(docker): PP-04 完善 — Grafana HMS 概览 dashboard + postgres/redis exporter + 渠道文档
延续 PP-04 MVP,补全可观测性闭环:
- grafana/provisioning/dashboards/json/hms-overview.json: HMS 概览 dashboard
  (服务状态/DB 连接池/EventBus 积压/内存 CPU/API 5xx 错误率,基于 app metrics)
- postgres-exporter + redis-exporter 服务: 之前 prometheus.yml 配了 target 但
  服务未部署(pg_stat_activity/redis_memory 等告警永不触发),现补齐
- alertmanager 启用 --config.expand-env: 支持渠道 token 用 \${VAR} 从 .env 注入
  (避免重蹈 PP-03 Redis 密码明文入 git 覆辙)
- alertmanager/README.md: 钉钉/企微/邮件渠道配置文档(上线前填)

nginx-exporter 跳过(alerts.yml 无 nginx 规则 + 需改 nginx.conf 配 stub_status)
2026-06-26 10:03:21 +08:00

218 lines
5.9 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# 生产环境 Docker Compose 配置
# 使用方式: docker compose -f docker/docker-compose.yml -f docker/docker-compose.production.yml up -d
services:
# ── Nginx reverse proxy + TLS termination ──
# The only service in this file that publishes ports on the host (80/443).
  nginx:
    image: nginx:1.27-alpine
    container_name: hms-nginx
    restart: unless-stopped
    networks:
      - hms-internal
    ports:
      - "80:80"
      - "443:443"
    # Start only after the application reports healthy.
    depends_on:
      app:
        condition: service_healthy
    volumes:
      # Site configuration and TLS material are mounted read-only.
      - ./nginx/nginx.conf:/etc/nginx/conf.d/default.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
      # Access/error logs are kept in a named volume.
      - nginx_logs:/var/log/nginx
    healthcheck:
      # Probe the plain-HTTP listener without downloading the body.
      test:
        - CMD
        - wget
        - --spider
        - -q
        - http://localhost:80
      interval: 30s
      timeout: 5s
      retries: 3
    deploy:
      resources:
        limits:
          cpus: "0.5"
          memory: 128M
# ── HMS application server ──
# Built from the repository Dockerfile; reachable only on the internal network.
  app:
    container_name: hms-server
    restart: unless-stopped
    build:
      context: ..
      dockerfile: Dockerfile
    networks:
      - hms-internal
    # Ports are exposed to other containers only, never published on the host.
    expose:
      - "3000"
      - "9090"
    env_file:
      - .env.production
    environment:
      # Connection URLs are assembled from the same variables used elsewhere in this file.
      ERP__DATABASE__URL: "postgres://${POSTGRES_USER:-erp}:${POSTGRES_PASSWORD}@postgres:${POSTGRES_PORT:-5432}/${POSTGRES_DB:-erp}"
      ERP__REDIS__URL: "redis://:${REDIS_PASSWORD}@redis:${REDIS_PORT:-6379}"
    volumes:
      - app-uploads:/app/uploads
    # Wait until both backing stores pass their health checks.
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    healthcheck:
      test:
        - CMD
        - curl
        - -f
        - http://localhost:3000/api/v1/health
      interval: 30s
      timeout: 5s
      retries: 3
      # Failures during the first 60s do not count against the retry budget.
      start_period: 60s
    deploy:
      resources:
        reservations:
          cpus: "0.5"
          memory: 256M
        limits:
          cpus: "2"
          memory: 1024M
# ── Daily automatic database backup (with encryption) ──
# Writes a single crontab entry for root and runs crond in the foreground.
  backup:
    image: postgres:16-alpine
    container_name: hms-backup
    restart: unless-stopped
    # Exec-form entrypoint: the script is passed to `sh -c` verbatim.
    # `$$` is Compose's escape for a literal `$`, so BACKUP_CRON is expanded by
    # the container shell at start-up. The crontab line is double-quoted on
    # purpose: single quotes would write the literal text "$BACKUP_CRON" into
    # the crontab, and double quotes also keep the `*` fields from globbing.
    entrypoint:
      - sh
      - "-c"
      - |
        echo "$$BACKUP_CRON /usr/local/bin/backup.sh" > /etc/crontabs/root &&
        exec crond -f -l 2
    environment:
      PGHOST: postgres
      PGPORT: "${POSTGRES_PORT:-5432}"
      PGUSER: "${POSTGRES_USER:-erp}"
      PGDATABASE: "${POSTGRES_DB:-erp}"
      # NOTE(review): no PGPASSWORD is set here — confirm it is supplied by the
      # base compose file or handled inside backup.sh.
      BACKUP_DIR: /backups
      KEEP_DAYS: "${BACKUP_KEEP_DAYS:-7}"
      # Cron schedule for the backup job; defaults to 02:00 every day.
      BACKUP_CRON: "${BACKUP_CRON:-0 2 * * *}"
      # Empty by default; presumably backup.sh encrypts only when set — verify.
      BACKUP_PASSPHRASE: "${BACKUP_PASSPHRASE:-}"
    volumes:
      - ./backup.sh:/usr/local/bin/backup.sh:ro
      - backup_data:/backups
    depends_on:
      postgres:
        condition: service_healthy
    networks:
      - hms-internal
# ── Uploads file backup (mirrored into a dedicated backup volume) ──
# Periodically mirrors the application's uploads volume with rsync.
  uploads-backup:
    image: alpine:3.20
    container_name: hms-uploads-backup
    restart: unless-stopped
    # Exec-form entrypoint: the script is passed to `sh -c` verbatim.
    # - The stock alpine image does not ship rsync, so it is installed at
    #   start-up when missing (requires outbound network access).
    # - `$$` is Compose's escape for a literal `$`; the crontab line is
    #   double-quoted so the container shell expands UPLOADS_BACKUP_CRON
    #   (single quotes would write the literal variable name) without
    #   globbing the `*` fields.
    entrypoint:
      - sh
      - "-c"
      - |
        { command -v rsync >/dev/null 2>&1 || apk add --no-cache rsync; } &&
        echo "$$UPLOADS_BACKUP_CRON rsync -a --delete /source/uploads/ /backup/uploads/" > /etc/crontabs/root &&
        exec crond -f -l 2
    environment:
      # Cron schedule for the sync job; defaults to 03:00 every day.
      UPLOADS_BACKUP_CRON: "${UPLOADS_BACKUP_CRON:-0 3 * * *}"
    volumes:
      # Source is mounted read-only; `--delete` only ever affects the backup copy.
      - app-uploads:/source/uploads:ro
      - uploads_backup_data:/backup/uploads
    networks:
      - hms-internal
# ── Prometheus monitoring ──
# Scrape configuration and alert rules are mounted read-only from ./prometheus.
  prometheus:
    image: prom/prometheus:v3.1.0
    container_name: hms-prometheus
    restart: unless-stopped
    networks:
      - hms-internal
    # Reachable by other containers only; not published on the host.
    expose:
      - "9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
      - prometheus_data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      # TSDB retention is bounded both by age and by size.
      - "--storage.tsdb.retention.time=30d"
      - "--storage.tsdb.retention.size=2GB"
      # Enables the HTTP reload/quit endpoints.
      - "--web.enable-lifecycle"
# ── Alertmanager: alert notification outlet ──
# PP-04: previously the 11 alert rules were loaded by Prometheus but no
# Alertmanager was deployed, so firing alerts went unnoticed.
  alertmanager:
    image: prom/alertmanager:v0.27.0
    container_name: hms-alertmanager
    restart: unless-stopped
    volumes:
      - ./alertmanager/config.yml:/etc/alertmanager/config.yml:ro
      - alertmanager_data:/alertmanager
    # Alertmanager has no `--config.expand-env` flag (that option belongs to
    # Loki/Promtail); passing an unknown flag makes the process exit at
    # start-up, so it must not be listed here. Alertmanager does not expand
    # ${VAR} in its config file: keep channel tokens out of git by using the
    # `*_file` secret fields of the receiver configs, or by rendering
    # config.yml from a template before it is mounted.
    command:
      - "--config.file=/etc/alertmanager/config.yml"
      - "--storage.path=/alertmanager"
    # Reachable by other containers only; not published on the host.
    expose:
      - "9093"
    networks:
      - hms-internal
# ── Grafana visualization ──
# Dashboards and data sources are provisioned read-only from ./grafana/provisioning.
  grafana:
    image: grafana/grafana:11.4.0
    container_name: hms-grafana
    restart: unless-stopped
    networks:
      - hms-internal
    # Grafana's HTTP port is exposed to other containers only.
    expose:
      - "3000"
    depends_on:
      prometheus:
        condition: service_started
    environment:
      GF_SECURITY_ADMIN_USER: "${GRAFANA_ADMIN_USER:-admin}"
      # NOTE(review): falls back to an empty value when GRAFANA_ADMIN_PASSWORD
      # is unset — confirm the resulting admin credentials are acceptable for
      # production, or set the variable explicitly in .env.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-}"
      # Self-registration is disabled.
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_SERVER_ROOT_URL: "${GRAFANA_ROOT_URL:-http://localhost:3001}"
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - grafana_data:/var/lib/grafana
# ── Prometheus exporters ──
# PP-04: prometheus.yml already listed these targets, but the services were
# never deployed, so the related alerts could never fire.
  postgres-exporter:
    image: prometheuscommunity/postgres-exporter:v0.15.0
    container_name: hms-postgres-exporter
    restart: unless-stopped
    environment:
      # Credentials are passed separately from the URI so the exporter escapes
      # them itself; a password containing characters such as `@`, `/`, `:` or
      # `,` would otherwise corrupt a hand-built DATA_SOURCE_NAME URL.
      DATA_SOURCE_URI: "postgres:${POSTGRES_PORT:-5432}/${POSTGRES_DB:-erp}?sslmode=disable"
      DATA_SOURCE_USER: "${POSTGRES_USER:-erp}"
      DATA_SOURCE_PASS: "${POSTGRES_PASSWORD}"
    # Metrics port, reachable by other containers only.
    expose:
      - "9187"
    networks:
      - hms-internal
  redis-exporter:
    image: oliver006/redis_exporter:v1.66.0
    container_name: hms-redis-exporter
    restart: unless-stopped
    environment:
      REDIS_ADDR: "redis://redis:${REDIS_PORT:-6379}"
      # No development fallback in production: Compose aborts with this
      # message when REDIS_PASSWORD is unset, instead of silently using a
      # well-known default that differs from what the app service sends.
      REDIS_PASSWORD: "${REDIS_PASSWORD:?REDIS_PASSWORD must be set}"
    # Metrics port, reachable by other containers only.
    expose:
      - "9121"
    networks:
      - hms-internal
# Named volumes declared by this file; all use the local driver.
volumes:
  # Application data
  app-uploads:
    driver: local
  nginx_logs:
    driver: local
  # Backups
  backup_data:
    driver: local
  uploads_backup_data:
    driver: local
  # Observability stack state
  prometheus_data:
    driver: local
  alertmanager_data:
    driver: local
  grafana_data:
    driver: local
# Single bridge network joined by every service defined in this file.
networks:
  hms-internal:
    driver: bridge