From bc571c7749a6d1daf35239b0f46018c64755ce21 Mon Sep 17 00:00:00 2001 From: iven Date: Thu, 21 May 2026 18:21:51 +0800 Subject: [PATCH] =?UTF-8?q?feat(docker):=20=E7=94=9F=E4=BA=A7=E7=8E=AF?= =?UTF-8?q?=E5=A2=83=20DevOps=20=E5=9F=BA=E7=A1=80=E8=AE=BE=E6=96=BD=20?= =?UTF-8?q?=E2=80=94=20TLS=20+=20=E5=A4=87=E4=BB=BD=E5=8A=A0=E5=AF=86=20+?= =?UTF-8?q?=20Prometheus=20+=20Redis=20=E6=8C=81=E4=B9=85=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增: - nginx/nginx.conf: TLS 1.2/1.3 终端 + HSTS/CSP 安全头 + SSE 长连接 + 50M 上传限制 - prometheus/prometheus.yml: HMS/PostgreSQL/Redis/Nginx 四指标源 - prometheus/alerts.yml: 4 组告警规则(系统/应用/数据库/Redis),含 5xx 错误率 + 内存 + 连接数 - restore.sh: 备份恢复脚本(支持加密备份解密恢复) 改进: - backup.sh: 新增 BACKUP_PASSPHRASE 加密(AES-256-CBC)+ 完整性校验 + 恢复指引 - docker-compose.production.yml: 添加 Nginx/Prometheus/Grafana/uploads-backup 容器 - docker-compose.yml: Redis 添加 --appendonly yes 持久化 - .env.production.example: 添加 DevOps 相关环境变量模板 --- docker/.env.production.example | 18 +++++ docker/backup.sh | 58 ++++++++++++++- docker/docker-compose.production.yml | 104 +++++++++++++++++++++++++-- docker/docker-compose.yml | 2 +- docker/nginx/nginx.conf | 96 +++++++++++++++++++++++++ docker/nginx/ssl/.gitignore | 3 + docker/nginx/ssl/.gitkeep | 8 +++ docker/prometheus/alerts.yml | 103 ++++++++++++++++++++++++++ docker/prometheus/prometheus.yml | 32 +++++++++ docker/restore.sh | 43 +++++++++++ 10 files changed, 458 insertions(+), 9 deletions(-) create mode 100644 docker/nginx/nginx.conf create mode 100644 docker/nginx/ssl/.gitignore create mode 100644 docker/nginx/ssl/.gitkeep create mode 100644 docker/prometheus/alerts.yml create mode 100644 docker/prometheus/prometheus.yml create mode 100644 docker/restore.sh diff --git a/docker/.env.production.example b/docker/.env.production.example index ade2ad1..33f5637 100644 --- a/docker/.env.production.example +++ b/docker/.env.production.example @@ -50,3 +50,21 @@ ERP__AI__DEFAULT_PROVIDER=ollama 
ERP__AI__API_KEY=
 ERP__AI__BASE_URL=http://localhost:11434
 ERP__AI__MODEL=qwen2.5:7b
+
+# ===== DevOps =====
+
+# Backup encryption passphrase (openssl AES-256-CBC; required in production)
+BACKUP_PASSPHRASE=CHANGE_ME_BACKUP_ENCRYPTION_PASSWORD
+
+# Number of days to keep backups
+BACKUP_KEEP_DAYS=7
+
+# Database backup schedule (cron format)
+BACKUP_CRON=0 2 * * *
+
+# Uploads backup schedule (cron format)
+UPLOADS_BACKUP_CRON=0 3 * * *
+
+# Grafana admin password
+GRAFANA_ADMIN_PASSWORD=CHANGE_ME_GRAFANA_ADMIN
+GRAFANA_ROOT_URL=http://localhost:3001
diff --git a/docker/backup.sh b/docker/backup.sh
index cc3e1e5..7034e46 100644
--- a/docker/backup.sh
+++ b/docker/backup.sh
@@ -1,8 +1,12 @@
 #!/usr/bin/env bash
-# PostgreSQL 自动备份脚本
+# PostgreSQL automated backup script (with encryption)
 # 用法:
 #   手动: ./docker/backup.sh
 #   自动: 由 docker compose backup 服务每日 02:00 执行
+#
+# Encryption (choose one):
+#   BACKUP_PASSPHRASE — symmetric openssl AES-256-CBC (no extra dependencies)
+#   GPG_RECIPIENT — asymmetric GPG encryption (public key must be pre-provisioned)
 set -euo pipefail

 BACKUP_DIR="${BACKUP_DIR:-/backups}"
@@ -13,7 +17,9 @@ PG_DB="${PGDATABSE:-erp}"
 KEEP_DAYS="${KEEP_DAYS:-7}"
 TIMESTAMP=$(date +%Y%m%d_%H%M%S)
 FILENAME="${PG_DB}_${TIMESTAMP}.sql.gz"
+ENCRYPTED_FILENAME="${FILENAME}.enc"
 FILEPATH="${BACKUP_DIR}/${FILENAME}"
+ENCRYPTED_FILEPATH="${BACKUP_DIR}/${ENCRYPTED_FILENAME}"

 mkdir -p "${BACKUP_DIR}"

@@ -36,8 +42,54 @@ else
     exit 1
 fi

-# 清理过期备份
-DELETED=$(find "${BACKUP_DIR}" -name "${PG_DB}_*.sql.gz" -mtime +${KEEP_DAYS} -delete -print | wc -l)
+# ── Encrypt the backup ──
+if [ -n "${BACKUP_PASSPHRASE:-}" ]; then
+    echo "[$(date -Iseconds)] 使用 AES-256-CBC 加密备份..."
+    if BACKUP_PASSPHRASE="${BACKUP_PASSPHRASE}" openssl enc -aes-256-cbc -salt -pbkdf2 \
+        -pass env:BACKUP_PASSPHRASE -in "${FILEPATH}" -out "${ENCRYPTED_FILEPATH}"; then  # env: keeps the passphrase out of argv (ps, /proc)
+        rm -f "${FILEPATH}"
+        ENC_SIZE=$(du -h "${ENCRYPTED_FILEPATH}" | cut -f1)
+        echo "[$(date -Iseconds)] 加密完成: ${ENCRYPTED_FILENAME} (${ENC_SIZE})"
+    else
+        echo "[$(date -Iseconds)] 加密失败!保留未加密备份" >&2
+        rm -f "${ENCRYPTED_FILEPATH}"
+    fi
+elif [ -n "${GPG_RECIPIENT:-}" ]; then
+    echo "[$(date -Iseconds)] 使用 GPG 加密备份..."
+    if gpg --batch --yes --encrypt --recipient "${GPG_RECIPIENT}" "${FILEPATH}"; then
+        rm -f "${FILEPATH}"
+        ENC_SIZE=$(du -h "${FILEPATH}.gpg" | cut -f1)  # gpg writes <input>.gpg, not the .enc path
+        echo "[$(date -Iseconds)] 加密完成: ${FILENAME}.gpg (${ENC_SIZE})"
+    else
+        echo "[$(date -Iseconds)] GPG 加密失败!保留未加密备份" >&2
+        rm -f "${FILEPATH}.gpg"
+    fi
+else
+    echo "[$(date -Iseconds)] 警告: 未设置 BACKUP_PASSPHRASE 或 GPG_RECIPIENT,备份未加密!" >&2
+fi
+
+# ── Backup integrity check ──
+LATEST_FILE=$(ls -t "${BACKUP_DIR}/${PG_DB}"_*.sql.gz* 2>/dev/null | head -1)
+if [ -n "${LATEST_FILE}" ] && [ -f "${LATEST_FILE}" ]; then
+    if [[ "${LATEST_FILE}" == *.enc || "${LATEST_FILE}" == *.gpg ]]; then  # ciphertext cannot be gzip-tested
+        echo "[$(date -Iseconds)] 加密备份文件存在: $(basename "${LATEST_FILE}")"
+    elif gzip -t "${LATEST_FILE}" 2>/dev/null; then
+        echo "[$(date -Iseconds)] 备份完整性校验通过"
+    else
+        echo "[$(date -Iseconds)] 警告: 备份文件可能损坏: ${LATEST_FILE}" >&2
+    fi
+fi
+
+# ── Prune expired backups (plain and encrypted) ──
+DELETED=$(find "${BACKUP_DIR}" -name "${PG_DB}_*.sql.gz*" -mtime "+${KEEP_DAYS}" -delete -print | wc -l)
 if [ "${DELETED}" -gt 0 ]; then
     echo "[$(date -Iseconds)] 已清理 ${DELETED} 个过期备份(>${KEEP_DAYS}天)"
 fi
+
+# ── Restore instructions ──
+echo ""
+echo "恢复方法:"
+echo " # 解密(如加密):"
+echo " openssl enc -d -aes-256-cbc -pbkdf2 -pass pass:\$BACKUP_PASSPHRASE -in ${ENCRYPTED_FILEPATH} -out ${FILEPATH}"
+echo " # 恢复:"
+echo " gunzip -c ${FILEPATH} | psql -h \$PGHOST -U \$PGUSER -d \$PGDATABASE"
diff --git a/docker/docker-compose.production.yml b/docker/docker-compose.production.yml
index efe9a5b..71a62ce 100644
--- a/docker/docker-compose.production.yml
+++ b/docker/docker-compose.production.yml
@@ -2,15 +2,44 @@
 # 使用方式: docker compose -f docker/docker-compose.yml -f docker/docker-compose.production.yml up -d

 services:
+  # ── Nginx reverse proxy + TLS termination ──
+  nginx:
+    image: nginx:1.27-alpine
+    container_name: hms-nginx
+    restart: unless-stopped
+    ports:
+      - "80:80"
+      - "443:443"
+    volumes:
+      - ./nginx/nginx.conf:/etc/nginx/conf.d/default.conf:ro
+      - ./nginx/ssl:/etc/nginx/ssl:ro
+      - nginx_logs:/var/log/nginx
+    depends_on:
+      app:
+        condition: service_healthy
+    healthcheck:
+      test: ["CMD", "curl", "-fsk", "-o", "/dev/null", "https://localhost/health"]  # port 80 only redirects and "/" returns 404, so probe /health over TLS
+      interval: 30s
+      timeout: 5s
+      retries: 3
+    deploy:
+      resources:
+        limits:
+          cpus: "0.5"
+          memory: 128M
+    networks:
+      - hms-internal
+
+  # ── HMS application server ──
   app:
     build:
       context: ..
       dockerfile: Dockerfile
     container_name: hms-server
     restart: unless-stopped
-    ports:
-      - "${APP_PORT:-3000}:3000"
-      - "${METRICS_PORT:-9090}:9090"
+    expose:
+      - "3000"
+      - "9090"
     env_file:
       - .env.production
     environment:
@@ -40,8 +69,7 @@ services:
     networks:
       - hms-internal

-  # 每日自动备份 — 每天凌晨 02:00 执行 pg_dump,保留 7 天
-  # 手动触发: docker compose -f docker/docker-compose.yml -f docker/docker-compose.production.yml run --rm backup
+  # ── Daily automated backup (with encryption) ──
   backup:
     image: postgres:16-alpine
     container_name: hms-backup
@@ -59,6 +87,7 @@ services:
       BACKUP_DIR: /backups
       KEEP_DAYS: "${BACKUP_KEEP_DAYS:-7}"
       BACKUP_CRON: "${BACKUP_CRON:-0 2 * * *}"
+      BACKUP_PASSPHRASE: "${BACKUP_PASSPHRASE:-}"
     volumes:
       - ./backup.sh:/usr/local/bin/backup.sh:ro
       - backup_data:/backups
@@ -68,11 +97,81 @@ services:
     networks:
       - hms-internal
+
+  # ── Uploads file backup (rsync mirror into a dedicated volume) ──
+  uploads-backup:
+    image: alpine:3.20
+    container_name: hms-uploads-backup
+    restart: unless-stopped
+    entrypoint:
+      - sh
+      - -c
+      - |
+        # alpine ships without rsync (installing it needs network access at start-up);
+        # the crontab line is double-quoted so the schedule variable actually expands.
+        set -e
+        apk add --no-cache rsync
+        echo "$$UPLOADS_BACKUP_CRON rsync -a --delete /source/uploads/ /backup/uploads/" > /etc/crontabs/root
+        exec crond -f -l 2
+    environment:
+      UPLOADS_BACKUP_CRON: "${UPLOADS_BACKUP_CRON:-0 3 * * *}"
+    volumes:
+      - app-uploads:/source/uploads:ro
+      - uploads_backup_data:/backup/uploads
+    networks:
+      - hms-internal
+
+  # ── Prometheus monitoring ──
+  prometheus:
+    image: prom/prometheus:v3.1.0
+    container_name: hms-prometheus
+    restart: unless-stopped
+    command:
+      - "--config.file=/etc/prometheus/prometheus.yml"
+      - "--storage.tsdb.retention.time=30d"
+      - "--storage.tsdb.retention.size=2GB"
+      - "--web.enable-lifecycle"
+    volumes:
+      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
+      - ./prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
+      -
prometheus_data:/prometheus
+    expose:
+      - "9090"
+    networks:
+      - hms-internal
+
+  # ── Grafana dashboards ──
+  grafana:
+    image: grafana/grafana:11.4.0
+    container_name: hms-grafana
+    restart: unless-stopped
+    environment:
+      GF_SECURITY_ADMIN_USER: "${GRAFANA_ADMIN_USER:-admin}"
+      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set}"  # fail closed: an empty value would leave Grafana on its default admin password
+      GF_USERS_ALLOW_SIGN_UP: "false"
+      GF_SERVER_ROOT_URL: "${GRAFANA_ROOT_URL:-http://localhost:3001}"
+    volumes:
+      - grafana_data:/var/lib/grafana
+      - ./grafana/provisioning:/etc/grafana/provisioning:ro
+    expose:
+      - "3000"
+    depends_on:
+      - prometheus
+    networks:
+      - hms-internal
+
 volumes:
   app-uploads:
     driver: local
   backup_data:
     driver: local
+  uploads_backup_data:
+    driver: local
+  nginx_logs:
+    driver: local
+  prometheus_data:
+    driver: local
+  grafana_data:
+    driver: local

 networks:
   hms-internal:
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 96d9b63..9cd5c0b 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -29,7 +29,7 @@ services:
   redis:
     image: redis:7-alpine
     container_name: erp-redis
-    command: redis-server --requirepass ${REDIS_PASSWORD:-erp_redis_dev}
+    command: redis-server --requirepass ${REDIS_PASSWORD:-erp_redis_dev} --appendonly yes
     expose:
       - "6379"
     volumes:
diff --git a/docker/nginx/nginx.conf b/docker/nginx/nginx.conf
new file mode 100644
index 0000000..e5608a6
--- /dev/null
+++ b/docker/nginx/nginx.conf
@@ -0,0 +1,96 @@
+upstream hms_backend {
+    server app:3000;
+    keepalive 32;
+}
+
+server {
+    listen 80;
+    server_name _;
+    return 301 https://$host$request_uri;
+}
+
+server {
+    listen 443 ssl http2;
+    server_name _;
+
+    # ── TLS ──
+    ssl_certificate /etc/nginx/ssl/fullchain.pem;
+    ssl_certificate_key /etc/nginx/ssl/privkey.pem;
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384;
+    ssl_prefer_server_ciphers off;
+    ssl_session_cache shared:SSL:10m;
+    ssl_session_timeout 1d;
+    ssl_session_tickets off;
+
+    # ── Security headers ──
+    add_header Strict-Transport-Security "max-age=63072000; includeSubDomains; preload" always;
+    add_header X-Content-Type-Options "nosniff" always;
+    add_header X-Frame-Options "DENY" always;
+    add_header X-XSS-Protection "0" always;  # legacy XSS auditor is deprecated and can itself be abused; rely on the CSP below
+    add_header Referrer-Policy "strict-origin-when-cross-origin" always;
+    add_header Permissions-Policy "camera=(), microphone=(), geolocation=()" always;
+    add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self'; connect-src 'self' wss:; frame-ancestors 'none'" always;
+
+    # ── Logging ──
+    access_log /var/log/nginx/hms_access.log;
+    error_log /var/log/nginx/hms_error.log warn;
+
+    # ── Uploaded files (lab reports / physical exam reports) ──
+    location /uploads/ {
+        proxy_pass http://hms_backend;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        # Size limit for large file uploads
+        client_max_body_size 50m;
+    }
+
+    # ── SSE (message push / AI analysis) ──
+    location ~ ^/api/v1/(message|ai)/.*sse {
+        proxy_pass http://hms_backend;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_http_version 1.1;
+        proxy_set_header Connection "";
+        proxy_buffering off;
+        proxy_cache off;
+        proxy_read_timeout 86400s;
+        chunked_transfer_encoding on;
+    }
+
+    # ── API reverse proxy ──
+    location /api/ {
+        proxy_pass http://hms_backend;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_http_version 1.1;
+        proxy_set_header Connection "";
+        client_max_body_size 50m;
+    }
+
+    # ── Health check ──
+    location /health {
+        proxy_pass http://hms_backend/api/v1/health;
+        access_log off;
+    }
+
+    # ── Metrics (internal networks only) ──
+    location /metrics {
+        # In production this should be restricted to Prometheus
+        allow 172.16.0.0/12;
+        allow 10.0.0.0/8;
+        deny all;
+        proxy_pass http://app:9090/metrics;  # an upstream{} group name cannot carry a port, so address the app container directly
+        access_log off;
+    }
+
+    location / {
+        return 404;
+    }
+}
diff --git a/docker/nginx/ssl/.gitignore b/docker/nginx/ssl/.gitignore
new file mode 100644
index 0000000..c3d07e5
--- /dev/null
+++ b/docker/nginx/ssl/.gitignore
@@ -0,0 +1,3 @@
+*
+!.gitkeep
+!.gitignore
diff --git a/docker/nginx/ssl/.gitkeep b/docker/nginx/ssl/.gitkeep
new file mode 100644
index 0000000..c769dac
--- /dev/null
+++ b/docker/nginx/ssl/.gitkeep
@@ -0,0 +1,8 @@
+# Place the SSL certificates in this directory
+# Required files: fullchain.pem + privkey.pem
+# For production, use Let's Encrypt or your cloud provider's certificate management
+#
+# Let's Encrypt example:
+#   certbot certonly --standalone -d your-domain.com
+#   cp /etc/letsencrypt/live/your-domain.com/fullchain.pem .
+#   cp /etc/letsencrypt/live/your-domain.com/privkey.pem .
diff --git a/docker/prometheus/alerts.yml b/docker/prometheus/alerts.yml
new file mode 100644
index 0000000..2af17f1
--- /dev/null
+++ b/docker/prometheus/alerts.yml
@@ -0,0 +1,103 @@
+groups:
+  # ── System-level alerts ──
+  - name: system
+    rules:
+      - alert: HMSHighMemoryUsage
+        expr: process_resident_memory_bytes > 800000000
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "HMS 内存使用超过 800MB"
+          description: "当前值: {{ $value | humanize }}B"
+
+      - alert: HMSHighMemoryCritical
+        expr: process_resident_memory_bytes > 1000000000
+        for: 2m
+        labels:
+          severity: critical
+        annotations:
+          summary: "HMS 内存使用超过 1GB(危险)"
+          description: "当前值: {{ $value | humanize }}B"
+
+      - alert: HMSHighCPU
+        expr: rate(process_cpu_seconds_total[5m]) > 0.8
+        for: 10m
+        labels:
+          severity: warning
+        annotations:
+          summary: "HMS CPU 使用率超过 80%"
+
+  # ── Application-level alerts ──
+  - name: application
+    rules:
+      - alert: HMSHighErrorRate
+        expr: sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m])) > 0.05  # aggregate both sides; unaggregated, each 5xx series is divided by itself
+        for: 5m
+        labels:
+          severity: critical
+        annotations:
+          summary: "API 5xx 错误率超过 5%"
+          description: "当前错误率: {{ $value | humanizePercentage }}"
+
+      - alert: HMSSlowResponses
+        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
+        for: 10m
+        labels:
+          severity: warning
+        annotations:
+          summary: "95% 请求响应时间超过 2 秒"
+
+      - alert: HMSInstanceDown
+        expr: up{job="hms"} == 0
+        for: 2m
+        labels:
+          severity: critical
+        annotations:
+          summary: "HMS 服务不可达"
+
+  # ── Database alerts ──
+  - name: database
+    rules:
+      - alert: HMSPostgresConnectionsHigh
+        expr: pg_stat_activity_count > 80
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "PostgreSQL 活跃连接数超过 80"
+
+      - alert: HMSPostgresReplicationLag
+        expr: pg_replication_lag > 30
+        for: 5m
+        labels:
+          severity: critical
+        annotations:
+          summary: "PostgreSQL 复制延迟超过 30 秒"
+
+      - alert: HMSBackupMissing
+        expr: time() - hms_last_backup_timestamp > 86400 * 2  # NOTE(review): nothing in this patch exports hms_last_backup_timestamp — confirm a producer exists, otherwise this can never fire
+        for: 1h
+        labels:
+          severity: critical
+        annotations:
+          summary: "数据库备份超过 48 小时未执行"
+
+  # ── Redis alerts ──
+  - name: redis
+    rules:
+      - alert: HMSRedisMemoryHigh
+        expr: redis_memory_used_bytes / (redis_memory_max_bytes > 0) > 0.9  # max is 0 when maxmemory is unset; skip it instead of dividing by zero
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Redis 内存使用超过 90%"
+
+      - alert: HMSRedisDown
+        expr: redis_up == 0
+        for: 2m
+        labels:
+          severity: critical
+        annotations:
+          summary: "Redis 服务不可达"
diff --git a/docker/prometheus/prometheus.yml b/docker/prometheus/prometheus.yml
new file mode 100644
index 0000000..2a4762b
--- /dev/null
+++ b/docker/prometheus/prometheus.yml
@@ -0,0 +1,32 @@
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+rule_files:
+  - "alerts.yml"
+
+scrape_configs:
+  - job_name: "hms"
+    metrics_path: /metrics
+    static_configs:
+      - targets: ["app:9090"]
+        labels:
+          service: "hms-server"
+
+  - job_name: "postgres"
+    static_configs:
+      - targets: ["postgres-exporter:9187"]  # NOTE(review): no postgres-exporter service is added by this patch — confirm it is defined elsewhere
+        labels:
+          service: "postgresql"
+
+  - job_name: "redis"
+    static_configs:
+      - targets: ["redis-exporter:9121"]  # NOTE(review): no redis-exporter service is added by this patch — confirm it is defined elsewhere
+        labels:
+          service: "redis"
+
+  - job_name: "nginx"
+    static_configs:
+      -
targets: ["nginx-exporter:9113"]
+        labels:
+          service: "nginx"
diff --git a/docker/restore.sh b/docker/restore.sh
new file mode 100644
index 0000000..62c300e
--- /dev/null
+++ b/docker/restore.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# PostgreSQL backup restore script.
+# Usage: BACKUP_PASSPHRASE=xxx ./docker/restore.sh /backups/erp_20260521_020000.sql.gz.enc
+#
+# The target is selected with PGHOST / PGPORT / PGUSER / PGDATABASE
+# (defaults: postgres / 5432 / erp / erp). BACKUP_PASSPHRASE is required
+# only when the backup file name ends in .enc.
+set -euo pipefail
+
+BACKUP_FILE="${1:?用法: restore.sh <备份文件路径>}"
+PG_HOST="${PGHOST:-postgres}"
+PG_PORT="${PGPORT:-5432}"
+PG_USER="${PGUSER:-erp}"
+PG_DB="${PGDATABASE:-erp}"
+DECRYPTED=""
+
+# Remove the decrypted plaintext dump on every exit path (success, failure or
+# `set -e` abort) so it never lingers on disk.
+cleanup() {
+    if [ -n "${DECRYPTED}" ] && [ -f "${DECRYPTED}" ]; then
+        rm -f -- "${DECRYPTED}"
+        echo "[$(date -Iseconds)] 已清理解密临时文件"
+    fi
+}
+trap cleanup EXIT
+
+if [ ! -f "${BACKUP_FILE}" ]; then
+    echo "错误: 文件不存在: ${BACKUP_FILE}" >&2
+    exit 1
+fi
+
+echo "[$(date -Iseconds)] 恢复目标: ${PG_HOST}:${PG_PORT}/${PG_DB}"
+echo "[$(date -Iseconds)] 备份文件: ${BACKUP_FILE}"
+
+# Decrypt first when the file is an encrypted backup.
+if [[ "${BACKUP_FILE}" == *.enc ]]; then
+    if [ -z "${BACKUP_PASSPHRASE:-}" ]; then
+        echo "错误: 加密备份需要设置 BACKUP_PASSPHRASE 环境变量" >&2
+        exit 1
+    fi
+    # Decrypt into a private temp file (mktemp creates it mode 0600) instead of
+    # next to the backup, so an existing .sql.gz sibling is never overwritten.
+    DECRYPTED="$(mktemp "${TMPDIR:-/tmp}/restore.XXXXXX")"
+    echo "[$(date -Iseconds)] 解密中..."
+    # env: keeps the passphrase out of the process argument list (ps, /proc).
+    export BACKUP_PASSPHRASE
+    openssl enc -d -aes-256-cbc -pbkdf2 -pass env:BACKUP_PASSPHRASE \
+        -in "${BACKUP_FILE}" -out "${DECRYPTED}"
+    BACKUP_FILE="${DECRYPTED}"
+fi
+
+# Decompress and restore. ON_ERROR_STOP makes psql exit non-zero on the first
+# SQL error instead of carrying on and reporting a successful restore.
+echo "[$(date -Iseconds)] 恢复中..."
+gunzip -c "${BACKUP_FILE}" | psql -v ON_ERROR_STOP=1 -h "${PG_HOST}" -p "${PG_PORT}" -U "${PG_USER}" -d "${PG_DB}"
+
+echo "[$(date -Iseconds)] 恢复完成"