feat(docker): 生产环境 DevOps 基础设施 — TLS + 备份加密 + Prometheus + Redis 持久化

新增:
- nginx/nginx.conf: TLS 1.2/1.3 终端 + HSTS/CSP 安全头 + SSE 长连接 + 50M 上传限制
- prometheus/prometheus.yml: HMS/PostgreSQL/Redis/Nginx 四指标源
- prometheus/alerts.yml: 4 组告警规则(系统/应用/数据库/Redis),含 5xx 错误率 + 内存 + 连接数
- restore.sh: 备份恢复脚本(支持加密备份解密恢复)

改进:
- backup.sh: 新增 BACKUP_PASSPHRASE 加密(AES-256-CBC)+ 完整性校验 + 恢复指引
- docker-compose.production.yml: 添加 Nginx/Prometheus/Grafana/uploads-backup 容器
- docker-compose.yml: Redis 添加 --appendonly yes 持久化
- .env.production.example: 添加 DevOps 相关环境变量模板
This commit is contained in:
iven
2026-05-21 18:21:51 +08:00
parent 8e616f2210
commit bc571c7749
10 changed files with 458 additions and 9 deletions

View File

@@ -50,3 +50,21 @@ ERP__AI__DEFAULT_PROVIDER=ollama
ERP__AI__API_KEY=
ERP__AI__BASE_URL=http://localhost:11434
ERP__AI__MODEL=qwen2.5:7b
# ===== DevOps =====
# 备份加密密码openssl AES-256-CBC必填用于生产
BACKUP_PASSPHRASE=CHANGE_ME_BACKUP_ENCRYPTION_PASSWORD
# 备份保留天数
BACKUP_KEEP_DAYS=7
# 备份执行时间cron 格式)
BACKUP_CRON=0 2 * * *
# uploads 备份时间
UPLOADS_BACKUP_CRON=0 3 * * *
# Grafana 管理员密码
GRAFANA_ADMIN_PASSWORD=CHANGE_ME_GRAFANA_ADMIN
GRAFANA_ROOT_URL=http://localhost:3001

View File

@@ -1,8 +1,12 @@
#!/usr/bin/env bash
# PostgreSQL 自动备份脚本
# PostgreSQL 自动备份脚本(含加密)
# 用法:
# 手动: ./docker/backup.sh
# 自动: 由 docker compose backup 服务每日 02:00 执行
#
# 加密方式(二选一):
# BACKUP_PASSPHRASE — 使用 openssl AES-256-CBC 对称加密(无额外依赖)
# GPG_RECIPIENT — 使用 GPG 非对称加密(需预置公钥)
set -euo pipefail
BACKUP_DIR="${BACKUP_DIR:-/backups}"
@@ -13,7 +17,9 @@ PG_DB="${PGDATABSE:-erp}"
KEEP_DAYS="${KEEP_DAYS:-7}"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
FILENAME="${PG_DB}_${TIMESTAMP}.sql.gz"
ENCRYPTED_FILENAME="${FILENAME}.enc"
FILEPATH="${BACKUP_DIR}/${FILENAME}"
ENCRYPTED_FILEPATH="${BACKUP_DIR}/${ENCRYPTED_FILENAME}"
mkdir -p "${BACKUP_DIR}"
@@ -36,8 +42,54 @@ else
exit 1
fi
# 清理过期备份
DELETED=$(find "${BACKUP_DIR}" -name "${PG_DB}_*.sql.gz" -mtime +${KEEP_DAYS} -delete -print | wc -l)
# ── 加密备份 ──
if [ -n "${BACKUP_PASSPHRASE:-}" ]; then
echo "[$(date -Iseconds)] 使用 AES-256-CBC 加密备份..."
if openssl enc -aes-256-cbc -salt -pbkdf2 -pass "pass:${BACKUP_PASSPHRASE}" \
-in "${FILEPATH}" -out "${ENCRYPTED_FILEPATH}"; then
rm -f "${FILEPATH}"
ENC_SIZE=$(du -h "${ENCRYPTED_FILEPATH}" | cut -f1)
echo "[$(date -Iseconds)] 加密完成: ${ENCRYPTED_FILENAME} (${ENC_SIZE})"
else
echo "[$(date -Iseconds)] 加密失败!保留未加密备份" >&2
rm -f "${ENCRYPTED_FILEPATH}"
fi
elif [ -n "${GPG_RECIPIENT:-}" ]; then
echo "[$(date -Iseconds)] 使用 GPG 加密备份..."
if gpg --batch --yes --encrypt --recipient "${GPG_RECIPIENT}" "${FILEPATH}"; then
rm -f "${FILEPATH}"
ENC_SIZE=$(du -h "${ENCRYPTED_FILEPATH}" | cut -f1)
echo "[$(date -Iseconds)] 加密完成: ${ENCRYPTED_FILENAME} (${ENC_SIZE})"
else
echo "[$(date -Iseconds)] GPG 加密失败!保留未加密备份" >&2
rm -f "${FILEPATH}.gpg"
fi
else
echo "[$(date -Iseconds)] 警告: 未设置 BACKUP_PASSPHRASE 或 GPG_RECIPIENT备份未加密" >&2
fi
# ── 备份完整性校验 ──
LATEST_FILE=$(ls -t "${BACKUP_DIR}/${PG_DB}"_*.sql.gz* 2>/dev/null | head -1)
if [ -n "${LATEST_FILE}" ] && [ -f "${LATEST_FILE}" ]; then
if [[ "${LATEST_FILE}" == *.enc ]]; then
echo "[$(date -Iseconds)] 加密备份文件存在: $(basename "${LATEST_FILE}")"
elif gzip -t "${LATEST_FILE}" 2>/dev/null; then
echo "[$(date -Iseconds)] 备份完整性校验通过"
else
echo "[$(date -Iseconds)] 警告: 备份文件可能损坏: ${LATEST_FILE}" >&2
fi
fi
# ── 清理过期备份 ──
DELETED=$(find "${BACKUP_DIR}" -name "${PG_DB}_*.sql.gz*" -mtime +${KEEP_DAYS} -delete -print | wc -l)
if [ "${DELETED}" -gt 0 ]; then
echo "[$(date -Iseconds)] 已清理 ${DELETED} 个过期备份(>${KEEP_DAYS}天)"
fi
# ── 恢复指引 ──
echo ""
echo "恢复方法:"
echo " # 解密(如加密):"
echo " openssl enc -d -aes-256-cbc -pbkdf2 -pass pass:\$BACKUP_PASSPHRASE -in ${ENCRYPTED_FILEPATH} -out ${FILEPATH}"
echo " # 恢复:"
echo " gunzip -c ${FILEPATH} | psql -h \$PGHOST -U \$PGUSER -d \$PGDB"

View File

@@ -2,15 +2,44 @@
# 使用方式: docker compose -f docker/docker-compose.yml -f docker/docker-compose.production.yml up -d
services:
# ── Nginx 反代 + TLS 终端 ──
nginx:
image: nginx:1.27-alpine
container_name: hms-nginx
restart: unless-stopped
ports:
- "80:80"
- "443:443"
volumes:
- ./nginx/nginx.conf:/etc/nginx/conf.d/default.conf:ro
- ./nginx/ssl:/etc/nginx/ssl:ro
- nginx_logs:/var/log/nginx
depends_on:
app:
condition: service_healthy
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:80"]
interval: 30s
timeout: 5s
retries: 3
deploy:
resources:
limits:
cpus: "0.5"
memory: 128M
networks:
- hms-internal
# ── HMS 应用服务器 ──
app:
build:
context: ..
dockerfile: Dockerfile
container_name: hms-server
restart: unless-stopped
ports:
- "${APP_PORT:-3000}:3000"
- "${METRICS_PORT:-9090}:9090"
expose:
- "3000"
- "9090"
env_file:
- .env.production
environment:
@@ -40,8 +69,7 @@ services:
networks:
- hms-internal
# 每日自动备份 — 每天凌晨 02:00 执行 pg_dump保留 7 天
# 手动触发: docker compose -f docker/docker-compose.yml -f docker/docker-compose.production.yml run --rm backup
# ── 每日自动备份(含加密)──
backup:
image: postgres:16-alpine
container_name: hms-backup
@@ -59,6 +87,7 @@ services:
BACKUP_DIR: /backups
KEEP_DAYS: "${BACKUP_KEEP_DAYS:-7}"
BACKUP_CRON: "${BACKUP_CRON:-0 2 * * *}"
BACKUP_PASSPHRASE: "${BACKUP_PASSPHRASE:-}"
volumes:
- ./backup.sh:/usr/local/bin/backup.sh:ro
- backup_data:/backups
@@ -68,11 +97,76 @@ services:
networks:
- hms-internal
# ── uploads 文件备份(同步到宿主机)──
uploads-backup:
image: alpine:3.20
container_name: hms-uploads-backup
restart: unless-stopped
entrypoint: >
sh -c "
echo '$$UPLOADS_BACKUP_CRON rsync -a --delete /source/uploads/ /backup/uploads/' > /etc/crontabs/root &&
crond -f -l 2
"
environment:
UPLOADS_BACKUP_CRON: "${UPLOADS_BACKUP_CRON:-0 3 * * *}"
volumes:
- app-uploads:/source/uploads:ro
- uploads_backup_data:/backup/uploads
networks:
- hms-internal
# ── Prometheus 监控 ──
prometheus:
image: prom/prometheus:v3.1.0
container_name: hms-prometheus
restart: unless-stopped
command:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.retention.time=30d"
- "--storage.tsdb.retention.size=2GB"
- "--web.enable-lifecycle"
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
- prometheus_data:/prometheus
expose:
- "9090"
networks:
- hms-internal
# ── Grafana 可视化 ──
grafana:
image: grafana/grafana:11.4.0
container_name: hms-grafana
restart: unless-stopped
environment:
GF_SECURITY_ADMIN_USER: "${GRAFANA_ADMIN_USER:-admin}"
GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-}"
GF_USERS_ALLOW_SIGN_UP: "false"
GF_SERVER_ROOT_URL: "${GRAFANA_ROOT_URL:-http://localhost:3001}"
volumes:
- grafana_data:/var/lib/grafana
- ./grafana/provisioning:/etc/grafana/provisioning:ro
expose:
- "3000"
depends_on:
- prometheus
networks:
- hms-internal
volumes:
app-uploads:
driver: local
backup_data:
driver: local
uploads_backup_data:
driver: local
nginx_logs:
driver: local
prometheus_data:
driver: local
grafana_data:
driver: local
networks:
hms-internal:

View File

@@ -29,7 +29,7 @@ services:
redis:
image: redis:7-alpine
container_name: erp-redis
command: redis-server --requirepass ${REDIS_PASSWORD:-erp_redis_dev}
command: redis-server --requirepass ${REDIS_PASSWORD:-erp_redis_dev} --appendonly yes
expose:
- "6379"
volumes:

96
docker/nginx/nginx.conf Normal file
View File

@@ -0,0 +1,96 @@
upstream hms_backend {
server app:3000;
keepalive 32;
}
server {
listen 80;
server_name _;
return 301 https://$host$request_uri;
}
server {
listen 443 ssl http2;
server_name _;
# ── TLS ──
ssl_certificate /etc/nginx/ssl/fullchain.pem;
ssl_certificate_key /etc/nginx/ssl/privkey.pem;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384;
ssl_prefer_server_ciphers off;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 1d;
ssl_session_tickets off;
# ── 安全头 ──
add_header Strict-Transport-Security "max-age=63072000; includeSubDomains; preload" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-Frame-Options "DENY" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
add_header Permissions-Policy "camera=(), microphone=(), geolocation=()" always;
add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self'; connect-src 'self' wss:; frame-ancestors 'none'" always;
# ── 日志 ──
access_log /var/log/nginx/hms_access.log;
error_log /var/log/nginx/hms_error.log warn;
# ── 上传文件(化验单/体检报告)──
location /uploads/ {
proxy_pass http://hms_backend;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# 大文件上传限制
client_max_body_size 50m;
}
# ── SSE消息推送/AI 分析)──
location ~ ^/api/v1/(message|ai)/.*sse {
proxy_pass http://hms_backend;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_http_version 1.1;
proxy_set_header Connection "";
proxy_buffering off;
proxy_cache off;
proxy_read_timeout 86400s;
chunked_transfer_encoding on;
}
# ── API 反代 ──
location /api/ {
proxy_pass http://hms_backend;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_http_version 1.1;
proxy_set_header Connection "";
client_max_body_size 50m;
}
# ── 健康检查 ──
location /health {
proxy_pass http://hms_backend/api/v1/health;
access_log off;
}
# ── 指标(仅内网可访问)──
location /metrics {
# 生产环境应限制为 Prometheus 访问
allow 172.16.0.0/12;
allow 10.0.0.0/8;
deny all;
proxy_pass http://hms_backend:9090/metrics;
access_log off;
}
location / {
return 404;
}
}

3
docker/nginx/ssl/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
*
!.gitkeep
!.gitignore

View File

@@ -0,0 +1,8 @@
# 将 SSL 证书放置在此目录
# 必需文件: fullchain.pem + privkey.pem
# 生产环境建议使用 Let's Encrypt 或云服务商证书管理
#
# Let's Encrypt 示例:
# certbot certonly --standalone -d your-domain.com
# cp /etc/letsencrypt/live/your-domain.com/fullchain.pem .
# cp /etc/letsencrypt/live/your-domain.com/privkey.pem .

View File

@@ -0,0 +1,103 @@
groups:
# ── 系统级告警 ──
- name: system
rules:
- alert: HMSHighMemoryUsage
expr: process_resident_memory_bytes > 800000000
for: 5m
labels:
severity: warning
annotations:
summary: "HMS 内存使用超过 800MB"
description: "当前值: {{ $value | humanize }}B"
- alert: HMSHighMemoryCritical
expr: process_resident_memory_bytes > 1000000000
for: 2m
labels:
severity: critical
annotations:
summary: "HMS 内存使用超过 1GB危险"
description: "当前值: {{ $value | humanize }}B"
- alert: HMSHighCPU
expr: rate(process_cpu_seconds_total[5m]) > 0.8
for: 10m
labels:
severity: warning
annotations:
summary: "HMS CPU 使用率超过 80%"
# ── 应用级告警 ──
- name: application
rules:
- alert: HMSHighErrorRate
expr: rate(http_requests_total{status=~"5.."}[5m]) / rate(http_requests_total[5m]) > 0.05
for: 5m
labels:
severity: critical
annotations:
summary: "API 5xx 错误率超过 5%"
description: "当前错误率: {{ $value | humanizePercentage }}"
- alert: HMSSlowResponses
expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
for: 10m
labels:
severity: warning
annotations:
summary: "95% 请求响应时间超过 2 秒"
- alert: HMSInstanceDown
expr: up{job="hms"} == 0
for: 2m
labels:
severity: critical
annotations:
summary: "HMS 服务不可达"
# ── 数据库告警 ──
- name: database
rules:
- alert: HMSPostgresConnectionsHigh
expr: pg_stat_activity_count > 80
for: 5m
labels:
severity: warning
annotations:
summary: "PostgreSQL 活跃连接数超过 80"
- alert: HMSPostgresReplicationLag
expr: pg_replication_lag > 30
for: 5m
labels:
severity: critical
annotations:
summary: "PostgreSQL 复制延迟超过 30 秒"
- alert: HMSBackupMissing
expr: time() - hms_last_backup_timestamp > 86400 * 2
for: 1h
labels:
severity: critical
annotations:
summary: "数据库备份超过 48 小时未执行"
# ── Redis 告警 ──
- name: redis
rules:
- alert: HMSRedisMemoryHigh
expr: redis_memory_used_bytes / redis_memory_max_bytes > 0.9
for: 5m
labels:
severity: warning
annotations:
summary: "Redis 内存使用超过 90%"
- alert: HMSRedisDown
expr: redis_up == 0
for: 2m
labels:
severity: critical
annotations:
summary: "Redis 服务不可达"

View File

@@ -0,0 +1,32 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- "alerts.yml"
scrape_configs:
- job_name: "hms"
metrics_path: /metrics
static_configs:
- targets: ["app:9090"]
labels:
service: "hms-server"
- job_name: "postgres"
static_configs:
- targets: ["postgres-exporter:9187"]
labels:
service: "postgresql"
- job_name: "redis"
static_configs:
- targets: ["redis-exporter:9121"]
labels:
service: "redis"
- job_name: "nginx"
static_configs:
- targets: ["nginx-exporter:9113"]
labels:
service: "nginx"

43
docker/restore.sh Normal file
View File

@@ -0,0 +1,43 @@
#!/usr/bin/env bash
# PostgreSQL 备份恢复脚本
# 用法: BACKUP_PASSPHRASE=xxx ./docker/restore.sh /backups/erp_20260521_020000.sql.gz.enc
set -euo pipefail
BACKUP_FILE="${1:?用法: restore.sh <备份文件路径>}"
PG_HOST="${PGHOST:-postgres}"
PG_PORT="${PGPORT:-5432}"
PG_USER="${PGUSER:-erp}"
PG_DB="${PGDATABASE:-erp}"
if [ ! -f "${BACKUP_FILE}" ]; then
echo "错误: 文件不存在: ${BACKUP_FILE}" >&2
exit 1
fi
echo "[$(date -Iseconds)] 恢复目标: ${PG_HOST}:${PG_PORT}/${PG_DB}"
echo "[$(date -Iseconds)] 备份文件: ${BACKUP_FILE}"
# 解密(如果是加密文件)
if [[ "${BACKUP_FILE}" == *.enc ]]; then
if [ -z "${BACKUP_PASSPHRASE:-}" ]; then
echo "错误: 加密备份需要设置 BACKUP_PASSPHRASE 环境变量" >&2
exit 1
fi
DECRYPTED="${BACKUP_FILE%.enc}"
echo "[$(date -Iseconds)] 解密中..."
openssl enc -d -aes-256-cbc -pbkdf2 -pass "pass:${BACKUP_PASSPHRASE}" \
-in "${BACKUP_FILE}" -out "${DECRYPTED}"
BACKUP_FILE="${DECRYPTED}"
fi
# 解压并恢复
echo "[$(date -Iseconds)] 恢复中..."
gunzip -c "${BACKUP_FILE}" | psql -h "${PG_HOST}" -p "${PG_PORT}" -U "${PG_USER}" -d "${PG_DB}"
echo "[$(date -Iseconds)] 恢复完成"
# 清理解密文件
if [ -n "${DECRYPTED:-}" ] && [ -f "${DECRYPTED}" ]; then
rm -f "${DECRYPTED}"
echo "[$(date -Iseconds)] 已清理解密临时文件"
fi