PostgresDB备份镜像搭建

搭建一个镜像用于备份postgres容器,实现全备份(直接全备份、自动获取每个数据库并备份),自定义保留天数,备份策略。

准备工作

Dockerfile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# 使用轻量级 Debian 基础镜像
FROM debian:bullseye

# === 使用阿里云镜像源加速构建 ===
RUN sed -i 's|http://deb.debian.org|http://mirrors.aliyun.com|g' /etc/apt/sources.list && \
sed -i 's|http://security.debian.org|http://mirrors.aliyun.com|g' /etc/apt/sources.list

# === 添加 PostgreSQL 15 官方 APT 源 ===
RUN apt-get update && \
apt-get install -y wget gnupg lsb-release && \
wget -qO - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - && \
echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list

# === 安装 PostgreSQL 15 客户端及其他依赖 ===
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y \
postgresql-client-15 \
cron \
tzdata \
procps \
vim-tiny \
ca-certificates && \
# 设置时区为中国上海
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
echo "Asia/Shanghai" > /etc/timezone && \
# 清理缓存,减少镜像体积
apt-get clean && rm -rf /var/lib/apt/lists/*

# === 设置环境变量 ===
ENV TZ=Asia/Shanghai \
LANG=C.UTF-8

# === 创建必要目录 ===
RUN mkdir -p /backup /var/log && chmod 777 /backup /var/log

# === 复制备份脚本 ===
COPY postgres_backup.sh /usr/local/bin/postgres_backup.sh
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /usr/local/bin/postgres_backup.sh /entrypoint.sh

# === 健康检查:确保 cron 在运行 ===
HEALTHCHECK --interval=60s --timeout=10s --start-period=10s --retries=3 \
CMD pgrep cron >/dev/null || (echo "cron not running" && exit 1)

# === 默认启动命令 ===
CMD ["/entrypoint.sh"]

entrypoint.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/bin/bash

# 设置默认的定时任务时间(如果未提供 CRON_SCHEDULE)
CRON_SCHEDULE=${CRON_SCHEDULE:-"0 2 * * *"}
CRON_FILE="/etc/cron.d/postgres_backup"
BACKUP_SCRIPT="/usr/local/bin/postgres_backup.sh"

echo "[$(date +'%F %T')] ✅ Using cron schedule: $CRON_SCHEDULE"

#检查备份脚本是否存在
if [[ ! -x "$BACKUP_SCRIPT" ]]; then
echo "[$(date +'%F %T')] ❌ Backup script not found or not executable: $BACKUP_SCRIPT"
exit 1
fi
# 写入定时任务
#echo "$CRON_SCHEDULE root /usr/local/bin/backup.sh >> /var/log/postgres_backup.log 2>&1" > /etc/cron.d/postgres_backup
#echo "$CRON_SCHEDULE root /usr/local/bin/postgres_backup.sh >> /proc/1/fd/1 2>&1" > "$CRON_FILE"
# 使用 printf 写入 cron 任务,更稳定、无解释错误风险
printf "%s root %s >> /proc/1/fd/1 2>&1\n" "$CRON_SCHEDULE" "$BACKUP_SCRIPT" > "$CRON_FILE"
# 确保 cron 任务文件格式正确
chmod 0644 "$CRON_FILE"

# 显示已写入的任务
echo "[$(date +'%F %T')] === Cron job content:"
cat "$CRON_FILE"

# 应用 cron 任务
#crontab /etc/cron.d/postgres_backup

# 启动 cron 服务(前台)
echo "[$(date +'%F %T')] === Starting cron..."
cron -f

postgres_backup.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/bin/bash

# === 设置错误处理 ===
set -euo pipefail

# === 更安全的环境变量加载 ===
if [[ -f /.env ]]; then
# 从文件加载环境变量
export $(grep -E '^(POSTGRES_|BACKUP_DIR|RETENTION_DAYS|ALL_DATABASES|AUTO_DB_BACKUP)' /.env | xargs)
elif [[ -f /proc/1/environ ]]; then
# 从容器环境加载
while IFS= read -r -d '' line; do
if [[ "$line" =~ ^(POSTGRES_|BACKUP_DIR|RETENTION_DAYS|ALL_DATABASES|AUTO_DB_BACKUP) ]]; then
export "$line"
fi
done < /proc/1/environ
fi

# === 默认值 ===
BACKUP_DIR=${BACKUP_DIR:-"/backup"}
POSTGRES_HOST=${POSTGRES_HOST:-"192.168.99.203"}
POSTGRES_PORT=${POSTGRES_PORT:-"5432"}
POSTGRES_USER=${POSTGRES_USER:-"admin"}
POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-"StrongPass123"}
POSTGRES_DB=${POSTGRES_DB:-""}
RETENTION_DAYS=${RETENTION_DAYS:-7}
ALL_DATABASES=${ALL_DATABASES:-"false"}
AUTO_DB_BACKUP=${AUTO_DB_BACKUP:-"true"}

LOG_FILE="/var/log/postgres_backup.log"
mkdir -p "$(dirname "$LOG_FILE")" || true
mkdir -p "$BACKUP_DIR" || { echo "无法创建备份目录: $BACKUP_DIR"; exit 1; }

TIMESTAMP=$(date +'%Y%m%d_%H%M%S')
STATUS=0
SECONDS=0
FAILED_DATABASES=()

echo "[$(date +'%F %T')] ===== 开始 PostgreSQL 备份任务 =====" | tee -a "$LOG_FILE"

# === 测试数据库连接 ===
test_db_connection() {
export PGPASSWORD="$POSTGRES_PASSWORD"
echo "[$(date +'%F %T')] [INFO] 测试数据库连接..." | tee -a "$LOG_FILE"

if ! psql -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -d postgres -c "SELECT 1;" > /dev/null 2>&1; then
echo "[$(date +'%F %T')] [ERROR] 无法连接到 PostgreSQL 服务器" | tee -a "$LOG_FILE"
echo "[$(date +'%F %T')] [INFO] 请检查: " | tee -a "$LOG_FILE"
echo " - 主机: $POSTGRES_HOST" | tee -a "$LOG_FILE"
echo " - 端口: $POSTGRES_PORT" | tee -a "$LOG_FILE"
echo " - 用户: $POSTGRES_USER" | tee -a "$LOG_FILE"
echo " - 密码: ******" | tee -a "$LOG_FILE"
echo " - 网络连接" | tee -a "$LOG_FILE"
return 1
fi
echo "[$(date +'%F %T')] [OK] 数据库连接成功" | tee -a "$LOG_FILE"
return 0
}

# === 格式化时间函数 ===
format_time() {
local ELAPSED=$1
local H=$((ELAPSED / 3600))
local M=$(((ELAPSED % 3600) / 60))
local S=$((ELAPSED % 60))
local OUTPUT=""
[[ $H -gt 0 ]] && OUTPUT+="${H} 小时 "
[[ $M -gt 0 ]] && OUTPUT+="${M} 分 "
OUTPUT+="${S} 秒"
echo "$OUTPUT"
}

# === 单个数据库备份函数 ===
perform_backup() {
local DB_NAME="$1"
local OUTPUT_FILE="$2"
local RETRY=0
local MAX_RETRY=3
local START_TIME=$(date +%s)
local TMP_LOG="/tmp/postgres_backup_${DB_NAME}_$$.log"
local TMP_SQL="/tmp/backup_${DB_NAME}_$$.sql"

export PGPASSWORD="$POSTGRES_PASSWORD"

while [[ $RETRY -lt $MAX_RETRY ]]; do
echo "[$(date +'%F %T')] [INFO] 正在导出数据库: $DB_NAME (尝试第 $((RETRY + 1)) 次)" | tee -a "$LOG_FILE"

# 使用更详细的错误输出
if pg_dump -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" \
-F p -c --verbose "$DB_NAME" > "$TMP_SQL" 2> "$TMP_LOG"; then
if [[ -s "$TMP_SQL" ]]; then
if gzip -c "$TMP_SQL" > "$OUTPUT_FILE"; then
END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))
FILE_SIZE=$(du -h "$OUTPUT_FILE" | cut -f1)
echo "[$(date +'%F %T')] [OK] 备份成功: $OUTPUT_FILE" | tee -a "$LOG_FILE"
echo "[$(date +'%F %T')] [INFO] 耗时: $(format_time "$DURATION"),文件大小: $FILE_SIZE" | tee -a "$LOG_FILE"
rm -f "$TMP_SQL" "$TMP_LOG"
return 0
else
echo "[$(date +'%F %T')] [ERROR] 压缩失败: $DB_NAME" | tee -a "$LOG_FILE"
fi
else
echo "[$(date +'%F %T')] [WARN] 导出的 SQL 文件为空: $DB_NAME" | tee -a "$LOG_FILE"
fi
fi

# 输出详细错误信息
if [[ -f "$TMP_LOG" ]]; then
echo "[$(date +'%F %T')] [ERROR] 备份错误详情:" | tee -a "$LOG_FILE"
cat "$TMP_LOG" | tee -a "$LOG_FILE"
fi

if [[ $RETRY -lt $((MAX_RETRY - 1)) ]]; then
echo "[$(date +'%F %T')] [WARN] 第 $((RETRY + 1)) 次备份失败,5 秒后重试..." | tee -a "$LOG_FILE"
sleep 5
fi
RETRY=$((RETRY + 1))
done

echo "[$(date +'%F %T')] [ERROR] 备份失败: $DB_NAME,放弃重试。" | tee -a "$LOG_FILE"
FAILED_DATABASES+=("$DB_NAME")
return 1
}

# === 主备份逻辑 ===

# 首先测试数据库连接
if ! test_db_connection; then
exit 1
fi

# 1. 备份所有数据库
if [[ "$ALL_DATABASES" == "true" ]]; then
BACKUP_FILE="$BACKUP_DIR/all_databases_${TIMESTAMP}.sql.gz"
echo "[$(date +'%F %T')] [INFO] 备份所有数据库到 $BACKUP_FILE" | tee -a "$LOG_FILE"
export PGPASSWORD="$POSTGRES_PASSWORD"

if pg_dumpall -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" \
--verbose > /tmp/all_databases.sql 2>> "$LOG_FILE"; then
if [[ -s /tmp/all_databases.sql ]]; then
if gzip -c /tmp/all_databases.sql > "$BACKUP_FILE"; then
echo "[$(date +'%F %T')] [OK] 所有数据库备份成功: $BACKUP_FILE" | tee -a "$LOG_FILE"
else
echo "[$(date +'%F %T')] [ERROR] 压缩所有数据库备份失败" | tee -a "$LOG_FILE"
STATUS=1
fi
else
echo "[$(date +'%F %T')] [ERROR] 所有数据库备份文件为空" | tee -a "$LOG_FILE"
STATUS=1
fi
rm -f /tmp/all_databases.sql
else
echo "[$(date +'%F %T')] [ERROR] 所有数据库备份失败" | tee -a "$LOG_FILE"
STATUS=1
fi

# 2. 自动获取数据库列表并分别备份
elif [[ "$AUTO_DB_BACKUP" == "true" ]]; then
echo "[$(date +'%F %T')] [INFO] 获取数据库列表..." | tee -a "$LOG_FILE"
export PGPASSWORD="$POSTGRES_PASSWORD"

# 排除模板数据库和默认系统数据库
DB_LIST=$(psql -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -d postgres \
-t -c "SELECT datname FROM pg_database WHERE datistemplate = false AND datname NOT IN ('postgres');" 2>> "$LOG_FILE")

if [[ -z "$DB_LIST" ]]; then
echo "[$(date +'%F %T')] [WARN] 未找到可备份的数据库" | tee -a "$LOG_FILE"
else
echo "[$(date +'%F %T')] [INFO] 找到数据库: $(echo $DB_LIST | tr '\n' ' ')" | tee -a "$LOG_FILE"
for DB in $DB_LIST; do
BACKUP_FILE="$BACKUP_DIR/${DB}_$TIMESTAMP.sql.gz"
echo "[$(date +'%F %T')] [INFO] 开始备份数据库: $DB" | tee -a "$LOG_FILE"
perform_backup "$DB" "$BACKUP_FILE" || STATUS=1
done
fi

# 3. 备份指定数据库
elif [[ -n "$POSTGRES_DB" ]]; then
BACKUP_FILE="$BACKUP_DIR/${POSTGRES_DB}_$TIMESTAMP.sql.gz"
echo "[$(date +'%F %T')] [INFO] 备份指定数据库: $POSTGRES_DB$BACKUP_FILE" | tee -a "$LOG_FILE"
perform_backup "$POSTGRES_DB" "$BACKUP_FILE" || STATUS=1

# 4. 未指定任何备份方式
else
echo "[$(date +'%F %T')] [WARN] 未设置数据库备份目标。" | tee -a "$LOG_FILE"
echo "请设置以下环境变量之一:" | tee -a "$LOG_FILE"
echo " - POSTGRES_DB=数据库名" | tee -a "$LOG_FILE"
echo " - ALL_DATABASES=true" | tee -a "$LOG_FILE"
echo " - AUTO_DB_BACKUP=true" | tee -a "$LOG_FILE"
exit 1
fi

# === 清理过期文件 ===
echo "[$(date +'%F %T')] [INFO] 正在清理 $RETENTION_DAYS 天前的备份文件..." | tee -a "$LOG_FILE"
find "$BACKUP_DIR" -type f -name "*.sql.gz" -mtime "+$RETENTION_DAYS" -delete 2>/dev/null || true

# === 输出失败数据库列表(如有) ===
if [[ ${#FAILED_DATABASES[@]} -gt 0 ]]; then
echo "[$(date +'%F %T')] [ERROR] 以下数据库备份失败:" | tee -a "$LOG_FILE"
for DB in "${FAILED_DATABASES[@]}"; do
echo " - $DB" | tee -a "$LOG_FILE"
done
fi

# === 输出总耗时 ===
TOTAL_TIME=$(format_time "$SECONDS")
if [[ $STATUS -eq 0 ]]; then
echo "[$(date +'%F %T')] [SUCCESS] 备份任务完成,总耗时: $TOTAL_TIME" | tee -a "$LOG_FILE"
else
echo "[$(date +'%F %T')] [WARNING] 备份任务完成,但有错误,总耗时: $TOTAL_TIME" | tee -a "$LOG_FILE"
fi

echo "[$(date +'%F %T')] ===== PostgreSQL 备份任务结束 ===== (状态码: $STATUS)" | tee -a "$LOG_FILE"

exit $STATUS

搭建镜像

1
docker build -t postgres_backup:1.0 .

image-20251009162643522

使用镜像

docker-compose.yml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
version: "3.8"

services:
postgres_backup:
image: postgres_backup:${BACKUP_VERSION:-1.0}
container_name: postgres_backup
build:
context: ./ # 如果有 Dockerfile 可构建镜像,否则删除这几行直接用 image
dockerfile: Dockerfile
env_file:
- .env
environment:
CRON_SCHEDULE: "${CRON_SCHEDULE:-0 2 * * 0}" # 每周日凌晨2点
volumes:
- ./backup:/backup
restart: unless-stopped
healthcheck:
test: ["CMD", "pgrep", "cron"]
interval: 30s
timeout: 10s
retries: 3

.env

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 镜像版本
BACKUP_VERSION=1.0

# PostgreSQL 连接信息
POSTGRES_HOST=192.168.99.203
POSTGRES_PORT=5432
POSTGRES_USER=admin
POSTGRES_PASSWORD=StrongPass123
POSTGRES_DB=

# 备份策略
AUTO_DB_BACKUP=true
ALL_DATABASES=false
RETENTION_DAYS=7

# 每周日凌晨2点执行(也可以改成每天执行,比如 "0 3 * * *")
CRON_SCHEDULE=0 2 * * 0

image-20251009162706286