es备份镜像搭建

实现通过快照的方式备份 es:需要 es 本身将 snapshots 目录挂载出来,每次备份生成快照并压缩打包。

容器的搭建

准备 backup.sh、entrypoint.sh、Dockerfile 三个文件。

backup.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash
# backup.sh — back up a remote Elasticsearch cluster via the snapshot API.
#
# Flow:
#   1. Ensure the fs snapshot repository REPO_NAME exists (register if missing).
#   2. Create a timestamped snapshot and wait for completion.
#   3. On the remote host, tar the snapshot's snap-/meta- data files.
#   4. Prune remote tarballs and ES snapshots older than 7 days.
#   5. rsync the new tarball to LOCAL_BACKUP_PATH and prune local copies
#      older than RETENTION_DAYS.
#
# Required environment (injected by the container):
#   REMOTE_ES_IP REMOTE_ES_PORT ES_USER ES_PASSWORD REPO_NAME
#   REMOTE_SNAPSHOT_USER SSH_PASSWORD REMOTE_BACKUP_PATH
#   LOCAL_BACKUP_PATH RETENTION_DAYS
set -euo pipefail

# Fail fast with a clear message if any required variable is unset or empty.
: "${REMOTE_ES_IP:?}" "${REMOTE_ES_PORT:?}" "${ES_USER:?}" "${ES_PASSWORD:?}"
: "${REPO_NAME:?}" "${REMOTE_SNAPSHOT_USER:?}" "${SSH_PASSWORD:?}"
: "${REMOTE_BACKUP_PATH:?}" "${LOCAL_BACKUP_PATH:?}" "${RETENTION_DAYS:?}"

ES_URL="http://${REMOTE_ES_IP}:${REMOTE_ES_PORT}"
# NOTE(review): credentials passed via -u / sshpass -p are visible in `ps` on
# this host; acceptable inside a single-purpose container, but consider
# curl --config / ssh keys if that assumption changes.

echo "[backup.sh] 备份任务启动:$(date)"

# --- 1. Ensure the snapshot repository exists --------------------------------
echo "[backup.sh] 检查远程快照仓库是否存在..."
REPO_EXISTS=$(curl -s -o /dev/null -w "%{http_code}" -u "${ES_USER}:${ES_PASSWORD}" \
  "${ES_URL}/_snapshot/${REPO_NAME}")

if [ "$REPO_EXISTS" != "200" ]; then
  echo "[backup.sh] 仓库不存在,自动注册..."
  # "location" must match a path.repo mount inside the ES node.
  curl -s -u "${ES_USER}:${ES_PASSWORD}" -X PUT \
    "${ES_URL}/_snapshot/${REPO_NAME}" \
    -H 'Content-Type: application/json' -d '{
      "type": "fs",
      "settings": {
        "location": "/usr/share/elasticsearch/snapshots"
      }
    }'
else
  echo "[backup.sh] 仓库已存在:$REPO_NAME"
fi

# --- 2. Create the snapshot ---------------------------------------------------
DATE=$(date +%Y_%m_%d-%H%M%S)
SNAPSHOT_NAME="snapshot_${DATE}"
echo "[backup.sh] 创建远程快照:$SNAPSHOT_NAME"

CREATE_SNAPSHOT=$(curl -s -u "${ES_USER}:${ES_PASSWORD}" -X PUT \
  "${ES_URL}/_snapshot/${REPO_NAME}/${SNAPSHOT_NAME}?wait_for_completion=true")

# Detect failure structurally with jq: the old substring match (*"error"*)
# also fired on success responses that merely contain the word "error"
# (e.g. in an index name).
if [ "$(printf '%s' "$CREATE_SNAPSHOT" | jq 'has("error")')" = "true" ]; then
  echo "[backup.sh] 快照创建失败:$CREATE_SNAPSHOT"
  exit 1
fi

# --- 3. Extract the snapshot UUID ---------------------------------------------
# .snapshot.uuid is the snapshot's own UUID; grepping the first "uuid" key in
# the raw body could match an unrelated field.
SNAP_UUID=$(printf '%s' "$CREATE_SNAPSHOT" | jq -r '.snapshot.uuid // empty')

if [ -z "$SNAP_UUID" ]; then
  echo "[backup.sh] 获取 snapshot UUID 失败"
  exit 1
fi

echo "[backup.sh] 获取到 snapshot UUID: $SNAP_UUID"

# --- 4. Tar the snap/meta files on the remote host ----------------------------
echo "[backup.sh] 远程压缩快照为 tar.gz 文件..."
sshpass -p "$SSH_PASSWORD" ssh -o StrictHostKeyChecking=no \
  "${REMOTE_SNAPSHOT_USER}@${REMOTE_ES_IP}" \
  "cd '${REMOTE_BACKUP_PATH}/snapshots' && \
   tar -zcvf '../snapshot_${SNAPSHOT_NAME}.tar.gz' \
     'snap-${SNAP_UUID}.dat' 'meta-${SNAP_UUID}.dat'"

# --- 5. Prune remote tarballs older than 7 days -------------------------------
echo "[backup.sh] 清理远程快照文件(>7天)..."
sshpass -p "$SSH_PASSWORD" ssh -o StrictHostKeyChecking=no \
  "${REMOTE_SNAPSHOT_USER}@${REMOTE_ES_IP}" \
  "find '${REMOTE_BACKUP_PATH}' -type f -mtime +7 -name '*.tar.gz' -exec rm -f {} \;"

# --- 6. Prune ES snapshots older than 7 days ----------------------------------
echo "[backup.sh] 清理远程 Elasticsearch 仓库中7天前的快照..."
ALL_SNAPSHOTS=$(curl -s -u "${ES_USER}:${ES_PASSWORD}" \
  "${ES_URL}/_snapshot/${REPO_NAME}/_all")

CURRENT_TS=$(date +%s)
CUTOFF_TS=$((CURRENT_TS - 7 * 24 * 60 * 60))

printf '%s' "$ALL_SNAPSHOTS" | jq -r \
  --argjson cutoff_ms "$((CUTOFF_TS * 1000))" \
  '.snapshots[] | select(.start_time_in_millis < $cutoff_ms) | .snapshot' |
while IFS= read -r SNAP; do
  echo "[backup.sh] 删除旧快照:$SNAP"
  curl -s -u "${ES_USER}:${ES_PASSWORD}" -X DELETE \
    "${ES_URL}/_snapshot/${REPO_NAME}/${SNAP}"
done

# --- 7. Pull the new tarball into the local backup dir ------------------------
echo "[backup.sh] 使用 rsync 同步快照到本地..."
mkdir -p "$LOCAL_BACKUP_PATH"
# -e keeps host-key behaviour consistent with the ssh calls above.
sshpass -p "$SSH_PASSWORD" rsync -avz -e "ssh -o StrictHostKeyChecking=no" \
  "${REMOTE_SNAPSHOT_USER}@${REMOTE_ES_IP}:${REMOTE_BACKUP_PATH}/snapshot_${SNAPSHOT_NAME}.tar.gz" \
  "${LOCAL_BACKUP_PATH}/"

# --- 8. Prune local tarballs older than RETENTION_DAYS ------------------------
# (message previously hardcoded "30天" regardless of RETENTION_DAYS)
echo "[backup.sh] 清理本地快照文件(>${RETENTION_DAYS}天)..."
find "$LOCAL_BACKUP_PATH" -type f -mtime +"$RETENTION_DAYS" -name "*.tar.gz" -exec rm -f {} \;

echo "[backup.sh] 备份任务完成。"

entrypoint.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/bash
# entrypoint.sh — container entrypoint.
# Exports the runtime environment to a file for cron (cron starts jobs with a
# near-empty environment), installs the backup job under /etc/cron.d, then
# runs cron in the foreground so the container stays alive.
set -euo pipefail

# Default schedule: 02:00 daily, unless the caller provides CRON_SCHEDULE.
CRON_SCHEDULE=${CRON_SCHEDULE:-"0 2 * * *"}
readonly CRON_FILE="/etc/cron.d/es-backup"
readonly ENV_FILE="/etc/default/cron-env.sh"
readonly BACKUP_SCRIPT="/usr/local/bin/backup.sh"
readonly CRON_TZ="Asia/Shanghai"

echo "[entrypoint.sh] 启动定时任务..."

# Export the whitelisted env vars into ENV_FILE, which the cron job sources.
# printf %q shell-quotes each value safely — the previous
# `echo "export k=\"v\""` broke on values containing '"', '\' or '$'
# (entirely plausible in passwords).
echo "#!/bin/bash" > "$ENV_FILE"
env | grep -E '^(REMOTE_ES_IP|REMOTE_ES_PORT|ES_USER|ES_PASSWORD|REPO_NAME|REMOTE_SNAPSHOT_USER|SSH_PASSWORD|REMOTE_BACKUP_PATH|LOCAL_BACKUP_PATH|CRON_SCHEDULE|RETENTION_DAYS|TZ)=' \
  | while IFS= read -r line; do
      key=${line%%=*}   # text before the first '='
      val=${line#*=}    # text after the first '=' (may itself contain '=')
      printf 'export %s=%q\n' "$key" "$val" >> "$ENV_FILE"
    done
chmod +x "$ENV_FILE"

# Write the cron job. /etc/cron.d entries require an explicit user field
# (root). Output is redirected to /proc/1/fd/1 so it shows in `docker logs`.
{
  echo "CRON_TZ=$CRON_TZ"
  printf "%s root bash -c '. %s && %s' >> /proc/1/fd/1 2>&1\n" \
    "$CRON_SCHEDULE" "$ENV_FILE" "$BACKUP_SCRIPT"
} > "$CRON_FILE"
chmod 0644 "$CRON_FILE"

# Show the installed job for debugging.
echo "[$(date +'%F %T')] === Cron job content:"
cat "$CRON_FILE"

# exec replaces this shell so cron receives container signals directly.
echo "[$(date +'%F %T')] === Starting cron..."
exec cron -f

Dockerfile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
FROM debian:bullseye

ENV TZ=Asia/Shanghai

# Switch APT to the Aliyun mirror (faster from mainland China).
RUN sed -i 's|http://deb.debian.org|http://mirrors.aliyun.com|g' /etc/apt/sources.list && \
    sed -i 's|http://security.debian.org|http://mirrors.aliyun.com|g' /etc/apt/sources.list

# openssh-client (not the "ssh" metapackage): this container only *initiates*
# ssh/rsync connections and must not ship an sshd of its own.
RUN apt-get update && \
    apt-get install -y --no-install-recommends rsync openssh-client sshpass cron tzdata curl jq procps && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

COPY backup.sh /usr/local/bin/backup.sh
COPY entrypoint.sh /usr/local/bin/entrypoint.sh

RUN chmod +x /usr/local/bin/*.sh

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]

起容器

.env

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Server A: Elasticsearch connection settings (consumed by backup.sh via curl -u)
REMOTE_ES_IP=192.168.50.102
REMOTE_ES_PORT=9200
ES_USER=elastic
ES_PASSWORD=Ypct_2025

# Name of the fs snapshot repository (auto-registered by backup.sh if absent)
REPO_NAME=es_backup

# Server A: SSH credentials used by sshpass for remote tar / cleanup / rsync
REMOTE_SNAPSHOT_USER=root
SSH_PASSWORD=Ypct@2dpc

# Server A: directory that the ES snapshot repository is mounted under
# (must contain a "snapshots" subdirectory — see backup.sh)
REMOTE_BACKUP_PATH=/data/soft/elasticsearch/snapshots

# Local backup path inside the container — do not modify
# (docker-compose bind-mounts ./backup-data to this path)
LOCAL_BACKUP_PATH=/data/backup/es

# Cron schedule for the backup job and local tarball retention (days)
CRON_SCHEDULE=30 1 * * *
RETENTION_DAYS=30
TZ=Asia/Shanghai

docker-compose.yml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
version: '3.8'

services:
  es_snapshot_sync:
    container_name: es_snapshot_sync
    build: ./es-backup
    # Fixed image-name typo: was "es_bakcup:1.0".
    image: es_backup:1.0
    # All values come from .env in this directory.
    environment:
      - REMOTE_ES_IP=${REMOTE_ES_IP}
      - REMOTE_ES_PORT=${REMOTE_ES_PORT}
      - ES_USER=${ES_USER}
      - ES_PASSWORD=${ES_PASSWORD}
      - REPO_NAME=${REPO_NAME}
      - REMOTE_SNAPSHOT_USER=${REMOTE_SNAPSHOT_USER}
      - SSH_PASSWORD=${SSH_PASSWORD}
      - REMOTE_BACKUP_PATH=${REMOTE_BACKUP_PATH}
      - LOCAL_BACKUP_PATH=${LOCAL_BACKUP_PATH}
      - CRON_SCHEDULE=${CRON_SCHEDULE}
      - RETENTION_DAYS=${RETENTION_DAYS}
      - TZ=${TZ}
    volumes:
      # Host directory that receives the pulled snapshot tarballs.
      - ./backup-data:${LOCAL_BACKUP_PATH}
    restart: always
    healthcheck:
      # String form runs via CMD-SHELL: healthy only if the remote ES answers,
      # cron is alive, and rsync is installed.
      test: >
        curl -fsu ${ES_USER}:${ES_PASSWORD} http://${REMOTE_ES_IP}:${REMOTE_ES_PORT}/_cluster/health &&
        pgrep -x cron &&
        rsync --version
      interval: 1m
      timeout: 10s
      retries: 3
    networks:
      - es_network

networks:
  es_network:
    driver: bridge