es备份镜像搭建
通过快照(snapshot)机制备份 Elasticsearch:要求 ES 本身已将 snapshots 目录挂载出来;每次备份创建快照后压缩打包存档。
容器的搭建
准备backup.sh、entrypoint.sh、Dockerfile。
backup.sh
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
#!/bin/bash
# backup.sh — snapshot a remote Elasticsearch cluster, archive the snapshot
# files on the remote host, prune old snapshots/archives, and rsync the new
# archive to the local backup directory.
#
# Required env vars:
#   REMOTE_ES_IP, REMOTE_ES_PORT, ES_USER, ES_PASSWORD, REPO_NAME,
#   REMOTE_SNAPSHOT_USER, SSH_PASSWORD, REMOTE_BACKUP_PATH,
#   LOCAL_BACKUP_PATH, RETENTION_DAYS
# Optional:
#   REMOTE_RETENTION_DAYS — remote prune window in days (default 7,
#   previously hard-coded).
set -euo pipefail

ES_URL="http://${REMOTE_ES_IP}:${REMOTE_ES_PORT}"
REMOTE_RETENTION_DAYS=${REMOTE_RETENTION_DAYS:-7}

echo "[backup.sh] 备份任务启动:$(date)"

# --- Ensure the snapshot repository exists --------------------------------
echo "[backup.sh] 检查远程快照仓库是否存在..."
REPO_EXISTS=$(curl -s -o /dev/null -w '%{http_code}' -u "$ES_USER:$ES_PASSWORD" \
  "${ES_URL}/_snapshot/${REPO_NAME}")

if [ "$REPO_EXISTS" != "200" ]; then
  echo "[backup.sh] 仓库不存在,自动注册..."
  # NOTE(review): "location" must be inside the path.repo configured on the
  # remote ES node — confirm it matches the server's elasticsearch.yml.
  curl -s -u "$ES_USER:$ES_PASSWORD" -X PUT \
    "${ES_URL}/_snapshot/${REPO_NAME}" \
    -H 'Content-Type: application/json' \
    -d '{"type":"fs","settings":{"location":"/usr/share/elasticsearch/snapshots"}}'
else
  echo "[backup.sh] 仓库已存在:$REPO_NAME"
fi

# --- Create the snapshot (synchronously) ----------------------------------
DATE=$(date +%Y_%m_%d-%H%M%S)
SNAPSHOT_NAME="snapshot_${DATE}"
echo "[backup.sh] 创建远程快照:$SNAPSHOT_NAME"

CREATE_SNAPSHOT=$(curl -s -u "$ES_USER:$ES_PASSWORD" -X PUT \
  "${ES_URL}/_snapshot/${REPO_NAME}/${SNAPSHOT_NAME}?wait_for_completion=true")

# Parse the JSON response with jq instead of a "*error*" substring match,
# which false-positives whenever any index/shard name contains "error".
if echo "$CREATE_SNAPSHOT" | jq -e 'has("error")' >/dev/null; then
  echo "[backup.sh] 快照创建失败:$CREATE_SNAPSHOT" >&2
  exit 1
fi

# Extract the snapshot UUID with jq (replaces the fragile grep -oP parse).
SNAP_UUID=$(echo "$CREATE_SNAPSHOT" | jq -r '.snapshot.uuid // empty')
if [ -z "$SNAP_UUID" ]; then
  echo "[backup.sh] 获取 snapshot UUID 失败" >&2
  exit 1
fi
echo "[backup.sh] 获取到 snapshot UUID: $SNAP_UUID"

# --- Archive the snapshot's snap/meta files on the remote host ------------
# The archive keeps the historical "snapshot_snapshot_<date>" name so it
# stays compatible with archives produced by earlier runs.
echo "[backup.sh] 远程压缩快照为 tar.gz 文件..."
sshpass -p "$SSH_PASSWORD" ssh -o StrictHostKeyChecking=no \
  "${REMOTE_SNAPSHOT_USER}@${REMOTE_ES_IP}" \
  "cd ${REMOTE_BACKUP_PATH}/snapshots && \
   tar -zcvf ../snapshot_${SNAPSHOT_NAME}.tar.gz snap-${SNAP_UUID}.dat meta-${SNAP_UUID}.dat"

# --- Prune old archives on the remote host --------------------------------
echo "[backup.sh] 清理远程快照文件(>${REMOTE_RETENTION_DAYS}天)..."
sshpass -p "$SSH_PASSWORD" ssh -o StrictHostKeyChecking=no \
  "${REMOTE_SNAPSHOT_USER}@${REMOTE_ES_IP}" \
  "find ${REMOTE_BACKUP_PATH} -type f -mtime +${REMOTE_RETENTION_DAYS} -name '*.tar.gz' -exec rm -f {} \;"

# --- Prune old snapshots from the Elasticsearch repository ----------------
echo "[backup.sh] 清理远程 Elasticsearch 仓库中${REMOTE_RETENTION_DAYS}天前的快照..."
ALL_SNAPSHOTS=$(curl -s -u "$ES_USER:$ES_PASSWORD" \
  "${ES_URL}/_snapshot/${REPO_NAME}/_all")

CUTOFF_MS=$(( ( $(date +%s) - REMOTE_RETENTION_DAYS * 24 * 60 * 60 ) * 1000 ))

echo "$ALL_SNAPSHOTS" | jq -r --argjson cutoff_ms "$CUTOFF_MS" \
  '.snapshots[] | select(.start_time_in_millis < $cutoff_ms) | .snapshot' |
while read -r SNAP; do
  echo "[backup.sh] 删除旧快照:$SNAP"
  curl -s -u "$ES_USER:$ES_PASSWORD" -X DELETE \
    "${ES_URL}/_snapshot/${REPO_NAME}/${SNAP}"
done

# --- Pull the new archive to the local backup directory -------------------
echo "[backup.sh] 使用 rsync 同步快照到本地..."
sshpass -p "$SSH_PASSWORD" rsync -avz \
  "${REMOTE_SNAPSHOT_USER}@${REMOTE_ES_IP}:${REMOTE_BACKUP_PATH}/snapshot_${SNAPSHOT_NAME}.tar.gz" \
  "${LOCAL_BACKUP_PATH}/"

# --- Prune local archives --------------------------------------------------
echo "[backup.sh] 清理本地快照文件(>${RETENTION_DAYS}天)..."
find "${LOCAL_BACKUP_PATH}" -type f -mtime +"${RETENTION_DAYS}" -name '*.tar.gz' -exec rm -f {} \;

echo "[backup.sh] 备份任务完成。"
|
entrypoint.sh
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
#!/bin/bash
# entrypoint.sh — render the backup cron job from container environment
# variables and run cron in the foreground as the container's main process.
set -euo pipefail

# Default schedule if the container didn't provide CRON_SCHEDULE.
CRON_SCHEDULE=${CRON_SCHEDULE:-"0 2 * * *"}
CRON_FILE="/etc/cron.d/es-backup"
ENV_FILE="/etc/default/cron-env.sh"
BACKUP_SCRIPT="/usr/local/bin/backup.sh"
CRON_TZ="Asia/Shanghai"

echo "[entrypoint.sh] 启动定时任务..."

# Export the variables backup.sh needs into a file the cron job sources
# (cron starts jobs with an almost-empty environment). printf %q
# shell-quotes each value, so passwords containing quotes, spaces or
# backslashes survive the round-trip — the previous cut/echo approach broke
# on embedded double quotes.
echo "#!/bin/bash" > "$ENV_FILE"
env | grep -E '^(REMOTE_ES_IP|REMOTE_ES_PORT|ES_USER|ES_PASSWORD|REPO_NAME|REMOTE_SNAPSHOT_USER|SSH_PASSWORD|REMOTE_BACKUP_PATH|LOCAL_BACKUP_PATH|CRON_SCHEDULE|RETENTION_DAYS|TZ)=' |
while IFS= read -r line; do
  key=${line%%=*}
  val=${line#*=}
  printf 'export %s=%q\n' "$key" "$val" >> "$ENV_FILE"
done
chmod +x "$ENV_FILE"

# Write the cron job. Output is redirected to PID 1's stdout so the run
# shows up in `docker logs`.
# NOTE(review): Debian's vixie-cron may ignore CRON_TZ (cronie supports it);
# the TZ env var plus the image timezone already cover the schedule — verify.
{
  echo "CRON_TZ=$CRON_TZ"
  printf "%s root bash -c '. %s && %s' >> /proc/1/fd/1 2>&1\n" \
    "$CRON_SCHEDULE" "$ENV_FILE" "$BACKUP_SCRIPT"
} > "$CRON_FILE"
chmod 0644 "$CRON_FILE"

# Show the rendered job for debugging.
echo "[$(date +'%F %T')] === Cron job content:"
cat "$CRON_FILE"

# exec so cron replaces this shell as PID 1 and receives container signals
# (docker stop) directly.
echo "[$(date +'%F %T')] === Starting cron..."
exec cron -f
|
Dockerfile
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
FROM debian:bullseye

ENV TZ=Asia/Shanghai

# Switch APT to the Aliyun mirror (Debian bullseye layout).
RUN sed -i 's|http://deb.debian.org|http://mirrors.aliyun.com|g' /etc/apt/sources.list && \
    sed -i 's|http://security.debian.org|http://mirrors.aliyun.com|g' /etc/apt/sources.list

# openssh-client instead of the 'ssh' metapackage: this image only needs the
# client side (the metapackage also pulls in openssh-server).
# Also materialize the TZ setting into /etc/localtime so non-glibc-TZ-aware
# tools (and cron) agree on the timezone.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        rsync openssh-client sshpass cron tzdata curl jq procps && \
    ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && \
    echo "$TZ" > /etc/timezone && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

COPY backup.sh /usr/local/bin/backup.sh
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/*.sh

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
|
起容器
.env
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
# Elasticsearch connection for server A (the snapshot source).
# NOTE(review): plaintext credentials in .env — keep this file out of VCS;
# consider docker secrets for ES_PASSWORD/SSH_PASSWORD.
REMOTE_ES_IP=192.168.50.102
REMOTE_ES_PORT=9200
ES_USER=elastic
ES_PASSWORD=Ypct_2025

# Snapshot repository name registered in Elasticsearch.
REPO_NAME=es_backup

# SSH credentials for server A (used by sshpass/ssh/rsync).
REMOTE_SNAPSHOT_USER=root
SSH_PASSWORD=Ypct@2dpc

# Snapshot/backup path on server A.
REMOTE_BACKUP_PATH=/data/soft/elasticsearch/snapshots

# Local backup path inside the container — do not modify
# (docker-compose bind-mounts ./backup-data here).
LOCAL_BACKUP_PATH=/data/backup/es

# Cron schedule, local retention (days) and timezone.
CRON_SCHEDULE=30 1 * * *
RETENTION_DAYS=30
TZ=Asia/Shanghai
|
docker-compose.yml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
version: '3.8'

services:
  es_snapshot_sync:
    container_name: es_snapshot_sync
    build: ./es-backup
    # Fixed image tag typo: was "es_bakcup:1.0".
    image: es_backup:1.0
    environment:
      - REMOTE_ES_IP=${REMOTE_ES_IP}
      - REMOTE_ES_PORT=${REMOTE_ES_PORT}
      - ES_USER=${ES_USER}
      - ES_PASSWORD=${ES_PASSWORD}
      - REPO_NAME=${REPO_NAME}
      - REMOTE_SNAPSHOT_USER=${REMOTE_SNAPSHOT_USER}
      - SSH_PASSWORD=${SSH_PASSWORD}
      - REMOTE_BACKUP_PATH=${REMOTE_BACKUP_PATH}
      - LOCAL_BACKUP_PATH=${LOCAL_BACKUP_PATH}
      - CRON_SCHEDULE=${CRON_SCHEDULE}
      - RETENTION_DAYS=${RETENTION_DAYS}
      - TZ=${TZ}
    volumes:
      # Archives rsync'd from the remote host land here.
      - ./backup-data:${LOCAL_BACKUP_PATH}
    restart: always
    healthcheck:
      # Healthy = remote ES reachable with these credentials AND cron is
      # running AND rsync is installed. Variables are substituted by
      # docker-compose from .env at config time.
      test: >
        curl -fsu ${ES_USER}:${ES_PASSWORD}
        http://${REMOTE_ES_IP}:${REMOTE_ES_PORT}/_cluster/health
        && pgrep -x cron && rsync --version
      interval: 1m
      timeout: 10s
      retries: 3
    networks:
      - es_network

networks:
  es_network:
    driver: bridge
|