Select Git revision
couchdb2.sh
-
Hahn Axel (hahn) authored
couchdb2.sh 17.37 KiB
#!/bin/bash
# ================================================================================
#
# LOCALDUMP :: COUCHDB2 - using nodejs tools couchbackup and couchrestore
# https://github.com/cloudant/couchbackup
#
# Backup:
# - creates gzipped plain text backups (JSON) from each scheme
# - write sequence id into a text file
# - store extra file with security infos
# - latest backup set is written to archive
#
# --------------------------------------------------------------------------------
# ah - Axel Hahn <axel.hahn@iml.unibe.ch>
# ds - Daniel Schueler <daniel.schueler@iml.unibe.ch>
#
# 2019-11-13 ..... v1.0 initial version with backup and restore (single DB)
# 2020-05-19 ..... v1.1 backup a single or multiple couchdb instances by globbing param
# ./localdump.sh backup couchdb2 demo
# 2021-10-11 ..... v1.2 added fastmode in restore: no test connect, do not
# delete DB before create request
# 2022-01-20 v1.3 fixes with shellcheck
# 2022-03-17 v1.4 WIP: add lines with prefix __DB__
# 2022-04-07 v1.5 check archive file, not only seq file
# 2022-04-14 v1.6 backup security infos (no restore yet)
# 2022-04-21 v1.7 restore security infos
# 2022-10-07 ah v1.8 unescape regex with space to prevent "grep: warning: stray \ before white space"
# 2023-06-06 ah v1.9 show a warning if the sequence id was not fetched
# 2023-06-12 ah v1.10 skip couchdb dump if no sequence id was detected (=db deleted since fetching list of all dbs)
# 2023-06-26 ah v1.11 speed up detection of changed databases
# 2023-06-27 ah v1.12 enable tmp file for dblist again (faster); speedup loops in backup
# 2023-06-28 ah v1.13 optimize backup move OUTFILE; measure time; cache backed up sequence ids
# ================================================================================
# Refuse to run standalone: BACKUP_TARGETDIR must already be set by the
# caller, otherwise print an error and leave with exit code 1.
test -n "$BACKUP_TARGETDIR" || {
  echo "ERROR: you cannot start $(basename "$0") directly"
  rc=$rc+1
  exit 1
}

# --------------------------------------------------------------------------------
# CONFIG
# --------------------------------------------------------------------------------

# directory holding one *.config file per couchdb instance
CFGDIR=~/.iml_backup/couchdb2
# UNUSED
# dirPythonPackages=/usr/lib/python2.7/site-packages
# now set in localdump.sh
# ARCHIVE_DIR=$(_j_getvar "${JOBFILE}" dir-dbarchive)/couchdb2
# --------------------------------------------------------------------------------
# FUNCTIONS
# --------------------------------------------------------------------------------
# make a couch api request with curl; the response is written to stdout,
# messages of curl on stderr are suppressed
#
# global string  COUCH_URL  url prefix that is put in front of the relative url
#
# param  string  method ... one of GET|POST|PUT|DELETE
# param  string  relative url, i.e. _all_dbs or _stats
# param  string  optional: data for POST|PUT requests
function _couchapi(){
  local method=$1
  local apiurl=$2
  local data=$3

  # Build the command line as an array: every element stays a single
  # argument, even if the request body contains spaces or glob characters
  # (i.e. the JSON of a _security object).
  local -a aParams=( -X "${method}" "${COUCH_URL}${apiurl}" )
  if [ -n "$data" ]; then
    aParams+=( -d "${data}" )
  fi

  curl "${aParams[@]}" 2>/dev/null
}
# print the names of all databases of the current instance, one per line
# It splits the JSON array returned by _all_dbs at the double quotes and
# drops the remaining JSON syntax. Names containing _replicator or
# _global_changes are skipped.
function _getDblist(){
  # '\[\]' handles an instance without any database: the response "[]"
  # contains no quotes and must not be reported as a database name
  _couchapi GET _all_dbs \
    | sed 's#"#\n#g' \
    | grep -Ev '^(\[|,|\]|\[\])$' \
    | grep -v -e _replicator -e _global_changes
}
# print the number of lines in the process list that mention "curl"
# (lines mentioning "grep" are ignored)
function curlCount(){
  ps -ef | awk '!/grep/ && /curl/ { iFound++ } END { print iFound + 0 }'
}
# wait until not more than N curl processes are running
# It polls curlCount in a loop (no recursion, so a long wait cannot run
# into the function nesting limit).
#
# param  integer  maximum count of curl processes to continue; default: 0
function wait4curlcount(){
  local -i iContinue=${1:-0}
  local -i iCount

  iCount=$( curlCount )
  while [ "$iCount" -gt "$iContinue" ]
  do
    iCount=$( curlCount )
  done
  return 0
}
# optimized curl requests to get metadata from all databases
# used in _doBackupOfSingleInstance
# Each curl process requests up to iChunksize database urls; the responses
# of all curl processes are written to stdout.
#
# global string  COUCH_URL  url prefix that is put in front of each database name
#
# param  integer  iChunksize  count urls per curl command
# param  integer  iParallel   count of curl processes
# param  string   dblistfile  path+file to list of database
function reqCombined(){
  local -i iChunksize=$1
  local -i iParallel=$2
  local dblistfile="$3"

  local -i iCounter=0
  local -a aDbs=()
  local -a aUrls=()
  local mydb

  mapfile -t aDbs < "$dblistfile"
  for mydb in "${aDbs[@]}"
  do
    test -n "$mydb" || continue
    iCounter+=1

    # "-:" is the short form of the curl option --next: it separates the
    # urls that are handled by a single curl process
    test ${#aUrls[@]} -gt 0 && aUrls+=( "-:" )
    aUrls+=( "${COUCH_URL}${mydb}" )

    if [ $iCounter -ge $iChunksize ]; then
      curl -s "${aUrls[@]}" &

      # wait until the count of curl processes is not above the maximum
      wait4curlcount $iParallel

      iCounter=0
      aUrls=()
    fi
  done

  # send the remaining urls of the last (incomplete) chunk
  if [ ${#aUrls[@]} -gt 0 ]; then
    curl -s "${aUrls[@]}" &
  fi

  # barrier: wait for the background curl processes started by this shell
  # only - curl processes of other users or jobs must not block the backup
  wait
}
# ---------- CONFIG/ INSTANCES

# get valid configured instances
# It prints the name (config filename up to the first dot) of each config
# file in CFGDIR that can be sourced without error.
# Remark: the config files are sourced into the current shell.
#
# global string  CFGDIR  directory with *.config files
#
# param  string  optional: filter for config names; it may contain
#                globbing characters
function getInstances(){
  local mycfg
  local sName

  # ${1} is intentionally unquoted: it is used as a globbing pattern
  for mycfg in "${CFGDIR}"/*${1}*.config
  do
    # without any match the pattern itself is returned - skip it
    test -f "$mycfg" || continue
    if . "$mycfg"; then
      sName=${mycfg##*/}
      echo "${sName%%.*}"
    fi
  done
}
# load the config of an existing instance
# see getInstances to get valid names
# The script exits with code 1 if the config file cannot be sourced or if
# it does not set COUCH_URL.
#
# global string  CFGDIR     directory with *.config files
# global string  COUCH_URL  is reset and then set by the sourced config
#
# param  string  name of the instance to load
function loadInstance(){
  COUCH_URL=
  if ! . "${CFGDIR}/${1}.config"; then
    color error
    # quoted: the name must be shown as given (no word splitting/ globbing)
    echo "ERROR: invalid instance: $1 - the config file cannot be sourced"
    color reset
    exit 1
  fi
  if [ -z "${COUCH_URL}" ]; then
    color error
    echo "ERROR: invalid instance: $1 - the config file has no COUCH_URL"
    color reset
    exit 1
  fi
}
# ---------- BACKUP

# backup of the currently loaded instance
# It checks that COUCH_URL answers with http status 200 and then starts
# _doBackupOfSingleInstance. On a failed check rc is increased and the
# error message of curl is shown.
#
# global: COUCH_URL
# global: PROFILENAME
# global: rc
#
# param 1  string  globbing filter to config files (currently not used here)
function doBackup(){
  echo "--- instance: $PROFILENAME"

  if ! curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 "; then
    rc=$rc+1
    color error
    echo "ERROR: couch DB instance is not available or canot be accessed with these credentials in config file"
    # run curl once more - this time with visible output to show its message
    curl -X GET "$COUCH_URL"
    color reset
  else
    echo OK, connected.
    sleep 1
    _doBackupOfSingleInstance
  fi

  printf '\n--- %s done.\n\n' "$(date)"
}
# make backup of all databases in a couchdb instance
#
# Steps:
# - fetch the list of all databases
# - move dumps of databases that no longer exist into "deleted_databases"
# - fetch the current update sequence of all databases
# - dump each database whose sequence id differs from the last backup (or
#   that has no archive file yet); store dump, sequence id and security
#   infos in the archive
# - write a cache file with the sequence ids of the last backups
#
# global: COUCH_URL
# global: PROFILENAME
# global: ARCHIVE_DIR
# global: BACKUP_TARGETDIR
# global: SERVICENAME
# global: myrc - last returncode; set in fetchrc
function _doBackupOfSingleInstance(){

  create_targetdir

  local ARCHIVE_DIR2="${ARCHIVE_DIR}/deleted_databases"
  local _dir
  for _dir in "${ARCHIVE_DIR}"  "${ARCHIVE_DIR}/seq"  "${ARCHIVE_DIR}/security" \
              "${ARCHIVE_DIR2}" "${ARCHIVE_DIR2}/seq" "${ARCHIVE_DIR2}/security"
  do
    test -d "$_dir" || (echo "creating $_dir" ; mkdir -p "$_dir" )
  done

  local iChunksize=100
  local iParallel=6

  local dblistfile
  local seqfile
  local seq
  local sSequenceCurrent
  local sSequenceLast
  local OUTFILE
  local ARCHIVFILE
  local SEQFILE
  local SECURITYFILE
  local dumpfile
  local dbname
  local db
  local seqid
  local key
  local -a aDbs=()
  local -A aSeq=()
  local -A aSeqBackup=()

  local -i iTsStart
  local -i iTsTotal
  local -i iDbPerSec
  local -i iDbTotal
  local -i iDb=0       # counter for number of database in the loop
  local -i iDbCount=0  # counter for backed up databases

  dblistfile="/tmp/dblist_${PROFILENAME}.txt"

  # this is just a caching file of the sequence id of the last backup and can be safely deleted.
  seqfile="${ARCHIVE_DIR}/seq/all_seqids_of_last_backups_cache.txt"

  echo "--- $( date +%H:%M:%S ) Get list of all databases"
  _getDblist >"${dblistfile}"

  iDbTotal=$( wc -l < "$dblistfile")
  echo "${PROFILENAME} has $iDbTotal databases"

  # ----- detect deleted databases:
  echo
  echo "--- $( date +%H:%M:%S ) MOVE deleted databases "
  echo "... into ${ARCHIVE_DIR2}"
  echo
  for dumpfile in "${ARCHIVE_DIR}"/*.couchdbdump.gz
  do
    # regular files only; this also skips the pattern itself if nothing matched
    if [ ! -f "$dumpfile" ] || [ -L "$dumpfile" ]; then
      continue
    fi

    # extract database name: get basename and cut extension
    dbname=${dumpfile##*/}
    dbname=${dbname%.couchdbdump.gz}

    # -F -x: compare the complete line as fixed string. A prefix or regex
    # match would keep the dump of a deleted database "foo" as long as a
    # database "foobar" exists.
    if ! grep -Fxq -- "${dbname}" "${dblistfile}"; then
      SEQFILE=${ARCHIVE_DIR}/seq/__seq__${dbname}
      SECURITYFILE=${ARCHIVE_DIR}/security/__security__${dbname}.json
      echo "DELETED $dbname ... $( ls -l "${dumpfile}" | cut -f 5- -d ' ' )"
      mv "${dumpfile}"     "${ARCHIVE_DIR2}"
      mv "${SEQFILE}"      "${ARCHIVE_DIR2}/seq/"
      mv "${SECURITYFILE}" "${ARCHIVE_DIR2}/security/"
    fi
  done

  # ----- dump databases
  echo
  echo "--- $( date +%H:%M:%S ) DUMP databases"
  echo " of instance ${PROFILENAME}: $iDbTotal databases"
  echo " TO BACKUP ${BACKUP_TARGETDIR}"
  echo " ARCHIVE ${ARCHIVE_DIR}"
  echo

  echo "----- $( date +%H:%M:%S ) - Get database meta infos ... max $iParallel parralel curl requests sending $iChunksize database urls per process"
  # Steps of the pipeline:
  # - jq   : db_name + update_seq in a single csv line
  # - tr   : delete quotes
  # - tr   : comma to space (for awk values)
  # - awk  : remove string after first minus char of the sequence and
  #          join both values with a comma again
  # The result is database name + comma + sequence id, i.e.
  #   _users,7688
  #   candidate-00649860284626638ac6fd12bf000df5,40
  seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"' | tr ',' ' ' | awk '{ sub(/-.*/, "", $2 ); print $1 "," $2 }' )

  echo "----- $( date +%H:%M:%S ) - reading current sequence ids..."
  while IFS="," read -r db seqid
  do
    # lines without database name (i.e. an error response) are ignored
    test -n "$db" || continue
    aSeq[$db]=$seqid
  done <<< "$seq"

  echo "----- $( date +%H:%M:%S ) - reading sequence ids of last backup..."
  if [ -r "${seqfile}" ]; then
    while IFS="," read -r db seqid
    do
      test -n "$db" || continue
      aSeqBackup[$db]=$seqid
    done < "${seqfile}"
  fi

  mapfile -t aDbs < "$dblistfile"
  iTsStart=$( date +%s)
  for dbname in "${aDbs[@]}"
  do
    test -n "$dbname" || continue
    iDb+=1
    echo -n "----- $( date +%H:%M:%S ) ${PROFILENAME} -- $iDb of $iDbTotal - ${dbname} - "

    ARCHIVFILE=${ARCHIVE_DIR}/${dbname}.couchdbdump.gz
    SEQFILE=${ARCHIVE_DIR}/seq/__seq__${dbname}
    SECURITYFILE=${ARCHIVE_DIR}/security/__security__${dbname}.json

    # compare current sequence id with last backup sequence id
    # (taken from the cache; fallback is the seq file of the database)
    sSequenceCurrent="${aSeq[$dbname]}"
    sSequenceLast="${aSeqBackup[$dbname]:-$(cut -f 1 -d '-' "${SEQFILE}" 2>/dev/null)}"
    aSeqBackup[${dbname}]=$sSequenceLast

    if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] && [ -f "$ARCHIVFILE" ]; then
      echo "SKIP: still on sequence ${sSequenceLast}"
    else
      OUTFILE=${BACKUP_TARGETDIR}/$(get_outfile "${dbname}").couchdbdump
      if [ -z "$sSequenceCurrent" ]; then
        echo "WARNING: unable to fetch current sequence ID - maybe the database was deleted."
      else
        echo
        echo "update_seq --+-- current [${sSequenceCurrent}]"
        echo " +-- backup [${sSequenceLast}]"
        echo -n "Need to backup ... "

        # dump into a *.progress file first; it gets its final name on success only
        couchbackup --db "${dbname}" >"${OUTFILE}".progress 2>/dev/null && mv "${OUTFILE}".progress "${OUTFILE}"
        fetchrc

        # $myrc is last returncode - set in fetchrc
        if [ $myrc -eq 0 ]; then
          echo -n "gzip ... "
          compress_file "$OUTFILE"
          fetchrc
          if [ $myrc -eq 0 ]; then
            iDbCount+=1
            aSeqBackup[${dbname}]=${sSequenceCurrent}

            # flushing cached information
            rm -f "${seqfile}" 2>/dev/null

            cp "${OUTFILE}"* "${ARCHIVFILE}" \
              && echo "${sSequenceCurrent}">"${SEQFILE}" \
              && _couchapi GET "${dbname}/_security" > "${SECURITYFILE}"
            ls -l "${ARCHIVFILE}" "${SEQFILE}" "${SECURITYFILE}"
          fi
        else
          echo "ERROR occured while dumping - abort"
        fi
        ls -l "$OUTFILE"*
        echo
      fi # if [ -z "$sSequenceCurrent" ]; then
    fi # if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] ...
  done

  iTsTotal=$(( $( date +%s ) - iTsStart ))
  # A run that needs less than a second has an elapsed time of 0: a division
  # by zero would abort the function before the cache and the summary are
  # written. For the rate it is handled like one second.
  if [ $iTsTotal -gt 0 ]; then
    iDbPerSec=$(( iDbTotal / iTsTotal ))
  else
    iDbPerSec=$iDbTotal
  fi

  # cache sequence ids in a file
  echo "----- $( date +%H:%M:%S ) - writing sequence ids ..."
  for key in "${!aSeqBackup[@]}"; do
    echo "$key,${aSeqBackup[$key]}"
  done > "${seqfile}"
  ls -l "${seqfile}"

  echo
  rm -f "$dblistfile"
  echo "__DB__$SERVICENAME backup INFO: ${PROFILENAME} - backed up $iDbCount dbs of $iDbTotal total ... in $iTsTotal sec ($iDbPerSec databases per sec)"
}
# ---------- RESTORE
#
# example:
#
# (1)
# cd /var/iml-archive/couchdb2
# or
# cd /var/iml-backup/couchdb2
#
# (2)
# /opt/imlbackup/client/localdump.sh restore couchdb2 measured-preview-couchdbcluster/mydb.couchdbdump.gz axel-01
# ^ ^ ^ ^
# | | | |
# action: restore ---------------+ | | |
# database service: couchdb2 ------------+ | |
# filename with instance as relative path --------+ |
# optional: target database --------------------------------------------------------------------------+
#
# restore a single backup file; the db name will be detected from file
#
# Steps: check the connection, delete the target database, create it again,
# import the dump and finally put the archived security infos (if a file
# for the source database exists).
# The script exits with code 1 if the couchdb instance is not available.
#
# global: COUCH_URL
# global: ARCHIVE_DIR
#
# param  string  filename of db dump (full path or relative to BACKUP_TARGETDIR)
# param  string  optional: target database; default: detect name from import database
function restoreByFile(){
  local sMyfile=$1
  local dbname=$2
  local bFastMode=0 # 0 = delete db first and import | 1 = create and import (on empty instance only)
  local _sourceDB
  local SECURITYFILE
  local SECDATA

  echo
  h2 "analyze dump $sMyfile"

  # dots are escaped: only the literal extension is removed
  _sourceDB="$( guessDB "$sMyfile" | sed 's#\.couchdbdump\.gz$##' )"
  echo "detected source database : [${_sourceDB}]"

  if [ -z "$dbname" ]; then
    dbname="$_sourceDB"
    echo "using the same as target : [${dbname}]"
  else
    echo "using db schema from param 2: [${dbname}]"
  fi
  echo

  if [ $bFastMode -eq 0 ]; then
    echo connect $couchdbhost on port $couchdbport with user $couchdbuser
    if ! curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 " >/dev/null; then
      color error
      echo ERROR: couch DB instance is not available
      # repeat curl to show the error message
      curl -X GET "$COUCH_URL"
      color reset
      exit 1
    fi
    color ok
    echo OK
    color reset
  fi
  echo

  if [ $bFastMode -eq 0 ]; then
    # "[$dbname]" is quoted: unquoted it is a globbing pattern that could
    # be replaced by a matching filename of the current directory
    h2 deleting database "[$dbname]" ...
    color cmd
    _couchapi DELETE "$dbname"
    fetchrc
    color reset
  fi

  h2 creating database "[$dbname]" ...
  color cmd
  _couchapi PUT "$dbname"
  fetchrc
  color reset

  h2 import file ...
  color cmd
  zcat "${sMyfile}" | couchrestore --db "$dbname"
  fetchrc
  color reset

  h2 add security infos ...
  # todo: this will fail when restoring from "deleted_databases" folder
  SECURITYFILE="${ARCHIVE_DIR}/security/__security__${_sourceDB}.json"
  if [ -f "$SECURITYFILE" ]; then
    SECDATA="$( cat "$SECURITYFILE" )"
    color cmd
    echo "add security data: $SECDATA"
    _couchapi PUT "${dbname}/_security" "$SECDATA"
    fetchrc
    color reset
  else
    color warning
    echo "WARNING: no security data file was found: $SECURITYFILE"
    color reset
  fi
  echo
}
# --------------------------------------------------------------------------------
# MAIN
# --------------------------------------------------------------------------------
# ----- check requirements

# --- is a couchd here
# j_requireProcess "couchdb" 1

# --- very specific :-/ ... check available config files
# rc gets the exit code of ls added.
# NOTE(review): "rc=$rc+$?" only sums up if rc was declared as integer
# by the calling script - confirm
ls -1 "${CFGDIR}"/* >/dev/null 2>&1
rc=$rc+$?

if [ $rc -eq 0 ]; then
  # config was found ... checking requirements for backup and restore
  j_requireBinary "curl" 1
  j_requireBinary "couchbackup" 1
  j_requireBinary "couchrestore" 1

  if [ $rc -eq 0 ]; then
    echo
    # "$@" hands over each remaining parameter unchanged - a filename with
    # spaces stays a single parameter
    if [ "$1" = "restore" ]; then
      echo
      shift 1
      restoreByFile "$@"
    else
      shift 1
      # remove keyword ALL which is used for localdump.sh to loop over all db types
      test "$1" = "ALL" && shift 1
      doBackup "$@"
    fi
  else
    color error
    echo "ERROR: Couchdb is here but I am missing things for the backup :-/"
    color reset
  fi
else
  # no config file: this is not an error - the backup of this type is skipped
  rc=0
  echo "__DB__$SERVICENAME SKIP: couchdb2 config does not seem to be here"
fi

echo "__DB__$SERVICENAME INFO: $0 $* [$SERVICENAME] final returncode rc=$rc"
# --------------------------------------------------------------------------------