#!/bin/bash
# ================================================================================
#
# LOCALDUMP :: COUCHDB2 - using nodejs tools couchbackup and couchrestore
# https://github.com/cloudant/couchbackup
#
# Backup:
# - creates gzipped plain text backups (JSON) from each scheme
# - write sequence id into a text file
# - store extra file with security infos
# - latest backup set is written to archive
#
# --------------------------------------------------------------------------------
# ah - Axel Hahn <axel.hahn@iml.unibe.ch>
# ds - Daniel Schueler <daniel.schueler@iml.unibe.ch>
#
# 2019-11-13  .....  v1.0   initial version with backup and restore (single DB)
# 2020-05-19  .....  v1.1   backup a single or multiple couchdb instances by globbing param
#                           ./localdump.sh backup couchdb2 demo
# 2021-10-11  .....  v1.2   added fastmode in restore: no test connect, do not
#                           delete DB before create request
# 2022-01-20         v1.3   fixes with shellcheck
# 2022-03-17         v1.4   WIP: add lines with prefix __DB__
# 2022-04-07         v1.5   check archive file, not only seq file
# 2022-04-14         v1.6   backup security infos (no restore yet)
# 2022-04-21         v1.7   restore security infos
# 2022-10-07  ah     v1.8   unescape regex with space to prevent "grep: warning: stray \ before white space"
# 2023-06-06  ah     v1.9   show a warning if the sequence id was not fetched
# 2023-06-12  ah     v1.10  skip couchdb dump if no sequence id was detected (=db deleted since fetching list of all dbs)
# 2023-06-26  ah     v1.11  speed up detection of changed databases
# 2023-06-27  ah     v1.12  enable tmp file for dblist again (faster); speedup loops in backup
# 2023-06-28  ah     v1.13  optimize backup move OUTFILE; measure time; cache backed up sequence ids
# ================================================================================

# Start-up guard: refuse to run when BACKUP_TARGETDIR is empty.
# NOTE(review): presumably the variable is provided by localdump.sh, which
# loads this file - confirm against the caller.
[ -n "$BACKUP_TARGETDIR" ] || {
    echo "ERROR: you cannot start $(basename "$0") directly"
    rc=$rc+1
    exit 1
}

# --------------------------------------------------------------------------------
# CONFIG
# --------------------------------------------------------------------------------

# contains *.config files for each instance
CFGDIR=~/.iml_backup/couchdb2

# UNUSED
# dirPythonPackages=/usr/lib/python2.7/site-packages

# now set in localdump.sh
# ARCHIVE_DIR=$(_j_getvar "${JOBFILE}" dir-dbarchive)/couchdb2

# --------------------------------------------------------------------------------
# FUNCTIONS
# --------------------------------------------------------------------------------

# make a couch api request with curl; the response is written to stdout,
# stderr of curl is discarded
# global: COUCH_URL  base url; the relative url is appended without separator
# param  string  method ... one of GET|POST|PUT|DELETE
# param  string  relative url, i.e. _all_dbs or _stats
# param  string  optional: data for POST|PUT requests
function _couchapi(){
    local method=$1
    local apiurl=$2
    local data=$3

    # The arguments are collected in an array: a data string with spaces or
    # glob characters (e.g. a JSON document) must reach curl as ONE argument.
    local -a aParams=()

    # disabled: credentials as separate option
    # aParams+=( -u "${couchdbuser}:${couchdbpw}" )
    aParams+=( -X "${method}" )
    aParams+=( "${COUCH_URL}${apiurl}" )
    if [ -n "$data" ]; then
        aParams+=( -d "${data}" )
    fi
    curl "${aParams[@]}" 2>/dev/null
}

# get a list of database names, one per line.
# The JSON array of _all_dbs is split at the double quotes; lines that only
# contain "[", "," or "]" are dropped, as well as all names containing
# _replicator or _global_changes.
function _getDblist(){
    _couchapi GET _all_dbs \
        | sed 's#\"#\n#g' \
        | grep -Ev '^(\[|,|\])$' \
        | grep -v _replicator \
        | grep -v _global_changes
}

# UNUSED
# get value update_seq of given couchdb name
# function _getDbSeq(){
#     # _couchapi GET $1 | sed 's#,\"#\n"#g' | egrep -v "^(\[|\,|\])$" | grep update_seq | cut -f 4 -d '"'
#     _couchapi GET "$1" | sed 's#,\"#\n"#g' | grep -Ev "^(\[|\,|\])$" | grep update_seq | cut -f 4 -d '"' | cut -f 1 -d '-'
# }

# count active curl processes: prints the number of lines in the process list
# that contain "curl" (lines containing "grep" are ignored).
# Remark: this counts every matching process on the system, not only the
# requests started by this script.
function curlCount(){
    ps -ef | grep -v grep | grep -c "curl"
}

# wait until no more than N curl processes exist
# param  integer  maximum count of curl processes to continue; default: 0
function wait4curlcount(){
    local -i iContinue=${1:-0}
    local -i iCount

    # poll in a loop instead of a recursion: a long wait cannot pile up
    # nested function calls
    iCount=$( curlCount )
    while [ $iCount -gt $iContinue ]; do
        iCount=$( curlCount )
    done
    return 0
}

# optimized curl requests to get metadata from all databases
# used in _doBackupOfSingleInstance
# it returns a JSON line for each database
#
# param  integer  iChunksize  count urls per curl command
# param  integer  iParallel   count of curl processes
# param  string   dblistfile  path+file to list of database
function reqCombined(){
    # Arguments (complete list):
    #   $1  iChunksize  number of database urls handed to a single curl process
    #   $2  iParallel   limit passed to wait4curlcount after starting a process
    #   $3  dblistfile  file with one database name per line
    # global: COUCH_URL  base url; the database name is appended to it
    # Output: the responses of all curl processes go to stdout.
    local -i iChunksize=$1
    local -i iParallel=$2
    local dblistfile="$3"
    local -i iCounter=0
    local -a aDbs=()
    local -a aCurlArgs=()
    local mydb

    mapfile -t aDbs < "$dblistfile"
    for mydb in "${aDbs[@]}"; do
        [ -n "$mydb" ] || continue
        iCounter+=1

        # "-:" (short for --next) separates the urls handled by one curl process
        [ ${#aCurlArgs[@]} -gt 0 ] && aCurlArgs+=( "-:" )
        aCurlArgs+=( "${COUCH_URL}${mydb}" )

        if [ $iCounter -ge $iChunksize ]; then
            curl -s "${aCurlArgs[@]}" &

            # wait until the count of curl processes is lower than the maximum
            wait4curlcount $iParallel

            iCounter=0
            aCurlArgs=()
        fi
    done

    # remaining urls of the last (incomplete) chunk
    if [ ${#aCurlArgs[@]} -gt 0 ]; then
        curl -s "${aCurlArgs[@]}" &
    fi

    wait4curlcount 0
}

# ---------- CONFIG/ INSTANCES

# get valid configured instances: prints the name (file name up to the first
# dot) of each config file that can be sourced.
# Remark: the config files are sourced into the current shell.
# param  string  optional: filter for the config file name; used as glob pattern
function getInstances(){
    local mycfg

    # ${1} is left unquoted on purpose: it is a globbing filter
    for mycfg in "${CFGDIR}"/*${1}*.config
    do
        # a pattern without match stays as literal string - skip it
        [ -f "$mycfg" ] || continue
        if . "$mycfg"; then
            basename "${mycfg}" | cut -f 1 -d "."
        fi
    done
}

# load the config of an existing instance
# see getInstances to get valid names
# The script exits with 1 if the config cannot be sourced or if it does not
# set COUCH_URL.
# param  string  name of the instance to load
function loadInstance(){
    COUCH_URL=
    if ! . "${CFGDIR}/${1}.config"; then
        color error
        echo "ERROR: invalid instance: $1 - the config file cannot be sourced"
        color reset
        exit 1
    fi
    if [ -z "${COUCH_URL}" ]; then
        color error
        echo "ERROR: invalid instance: $1 - the config file has no COUCH_URL"
        color reset
        exit 1
    fi
}

# ---------- BACKUP

# backup of the current instance: test the connection to COUCH_URL and start
# the backup of all its databases. On a failed connect rc is increased.
# global: COUCH_URL
# global: PROFILENAME
# param 1  string  globbing filter to config files
#                  (currently unused: the loop over instances is disabled)
function doBackup(){
    # disabled loop over all instances:
    # for COUCHDB_INSTANCE in $(getInstances $1)
    # do
    #     loadInstance "$COUCHDB_INSTANCE"

    echo "--- instance: $PROFILENAME"
    if curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 "; then
        echo OK, connected.
        sleep 1
        _doBackupOfSingleInstance
    else
        rc=$rc+1
        color error
        echo "ERROR: couch DB instance is not available or canot be accessed with these credentials in config file"
        # repeat curl to show the error message
        curl -X GET "$COUCH_URL"
        color reset
    fi

    echo
    echo "--- $(date) done."
    echo
    # done
}

# make backup of all databases in a couchdb instance
#
# Steps:
# (1) fetch the list of databases
# (2) move archived dumps of databases that do not exist anymore into the
#     subdir deleted_databases
# (3) fetch the current update sequence of all databases
# (4) dump, compress and archive each database with a changed sequence id
# (5) write the cache file with the sequence ids of the archived dumps
#
# global: COUCH_URL
# global: PROFILENAME
# global: ARCHIVE_DIR       (read)
# global: BACKUP_TARGETDIR  (read)
# global: SERVICENAME       (read)
# global: myrc              (read; set by fetchrc)
function _doBackupOfSingleInstance(){

    create_targetdir
    local ARCHIVE_DIR2="${ARCHIVE_DIR}/deleted_databases"
    local _dir
    for _dir in "${ARCHIVE_DIR}"  "${ARCHIVE_DIR}/seq"  "${ARCHIVE_DIR}/security" \
                "${ARCHIVE_DIR2}" "${ARCHIVE_DIR2}/seq" "${ARCHIVE_DIR2}/security"
    do
        if [ ! -d "$_dir" ]; then
            echo "creating $_dir"
            mkdir -p "$_dir"
        fi
    done

    local -i iChunksize=100
    local -i iParallel=6

    local dblistfile
    local seqfile
    local sDbSeqList
    local sSequenceCurrent
    local sSequenceLast
    local OUTFILE
    local ARCHIVFILE
    local SEQFILE
    local SECURITYFILE
    local dumpfile
    local dbname
    local db
    local seqid
    local key
    local -a aDbNames=()

    local -i iTsStart
    local -i iTsTotal
    local -i iDbPerSec
    local -i iDbTotal
    local -i iDb=0       # counter for number of database in the loop
    local -i iDbCount=0  # counter for backed up databases

    # the list of databases is stored in a temporary file with a
    # non-predictable name
    if ! dblistfile=$( mktemp "/tmp/dblist_${PROFILENAME}.XXXXXX" ); then
        color error
        echo "ERROR: unable to create a temporary file for the list of databases"
        color reset
        rc=$rc+1
        return 1
    fi

    # this is just a caching file of the sequence id of the last backup and can be safely deleted.
    seqfile="${ARCHIVE_DIR}/seq/all_seqids_of_last_backups_cache.txt"

    echo "--- $( date ) Get list of all databases"
    _getDblist >"${dblistfile}"

    iDbTotal=$( wc -l < "$dblistfile" )
    echo "${PROFILENAME} has $iDbTotal databases"

    # detect deleted databases:
    echo
    echo "--- $( date ) MOVE deleted databases "
    echo "... into ${ARCHIVE_DIR2}"
    echo
    while IFS= read -r -d '' dumpfile
    do
        # extract database name: get basename and cut extension
        dbname=${dumpfile##*/}
        dbname=${dbname%.couchdbdump.gz}

        # compare the complete line as fixed string: a prefix or regex match
        # would hide a deleted database "foo" as long as "foobar" exists
        if ! grep -Fxq -- "${dbname}" "${dblistfile}"; then
            SEQFILE=${ARCHIVE_DIR}/seq/__seq__${dbname}
            SECURITYFILE=${ARCHIVE_DIR}/security/__security__${dbname}.json
            echo "DELETED $dbname ... $( ls -l "${dumpfile}" | cut -f 5- -d ' ' )"
            mv "${dumpfile}"     "${ARCHIVE_DIR2}"
            mv "${SEQFILE}"      "${ARCHIVE_DIR2}/seq/"
            mv "${SECURITYFILE}" "${ARCHIVE_DIR2}/security/"
        fi
    done < <( find "${ARCHIVE_DIR}/" -maxdepth 1 -type f -name "*.couchdbdump.gz" -print0 )

    echo
    echo "--- $( date ) DUMP databases"
    echo " of instance ${PROFILENAME}: $iDbTotal databases"
    echo " TO BACKUP ${BACKUP_TARGETDIR}"
    echo " ARCHIVE ${ARCHIVE_DIR}"
    echo

    echo "----- $( date ) - Get database meta infos ... max $iParallel parralel curl requests sending $iChunksize database urls per process"

    # The pipeline creates a line "<db_name>,<sequence number>" per database:
    #   jq      db_name + update_seq in a single csv line
    #   tr      delete quotes
    #   tr      comma to space (to get 2 fields in awk)
    #   awk     remove the string after the first minus char in update_seq
    #           and put the comma back between both values
    sDbSeqList=$( reqCombined $iChunksize $iParallel "$dblistfile" \
        | jq -r ' [ .db_name, .update_seq ] | @csv ' \
        | tr -d '"' \
        | tr ',' ' ' \
        | awk '{ sub(/-.*/, "", $2 ); print $1 "," $2 }' )

    # the result is ... echo "$sDbSeqList" | head -3
    # _users,7688
    # candidate-00649860284626638ac6fd12bf000df5,40
    # candidate-04561cddbd0fa305714b48a57929d8b4,3

    echo "----- $( date ) - reading current sequence ids..."
    declare -A aSeq
    while IFS="," read -r db seqid
    do
        # an empty key is no valid subscript of an associative array
        if [ -n "$db" ]; then
            aSeq["$db"]="$seqid"
        fi
    done <<< "$sDbSeqList"

    echo "----- $( date ) - reading sequence ids of last backup..."
    declare -A aSeqBackup
    if [ -r "${seqfile}" ]; then
        while IFS="," read -r db seqid
        do
            if [ -n "$db" ]; then
                aSeqBackup["$db"]="$seqid"
            fi
        done < "${seqfile}"
    fi

    iTsStart=$( date +%s )
    mapfile -t aDbNames < "$dblistfile"
    for dbname in "${aDbNames[@]}"
    do
        [ -n "$dbname" ] || continue
        iDb+=1
        echo -n "----- $(date) ${PROFILENAME} -- $iDb of $iDbTotal - ${dbname} - "

        # OUTFILE is set later - only if the database must be dumped
        ARCHIVFILE=${ARCHIVE_DIR}/${dbname}.couchdbdump.gz
        SEQFILE=${ARCHIVE_DIR}/seq/__seq__${dbname}
        SECURITYFILE=${ARCHIVE_DIR}/security/__security__${dbname}.json

        sSequenceCurrent="${aSeq[$dbname]}"

        # sequence id of the last backup: use the cache; fallback is the seq file
        sSequenceLast="${aSeqBackup[$dbname]:-$( cut -f 1 -d '-' "${SEQFILE}" 2>/dev/null )}"
        aSeqBackup["${dbname}"]=$sSequenceLast

        if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] && [ -f "$ARCHIVFILE" ]; then
            echo "SKIP: still on sequence ${sSequenceLast}"

            # add security file for already existing databases
            if [ ! -f "${SECURITYFILE}" ]; then
                echo "INFO: creating missing security file ${SECURITYFILE}"
                _couchapi GET "${dbname}/_security" > "${SECURITYFILE}"
            fi
        else
            OUTFILE=${BACKUP_TARGETDIR}/$(get_outfile "${dbname}").couchdbdump
            if [ -z "$sSequenceCurrent" ]; then
                echo "WARNING: unable to fetch current sequence ID - maybe the database was deleted."
            else
                echo
                echo "update_seq --+-- current [${sSequenceCurrent}]"
                echo " +-- backup [${sSequenceLast}]"
                echo -n "Need to backup ... "

                # TODO
                # check command line
                echo
                # the dump is written into a .progress file first; it gets its
                # final name only if couchbackup was successful
                couchbackup --db "${dbname}" >"${OUTFILE}".progress 2>/dev/null && mv "${OUTFILE}".progress "${OUTFILE}"
                # exit 1;
                fetchrc

                # $myrc is last returncode - set in fetchrc
                if [ $myrc -eq 0 ]; then
                    echo -n "gzip ... "
                    compress_file "$OUTFILE"
                    fetchrc
                    if [ $myrc -eq 0 ]; then
                        iDbCount+=1

                        # flushing cached information
                        rm -f "${seqfile}" 2>/dev/null

                        # The cached sequence id is updated only after the dump
                        # was copied into the archive: otherwise a failed copy
                        # would let the next run skip an outdated archive file.
                        if cp "${OUTFILE}"* "${ARCHIVFILE}" \
                            && echo "${sSequenceCurrent}" >"${SEQFILE}"
                        then
                            aSeqBackup["${dbname}"]=${sSequenceCurrent}
                            _couchapi GET "${dbname}/_security" > "${SECURITYFILE}"
                        fi
                        ls -l "${ARCHIVFILE}" "${SEQFILE}" "${SECURITYFILE}"
                    fi
                else
                    echo "ERROR occured while dumping - abort"
                fi
                ls -l "$OUTFILE"*
                echo
            fi # if [ -z "$sSequenceCurrent" ]; then
        fi # if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] ...
    done

    iTsTotal=$(( $( date +%s ) - iTsStart ))
    # a run shorter than one second must not cause a division by zero
    if [ $iTsTotal -gt 0 ]; then
        iDbPerSec=$(( iDbTotal / iTsTotal ))
    else
        iDbPerSec=$iDbTotal
    fi

    echo "----- $( date ) - writing sequence ids ..."
    rm -f "${seqfile}" 2>/dev/null
    for key in "${!aSeqBackup[@]}"; do
        printf '%s,%s\n' "$key" "${aSeqBackup[$key]}"
    done > "${seqfile}"
    ls -l "${seqfile}"
    echo

    rm -f "$dblistfile"
    echo "__DB__$SERVICENAME backup INFO: ${PROFILENAME} - backed up $iDbCount dbs of $iDbTotal total ... in $iTsTotal sec ($iDbPerSec databases per sec)"

}

# ---------- RESTORE
#
# example:
#
# (1)
# cd /var/iml-archive/couchdb2
# or
# cd /var/iml-backup/couchdb2
#
# (2)
# /opt/imlbackup/client/localdump.sh restore couchdb2 measured-preview-couchdbcluster/mydb.couchdbdump.gz axel-01
#
#   restore ............................................... action: restore
#   couchdb2 .............................................. database service: couchdb2
#   measured-preview-couchdbcluster/mydb.couchdbdump.gz ... filename with instance as relative path
#   axel-01 ............................................... optional: target database
#

# restore a single backup file; the db name will be detected from file.
# The target database is deleted, created again, filled with the dump and
# finally gets the archived security infos.
# The script exits with 1 if no database name is available or if the
# connection test fails.
# global: COUCH_URL    (read; the detection of the instance is disabled)
# global: ARCHIVE_DIR  (read)
# param  string  filename of db dump (full path or relative to BACKUP_TARGETDIR)
# param  string  optional: target database; default: detect name from import database
function restoreByFile(){
    local sMyfile=$1
    local dbname=$2
    local bFastMode=0 # 0 = delete db first and import | 1 = create and import (on empty instance only)
    local _sourceDB
    local SECURITYFILE
    local SECDATA

    echo
    h2 "analyze dump $sMyfile"

    # disabled: detection of the instance from the path of the dump file
    # COUCHDB_INSTANCE=$(echo $sMyfile | sed "s#${BACKUP_TARGETDIR}##g" | sed "s#\./##g" | sed "s#^/##g" | cut -f 1 -d "/")
    # echo "detected COUCHDB_INSTANCE : [${COUCHDB_INSTANCE}]"
    # if [ -z "$COUCHDB_INSTANCE" ]; then
    #     echo "ERROR: Name of the instance was not detected."
    #     echo " For couchdb restore you should cd to the ${BACKUP_TARGETDIR} or ${ARCHIVE_DIR}"
    #     exit 1
    # fi

    _sourceDB="$( guessDB "$sMyfile" | sed 's#\.couchdbdump\.gz$##' )"
    echo "detected source database : [${_sourceDB}]"

    if [ -z "$dbname" ]; then
        dbname="$_sourceDB"
        echo "using the same as target : [${dbname}]"
    else
        echo "using db schema from param 2: [${dbname}]"
    fi

    # without a name the DELETE and PUT requests below would be sent to the
    # base url of the instance
    if [ -z "$dbname" ]; then
        color error
        echo "ERROR: no database name was detected - unable to restore $sMyfile"
        color reset
        exit 1
    fi

    echo

    # loadInstance $COUCHDB_INSTANCE

    if [ $bFastMode -eq 0 ]; then
        echo "connect $couchdbhost on port $couchdbport with user $couchdbuser"
        if ! curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 " >/dev/null; then
            color error
            echo ERROR: couch DB instance is not available
            curl -X GET "$COUCH_URL"
            color reset
            exit 1
        fi
        color ok
        echo OK
        color reset
    fi

    echo

    # _getDblist | grep "^${dbname}$"
    # if [ $? -eq 0 ]; then
    #     echo DB exists ... need to drop it first
    # fi

    if [ $bFastMode -eq 0 ]; then
        # the text is quoted: an unquoted [name] is a glob pattern
        h2 "deleting database [$dbname] ..."
        color cmd
        _couchapi DELETE "$dbname"
        fetchrc
        color reset
    fi

    h2 "creating database [$dbname] ..."
    color cmd
    _couchapi PUT "$dbname"
    fetchrc
    color reset

    h2 "import file ..."
    color cmd
    zcat "${sMyfile}" | couchrestore --db "$dbname"
    fetchrc
    color reset

    h2 "add security infos ..."
    SECURITYFILE="${ARCHIVE_DIR}/security/__security__${_sourceDB}.json"
    if [ ! -f "$SECURITYFILE" ]; then
        # the backup moves the security file of a deleted database into the
        # subdir deleted_databases - try this location as fallback
        SECURITYFILE="${ARCHIVE_DIR}/deleted_databases/security/__security__${_sourceDB}.json"
    fi

    if [ -f "$SECURITYFILE" ]; then
        SECDATA="$( cat "$SECURITYFILE" )"
        color cmd
        echo "add security data: $SECDATA"
        _couchapi PUT "${dbname}/_security" "$SECDATA"
        fetchrc
        color reset
    else
        echo "WARNING: no security file was found for database [${_sourceDB}] - security infos were not restored."
    fi

    echo

}

# --------------------------------------------------------------------------------
# MAIN
# --------------------------------------------------------------------------------

# ----- check requirements

# --- is a couchd here
# j_requireProcess "couchdb" 1

# --- very specific :-/ ... check available config files
ls -1 "${CFGDIR}"/* >/dev/null 2>&1
rc=$rc+$?

if [ "$rc" -eq 0 ]; then
    # echo OK: couchdb2 config was found on this system ... checking requirements for backup ...

    j_requireBinary "curl"         1
    j_requireBinary "couchbackup"  1
    j_requireBinary "couchrestore" 1

    #ls ${dirPythonPackages}/couchdb/tools/dump.py ${dirPythonPackages}/couchdb/tools/load.py >/dev/null && echo "OK: python couchdb tools were found"
    #rc=$rc+$?

    if [ "$rc" -eq 0 ]; then
        echo

        # "$@" keeps each remaining parameter as a single word
        if [ "$1" = "restore" ]; then
            echo
            shift 1
            restoreByFile "$@"
        else
            shift 1

            # remove keyword ALL which is used for localdump.sh to loop over all db types
            test "$1" = "ALL" && shift 1

            doBackup "$@"
        fi

    else
        color error
        echo "ERROR: Couchdb is here but I am missing things for the backup :-/"
        color reset
    fi

else
    rc=0
    echo "__DB__$SERVICENAME SKIP: couchdb2 config does not seem to be here"
fi

echo "__DB__$SERVICENAME INFO: $0 $* [$SERVICENAME] final returncode rc=$rc"

# --------------------------------------------------------------------------------