Skip to content
Snippets Groups Projects
Select Git revision
  • 6e5e354d44659796559c3e0b9653355e1513c2ad
  • main default protected
  • v0.1
3 results

200_exec.php

Blame
  • couchdb2.sh 16.36 KiB
    #!/bin/bash
    # ================================================================================
    #
    # LOCALDUMP :: COUCHDB2 - using nodejs tools couchbackup and couchrestore
    # https://github.com/cloudant/couchbackup
    #
    # Backup:
    # - creates gzipped plain text backups (JSON) from each scheme
    # - write sequence id into a text file
    # - store extra file with security infos
    # - latest backup set is written to archive
    #
    # --------------------------------------------------------------------------------
    # ah - Axel Hahn <axel.hahn@iml.unibe.ch>
    # ds - Daniel Schueler <daniel.schueler@iml.unibe.ch>
    #
    # 2019-11-13  .....  v1.0   initial version with backup and restore (single DB)
    # 2020-05-19  .....  v1.1   backup a single or multiple couchdb instances by globbing param
    #                           ./localdump.sh backup couchdb2 demo
    # 2021-10-11  .....  v1.2   added fastmode in restore: no test connect, do not 
    #                           delete DB before create request
    # 2022-01-20         v1.3   fixes with shellcheck
    # 2022-03-17         v1.4   WIP: add lines with prefix __DB__
    # 2022-04-07         v1.5   check archive file, not only seq file
    # 2022-04-14         v1.6   backup security infos (no restore yet)
    # 2022-04-21         v1.7   restore security infos
    # 2022-10-07  ah     v1.8   unescape regex with space to prevent "grep: warning: stray \ before white space"
    # 2023-06-06  ah     v1.9   show a warning if the sequence id was not fetched
    # 2023-06-12  ah     v1.10  skip couchdb dump if no sequence id was detected (=db deleted since fetching list of all dbs)
    # 2023-06-26  ah     v1.11  speed up detection of changed databases
    # 2023-06-27  ah     v1.12  enable tmp file for dblist again (faster); speedup loops in backup
    # 2023-06-28  ah     v1.13  optimize backup move OUTFILE; measure time; cache backed up sequence ids
    # 2024-10-02  ah     v1.14  rename backup and restore function
    # ================================================================================
    
    # Guard: this module must be sourced by the localdump wrapper, which
    # sets BACKUP_TARGETDIR (and rc, SERVICENAME, ...); refuse to run standalone.
    if [ -z "$BACKUP_TARGETDIR" ]; then
      echo "ERROR: you cannot start $(basename "$0") directly"
      # BUGFIX: 'rc=$rc+1' appended the literal string "+1" instead of
      # incrementing; use arithmetic expansion (unset rc counts as 0)
      rc=$((rc+1))
      exit 1
    fi
    
    # --------------------------------------------------------------------------------
    # FUNCTIONS
    # --------------------------------------------------------------------------------
    
    # make a couch api request and print the response to stdout
    # param  string  method ... one of GET|POST|PUT|DELETE
    # param  string  relative url, i.e. _all_dbs or _stats
    # param  string  optional: data for POST|PUT requests
    # global string  COUCH_URL  base url of the instance (incl. credentials)
    function _couchapi(){
        local method=$1
        local apiurl=$2
        local data=$3
        local -a aParams=()

        # BUGFIX: the parameters were collected in one string and passed
        # unquoted to curl; a data payload containing spaces (e.g. the JSON
        # security object restored in couchdb2.restore) was word-split into
        # several arguments. An array keeps each argument intact.
        aParams+=( -X "${method}" )
        aParams+=( "${COUCH_URL}${apiurl}" )
        if [ -n "$data" ]; then
            aParams+=( -d "${data}" )
        fi
        curl "${aParams[@]}" 2>/dev/null
    }
    
    # list all databases of the instance, one name per line; JSON array
    # decoration is stripped and the internal databases _replicator and
    # _global_changes are excluded
    function _getDblist(){
        _couchapi GET _all_dbs \
            | sed 's#\"#\n#g' \
            | grep -Ev "^(\[|\,|\])$" \
            | grep -v -e _replicator -e _global_changes
    }
    
    # get count of currently running curl processes
    # (the grep process itself is filtered out before counting)
    function curlCount(){
        ps -ef | grep -v grep | grep -c "curl"
    }
    
    # block until the number of running curl processes has dropped to the
    # given threshold or below
    # param  integer  optional: max allowed count of curl processes
    #                 (default: 0 = wait until all curl processes finished)
    function wait4curlcount(){
        # BUGFIX: 'typeset -i local iContinue' declared a variable literally
        # named "local" instead of limiting the scope; use 'local -i'.
        local -i iContinue
        local -i iCount

        iContinue=${1:-0}

        # BUGFIX: replace unbounded recursion with a polling loop and add a
        # short sleep to avoid hammering ps in a busy loop
        iCount=$( curlCount )
        while [ $iCount -gt $iContinue ]; do
            sleep 0.1
            iCount=$( curlCount )
        done
    }
    
    # optimized curl requests to get metadata from all databases
    # used in _doBackupOfSingleInstance
    # it returns a JSON line for each database
    #
    # param  integer  iChunksize  count of urls per curl command
    # param  integer  iParallel   max count of parallel curl processes
    # param  string   dblistfile  path+file to list of databases (one per line)
    # global string   COUCH_URL   base url of the instance
    function reqCombined(){
        local -i iChunksize; iChunksize=$1
        local -i iParallel;  iParallel=$2
        local dblistfile;    dblistfile="$3"

        local -i iCounter=0
        local cmdline=
        local mydb

        # BUGFIX: read the list line by line instead of 'for ... in $(cat ...)'
        # which word-splits and globs; '|| [ -n ... ]' also handles a missing
        # trailing newline in the file
        while IFS= read -r mydb || [ -n "$mydb" ]
        do
            # skip empty lines (the old word-splitting loop ignored them too)
            test -z "$mydb" && continue

            iCounter+=1
            test -n "$cmdline" && cmdline+=" -: "
            cmdline+="${COUCH_URL}${mydb} "

            if [ $iCounter -ge $iChunksize ]; then
                # NOTE: $cmdline is intentionally unquoted - it contains
                # several urls and "-:" (curl next-url) separators as words
                curl -s $cmdline &

                # wait until the count of curl processes is below the maximum
                wait4curlcount $iParallel

                iCounter=0
                cmdline=
            fi

        done < "$dblistfile"

        # flush the remaining urls of the last (partial) chunk
        test -n "${cmdline}" && curl -s $cmdline &

        wait4curlcount 0
    }
    
    
    # ---------- BACKUP

    # backup entry point for the current couchdb instance: verify the
    # connection, then dump all databases of this instance
    # global: COUCH_URL    base url of the instance
    # global: PROFILENAME  name of the instance (for logging)
    # global: rc           cumulated return code (incremented on failure)
    function couchdb2.backup(){

        echo "--- instance: $PROFILENAME"
        if curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 "; then
            echo OK, connected.
            sleep 1
            _doBackupOfSingleInstance
        else
            # BUGFIX: 'rc=$rc+1' appended the string "+1"; use arithmetic
            rc=$((rc+1))
            color error
            echo "ERROR: couch DB instance is not available or cannot be accessed with these credentials in config file"
            # repeat curl to show the error message
            curl -X GET "$COUCH_URL"
            color reset
        fi

        echo
        echo "--- $(date) done."
        echo
    }
    
    # make backup of all databases in a couchdb instance
    # - moves dumps of meanwhile deleted databases into the archive
    # - dumps only databases whose update sequence changed since last backup
    # - per database: gzipped dump + sequence id file + security info file
    # global: COUCH_URL         base url of the instance
    # global: PROFILENAME       name of the instance (logging, tmp file name)
    # global: ARCHIVE_DIR       directory of the latest complete backup set
    # global: BACKUP_TARGETDIR  directory for the new dump files
    function _doBackupOfSingleInstance(){

        create_targetdir
        # archive subdir for databases that no longer exist on the instance
        local ARCHIVE_DIR2="${ARCHIVE_DIR}/deleted_databases"
        for _dir in "${ARCHIVE_DIR}"      "${ARCHIVE_DIR}/seq"  "${ARCHIVE_DIR}/security" \
                    "${ARCHIVE_DIR2}"     "${ARCHIVE_DIR2}/seq" "${ARCHIVE_DIR2}/security"
        do
        test -d "$_dir" || (echo "creating $_dir" ; mkdir -p "$_dir" )
        done

        # tuning for reqCombined: urls per curl call / parallel curl processes
        local iChunksize=100
        local iParallel=6

        local dblistfile
        local sSequenceCurrent
        local sSequenceLast
        local OUTFILE
        local ARCHIVFILE
        local SEQFILE
        local SECURITYFILE
        local iTsStart; typeset -i iTsStart
        local iTsTotal; typeset -i iTsTotal
        local iDbPerSec; typeset -i iDbPerSec

        dblistfile="/tmp/dblist_${PROFILENAME}.txt"

        # this is just a caching file of the sequence id of the last backup and can be safely deleted.
        # NOTE(review): 'seqfile' is not declared local (unlike the vars above) - confirm whether that is intentional
        seqfile="${ARCHIVE_DIR}/seq/all_seqids_of_last_backups_cache.txt"

        echo "--- $( date +%H:%M:%S ) Get list of all databases"
        _getDblist >"${dblistfile}"

        typeset -i iDbTotal; iDbTotal=$( wc -l < "$dblistfile")
        typeset -i iDb=0        # counter for number of database in the loop
        typeset -i iDbCount=0   # counter for backed up databases
        echo "${PROFILENAME} has $iDbTotal databases"

        # detect deleted databases: every archived dump whose database name no
        # longer appears in the fresh database list is moved to ARCHIVE_DIR2
        echo
        echo "--- $( date +%H:%M:%S ) MOVE deleted databases "
        echo "... into ${ARCHIVE_DIR2}"
        echo
        for dumpfile in $( find "${ARCHIVE_DIR}/" -maxdepth 1 -type f -name "*.couchdbdump.gz" )
        do
            # extract database name: get basename and cut extension
            # dbname=$( basename $dumpfile | sed "s#\.couchdbdump\.gz##g" )
            dbname=${dumpfile##*/}
            # two-step suffix strip: replace ".couchdbdump.gz" at the end
            # with "/" ...
            dbname=${dbname/%.couchdbdump.gz//}
            # ... and remove that "/" again
            dbname=${dbname/\/}

            # NOTE(review): "^${dbname}" is a prefix match - a deleted db whose
            # name is a prefix of an existing db would not be detected; verify
            if ! grep "^${dbname}" "${dblistfile}"  >/dev/null; then
                SEQFILE=${ARCHIVE_DIR}/seq/__seq__${dbname}
                SECURITYFILE=${ARCHIVE_DIR}/security/__security__${dbname}.json
                echo "DELETED $dbname ... $( ls -l ${dumpfile} | cut -f 5- -d ' ' )"
                mv "${dumpfile}"     "${ARCHIVE_DIR2}"
                mv "${SEQFILE}"      "${ARCHIVE_DIR2}/seq/"
                mv "${SECURITYFILE}" "${ARCHIVE_DIR2}/security/"
            fi
        done

        echo
        echo "--- $( date +%H:%M:%S ) DUMP databases"
        echo "    of instance ${PROFILENAME}: $iDbTotal databases"
        echo "    TO BACKUP ${BACKUP_TARGETDIR}"
        echo "      ARCHIVE ${ARCHIVE_DIR}"
        echo

        # fetch db_name + update_seq of all databases with a few parallel
        # bulk curl requests and normalize the output to "dbname,seqid" lines
        echo "----- $( date +%H:%M:%S ) - Get database meta infos ... max $iParallel parralel curl requests sending $iChunksize database urls per process"
        seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"' | tr ',' ' '  | awk '{ sub(/-.*/, "", $2 ); print $1 "," $2  }' )
        #                                                                                             ^        ^           ^                    ^                              ^
        #                                                     db_name + update_seq in a single line --+        |           |                    |   and back: space to comma --+
        #                                                                                      delete quotes --+           |                    +-- remove string after first minus char
        #                                                                                comma to space (for awk values) --+
        # the result is database name + comma + sequence id
        # echo "$seq" | head -3
        # _users,7688
        # candidate-00649860284626638ac6fd12bf000df5,40
        # candidate-04561cddbd0fa305714b48a57929d8b4,3

        # build map: database name -> current sequence id
        echo "----- $( date +%H:%M:%S ) - reading current sequence ids..."
        declare -A aSeq
        for line in $( echo "$seq" )
        do
            IFS="," read -r db seqid <<< "$line"
            aSeq+=([$db]=$seqid)
        done

        # build map: database name -> sequence id stored by the last backup
        # (from the cache file; missing entries fall back to __seq__ files below)
        echo "----- $( date +%H:%M:%S ) - reading sequence ids of last backup..."
        declare -A aSeqBackup
        for line in $( cat "${seqfile}" 2>/dev/null )
        do
            IFS="," read -r db seqid <<< "$line"
            aSeqBackup+=([$db]=$seqid)
        done

        iTsStart=$( date +%s)
        for dbname in $( cat "$dblistfile" )
        do
            iDb+=1
            echo -n "----- $( date +%H:%M:%S ) ${PROFILENAME} -- $iDb of $iDbTotal - ${dbname} - "

            ARCHIVFILE=${ARCHIVE_DIR}/${dbname}.couchdbdump.gz
            SEQFILE=${ARCHIVE_DIR}/seq/__seq__${dbname}
            SECURITYFILE=${ARCHIVE_DIR}/security/__security__${dbname}.json

            # compare current sequence id with last backup sequence id;
            # fallback: read the per-db __seq__ file (cut at first "-")
            sSequenceCurrent="${aSeq[$dbname]}"
            sSequenceLast="${aSeqBackup[$dbname]:-$(cat ${SEQFILE} 2>/dev/null | cut -f 1 -d '-')}"
            aSeqBackup[${dbname}]=$sSequenceLast
            if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] && [ -f "$ARCHIVFILE" ]; then
            echo "SKIP: still on sequence ${sSequenceLast}"      
            else
            OUTFILE=${BACKUP_TARGETDIR}/$(get_outfile "${dbname}").couchdbdump
            # no current sequence id -> db was probably deleted between
            # fetching the db list and the metadata requests: skip the dump
            if [ -z "$sSequenceCurrent" ]; then
                echo "WARNING: unable to fetch current sequence ID - maybe the database was deleted."
            else
                echo
                echo "update_seq --+-- current [${sSequenceCurrent}]" 
                echo "             +-- backup  [${sSequenceLast}]"
                echo -n "Need to backup ... "

                # TODO
                # dump to a .progress file first and rename only on success so
                # an aborted dump never looks like a complete backup
                couchbackup --db "${dbname}" >"${OUTFILE}".progress 2>/dev/null && mv "${OUTFILE}".progress "${OUTFILE}"
                fetchrc

                # $myrc is last returncode - set in fetchrc
                if [ $myrc -eq 0 ]; then
                echo -n "gzip ... "
                compress_file "$OUTFILE"
                fetchrc
                if [ $myrc -eq 0 ]; then
                    iDbCount+=1

                    aSeqBackup[${dbname}]=${sSequenceCurrent}
                    # flushing cached information
                    rm -f "${seqfile}" 2>/dev/null

                    # NOTE(review): the glob relies on compress_file leaving
                    # exactly one file (OUTFILE.gz); confirm for multiple matches
                    cp "${OUTFILE}"* "${ARCHIVFILE}"                             \
                    && echo "${sSequenceCurrent}">"${SEQFILE}"                 \
                    && _couchapi GET "${dbname}/_security" > "${SECURITYFILE}"
                    ls -l "${ARCHIVFILE}" "${SEQFILE}" "${SECURITYFILE}"
                fi
                else
                echo "ERROR occured while dumping - abort"
                fi
                ls -l "$OUTFILE"*
                echo
            fi # if [ -z "$sSequenceCurrent" ]; then
            fi # if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] ...
        done
        # integer arithmetic works here because of 'typeset -i' above
        # NOTE(review): iDbPerSec divides by iTsTotal - division by zero if
        # the loop finished within the same second; confirm handling
        iTsTotal=$( date +%s)-$iTsStart
        iDbPerSec=$iDbTotal/$iTsTotal

        # cache sequence ids in a file
        echo "----- $( date +%H:%M:%S ) - writing sequence ids ..."
        rm -f "${seqfile}" 2>/dev/null
        for key in "${!aSeqBackup[@]}"; do
            echo "$key,${aSeqBackup[$key]}" >> "${seqfile}"
        done
        ls -l "${seqfile}"
        echo

        rm -f "$dblistfile"

        echo "__DB__$SERVICENAME backup INFO: ${PROFILENAME} - backed up $iDbCount dbs of $iDbTotal total ... in $iTsTotal sec ($iDbPerSec databases per sec)"

    }
    
    # ---------- RESTORE
    #
    # example: 
    #
    # (1)
    # cd /var/iml-archive/couchdb2
    # or
    # cd /var/iml-backup/couchdb2
    #
    # (2)
    # /opt/imlbackup/client/localdump.sh restore couchdb2 measured-preview-couchdbcluster/mydb.couchdbdump.gz axel-01
    #                                    ^       ^        ^                                                   ^
    #                                    |       |        |                                                   |
    #     action: restore ---------------+       |        |                                                   |
    #     database service: couchdb2 ------------+        |                                                   |
    #     filename with instance as relative path --------+                                                   |
    #     optional: target database --------------------------------------------------------------------------+
    #
    
    # restore a single backup file; the instance and db name will be detected from file
    # param  string  filename of db dump (full path or relative to BACKUP_TARGETDIR)
    # param  string  optional: target database; default: detect name from import database 
    function couchdb2.restore(){
        local sMyfile=$1
        local dbname=$2

        # 0 = delete db first and import | 1 = create and import (on empty instance only)
        local bFastMode=0

        echo
        h2 "analyze dump $sMyfile"

        # derive the source database name from the dump file name
        local _sourceDB
        _sourceDB="$( guessDB "$sMyfile" | sed 's#.couchdbdump.gz$##' )"
        echo "detected source database    : [${_sourceDB}]"

        if [ -z "$dbname" ]; then
            dbname="$_sourceDB"
            echo "using the same as target    : [${dbname}]"
        else
            echo "using db schema from param 2: [${dbname}]"
        fi

        echo

        # non-fast mode: verify the instance answers before touching anything
        if [ $bFastMode -eq 0 ]; then
            # BUGFIX: quote the url and test the pipeline directly instead
            # of inspecting $? afterwards
            if ! curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 " >/dev/null; then
                color error
                echo "ERROR: couch DB instance is not available"
                curl -X GET "$COUCH_URL"
                color reset
                exit 1
            fi
            color ok
            echo OK
            color reset
        fi

        echo

        if [ $bFastMode -eq 0 ]; then
            # BUGFIX: quote the heading - unquoted "[$dbname]" is a glob
            # (character class) that could expand against files in the cwd
            h2 "deleting database [$dbname] ..."
            color cmd
            _couchapi DELETE "$dbname"
            fetchrc
            color reset
        fi

        h2 "creating database [$dbname] ..."
        color cmd
        _couchapi PUT "$dbname"
        fetchrc
        color reset

        h2 "import file ..."
        color cmd
        zcat "${sMyfile}" | couchrestore --db "$dbname"
        fetchrc
        color reset

        h2 "add security infos ..."
        # todo: this will fail when restoring from "deleted_databases" folder
        SECURITYFILE="${ARCHIVE_DIR}/security/__security__${_sourceDB}.json"
        if [ -f "$SECURITYFILE" ]; then
            SECDATA="$( cat "$SECURITYFILE" )"
            color cmd
            echo "add security data: $SECDATA"
            _couchapi PUT "${dbname}/_security" "$SECDATA"
            fetchrc
            color reset
        else
            color warning
            echo "WARNING: no security data file was found: $SECURITYFILE"
            color reset
        fi

        echo

    }
    
    # --------------------------------------------------------------------------------
    # MAIN
    # --------------------------------------------------------------------------------


    # ----- check requirements

    # --- is a couchd here
    # j_requireProcess "couchdb"   1

    # --- very specific :-/ ... check available config files

    j_requireBinary  "curl"         1
    j_requireBinary  "couchbackup"  1
    j_requireBinary  "couchrestore" 1

    if [ $rc -ne 0 ]; then
        rc=1
        color.echo error "ERROR: Missing a binary. Your Couchdb data cannot be dumped."
    else
        echo

        # remove keyword ALL which is used for localdump.sh to loop over all db types
        test "$1" = "ALL" && shift 1
        action=$1
        shift 1
        # dispatch to couchdb2.backup / couchdb2.restore
        # BUGFIX: pass the remaining arguments as "$@" - the unquoted $*
        # word-split arguments containing spaces
        "${SERVICENAME}.$action" "$@"

    fi

    echo "__DB__$SERVICENAME INFO: $0 $* [$SERVICENAME] final returncode rc=$rc"

    # --------------------------------------------------------------------------------