
couchdb2.sh

    #!/bin/bash
    # ================================================================================
    #
    # LOCALDUMP :: COUCHDB2 - using nodejs tools couchbackup and couchrestore
    # https://github.com/cloudant/couchbackup
    #
    # Backup:
    # - creates gzipped plain text backups (JSON) of each database
    # - writes the sequence id into a text file
    # - stores an extra file with security infos
    # - the latest backup set is written to the archive
    #
    # --------------------------------------------------------------------------------
    # ah - Axel Hahn <axel.hahn@iml.unibe.ch>
    # ds - Daniel Schueler <daniel.schueler@iml.unibe.ch>
    #
    # 2019-11-13  .....  v1.0  initial version with backup and restore (single DB)
    # 2020-05-19  .....  v1.1  backup a single or multiple couchdb instances by globbing param
    #                          ./localdump.sh backup couchdb2 demo
    # 2021-10-11  .....  v1.2  added fastmode in restore: no test connect, do not 
    #                          delete DB before create request
    # 2022-01-20         v1.3  fixes with shellcheck
    # 2022-03-17         v1.4  WIP: add lines with prefix __DB__
    # 2022-04-07         v1.5  check archive file, not only seq file
    # 2022-04-14         v1.6  backup security infos (no restore yet)
    # 2022-04-21         v1.7  restore security infos
    # 2022-10-07  ah     v1.8  unescape regex with space to prevent "grep: warning: stray \ before white space"
    # 2023-06-06  ah     v1.9  show a warning if the sequence id was not fetched
    # 2023-06-12  ah     v1.10 skip couchdb dump if no sequence id was detected (=db deleted since fetching list of all dbs)
    # 2023-06-26  ah     v1.11 speed up detection of changed databases
    # 2023-06-27  ah     v1.12 enable tmp file for dblist again (faster); speedup loops in backup
    # 2023-06-28  ah     v1.13 optimize backup move OUTFILE; measure time; cache backed up sequence ids
    # ================================================================================
    
    if [ -z "$BACKUP_TARGETDIR" ]; then
      echo "ERROR: you cannot start $(basename "$0") directly"
      rc=$rc+1
      exit 1
    fi
    
    # --------------------------------------------------------------------------------
    # CONFIG
    # --------------------------------------------------------------------------------
    
    # contains *.config files for each instance
    CFGDIR=~/.iml_backup/couchdb2
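
    # Each instance config must be sourceable by bash and must set COUCH_URL
    # (see loadInstance below). Illustrative example only - host, port,
    # credentials and the trailing slash are assumptions, not taken from this repo:
    #
    #   # ~/.iml_backup/couchdb2/demo.config
    #   COUCH_URL="http://admin:secret@localhost:5984/"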
    
    # UNUSED
    # dirPythonPackages=/usr/lib/python2.7/site-packages
    
    # now set in localdump.sh
    # ARCHIVE_DIR=$(_j_getvar "${JOBFILE}" dir-dbarchive)/couchdb2
    
    # --------------------------------------------------------------------------------
    # FUNCTIONS
    # --------------------------------------------------------------------------------
    
    # make a couch api request
    # param  string  method ... one of GET|POST|DELETE
    # param  string  relative url, i.e. _all_dbs or _stats
    # param  string  optional: data for POST|PUT requests
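    #
    # example calls (illustrative; they assume an instance config has been loaded):
    #   _couchapi GET _all_dbs
    #   _couchapi PUT mydb
    #   _couchapi PUT mydb/_security '{"admins":{"names":[],"roles":[]}}'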
    function _couchapi(){
      local method=$1
      local apiurl=$2
      # local outfile=$3
      local data=$3
    
      sParams=
      # sParams="$sParams -u ${couchdbuser}:${couchdbpw}"
      sParams="$sParams -X ${method}"
      sParams="$sParams ${COUCH_URL}${apiurl}"
      # if [ ! -z "$outfile" ]; then
      #   sParams="$sParams -o ${outfile}"
      # fi
      if [ -n "$data" ]; then
        sParams="$sParams -d ${data}"
      fi
      curl $sParams 2>/dev/null
    }
    
    function _getDblist(){
       _couchapi GET _all_dbs | sed 's#\"#\n#g' | grep -Ev "^(\[|\,|\])$" | grep -v _replicator | grep -v _global_changes
    }
    
    # get the count of active curl processes
    function curlCount(){
      ps -ef | grep -v grep | grep "curl" | wc -l
    }
    
    # wait until at most N curl processes are running
    function wait4curlcount(){
      local -i iContinue
      local -i iCount
    
      iContinue=${1:-0}
      iCount=$( curlCount )
    
      test $iCount -gt $iContinue && wait4curlcount $iContinue
    }
    
    # optimized curl requests to get metadata from all databases
    # used in _doBackupOfSingleInstance
    # it returns a JSON line for each database
    #
    # param  integer  iChunksize  count of urls per curl command
    # param  integer  iParallel   count of parallel curl processes
    # param  string   dblistfile  path and filename of the database list
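    #
    # example (illustrative): with iChunksize=3 one generated background call looks like
    #   curl -s http://localhost:5984/db1 -: http://localhost:5984/db2 -: http://localhost:5984/db3 &
    # ("-:" is curl's --next separator; host and database names are made up here)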
    function reqCombined(){
      local iChunksize; typeset -i iChunksize; iChunksize=$1
      local iParallel;  typeset -i iParallel;  iParallel=$2
      local dblistfile;                        dblistfile="$3"
    
      typeset -i iCounter=0
      cmdline=
    
      for mydb in $( cat $dblistfile )
      do 
    
          iCounter+=1
          test -n "$cmdline" && cmdline+=" -: "
          cmdline+="${COUCH_URL}${mydb} "
    
          if [ $iCounter -ge $iChunksize ]; then
    
              curl -s $cmdline &
    
              # wait until the count of curl processes is below the maximum
              wait4curlcount $iParallel
    
              iCounter=0
              cmdline=
          fi
    
      done
      test -n "${cmdline}" && curl -s $cmdline &
    
      wait4curlcount 0
    }
    
    
    
    # ---------- CONFIG/ INSTANCES
    
    # get valid configured instances
    function getInstances(){
     for mycfg in $(ls -1 ${CFGDIR}/*${1}*.config)
     do
       if . "$mycfg"; then
         echo $(basename "${mycfg}" | cut -f 1 -d ".")
       fi
     done
    }
    
    
    # load the config of an existing instance
    # see getInstances to get valid names
    # param  string  name of the instance to load
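    #
    # example (illustrative): loadInstance demo
    #   ... sources ${CFGDIR}/demo.config and expects it to set COUCH_URL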
    function loadInstance(){
      COUCH_URL=
      if ! . "${CFGDIR}/${1}.config"; then
        color error
        echo "ERROR: invalid instance: $1 - the config file cannot be sourced"
        color reset
        exit 1
      fi
      if [ -z "${COUCH_URL}" ]; then
        color error
        echo "ERROR: invalid instance: $1 - the config file has no COUCH_URL"
        color reset
        exit 1
      fi
    
    }
    
    
    # ---------- BACKUP
    
    # backup with loop over instances
    # param 1  string  globbing filter to config files
    function doBackup(){
      # # for mycfg in `ls -1 ~/.iml_backup/couchdb/*.config`
      # for COUCHDB_INSTANCE in $(getInstances $1)
      # do
      #   loadInstance "$COUCHDB_INSTANCE"
    
          echo "--- instance: $PROFILENAME"
          if curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 "; then
            echo OK, connected.
            sleep 1
            _doBackupOfSingleInstance
    
          else
            rc=$rc+1
            color error
            echo "ERROR: couch DB instance is not available or canot be accessed with these credentials in config file"
            # repeat curl to show the error message
            curl -X GET "$COUCH_URL"
            color reset
          fi
    
        echo
        echo "--- $(date) done."
        echo
      # done
    }
    
    # make backup of all databases in a couchdb instance
    # global: COUCH_URL
    # global: PROFILENAME
    function _doBackupOfSingleInstance(){
    
      create_targetdir
      local ARCHIVE_DIR2="${ARCHIVE_DIR}/deleted_databases"
      for _dir in "${ARCHIVE_DIR}"      "${ARCHIVE_DIR}/seq"  "${ARCHIVE_DIR}/security" \
                  "${ARCHIVE_DIR2}"     "${ARCHIVE_DIR2}/seq" "${ARCHIVE_DIR2}/security"
      do
        test -d "$_dir" || (echo "creating $_dir" ; mkdir -p "$_dir" )
      done
    
      local iChunksize=100
      local iParallel=6
    
      local dblistfile
      local sSequenceCurrent
      local sSequenceLast
      local OUTFILE
      local ARCHIVFILE
      local SEQFILE
      local SECURITYFILE
      local iTsStart; typeset -i iTsStart
      local iTsTotal; typeset -i iTsTotal
      local iDbPerSec; typeset -i iDbPerSec
    
      dblistfile="/tmp/dblist_${PROFILENAME}.txt"
    
      # this is just a caching file of the sequence id of the last backup and can be safely deleted.
      seqfile="${ARCHIVE_DIR}/seq/all_seqids_of_last_backups_cache.txt"
    
      echo "--- $( date +%H:%M:%S ) Get list of all databases"
      _getDblist >"${dblistfile}"
    
      typeset -i iDbTotal; iDbTotal=$( wc -l < "$dblistfile")
      typeset -i iDb=0        # counter for number of database in the loop
      typeset -i iDbCount=0   # counter for backed up databases
      echo "${PROFILENAME} has $iDbTotal databases"
    
      # detect deleted databases: 
      echo
      echo "--- $( date +%H:%M:%S ) MOVE deleted databases "
      echo "... into ${ARCHIVE_DIR2}"
      echo
      for dumpfile in $( find "${ARCHIVE_DIR}/" -maxdepth 1 -type f -name "*.couchdbdump.gz" )
      do
          # extract database name: get basename and cut extension
          # dbname=$( basename $dumpfile | sed "s#\.couchdbdump\.gz##g" )
          dbname=${dumpfile##*/}
          dbname=${dbname%.couchdbdump.gz}
    
          if ! grep "^${dbname}" "${dblistfile}"  >/dev/null; then
                  SEQFILE=${ARCHIVE_DIR}/seq/__seq__${dbname}
                  SECURITYFILE=${ARCHIVE_DIR}/security/__security__${dbname}.json
                  echo "DELETED $dbname ... $( ls -l ${dumpfile} | cut -f 5- -d ' ' )"
                  mv "${dumpfile}"     "${ARCHIVE_DIR2}"
                  mv "${SEQFILE}"      "${ARCHIVE_DIR2}/seq/"
                  mv "${SECURITYFILE}" "${ARCHIVE_DIR2}/security/"
          fi
      done
    
      echo
      echo "--- $( date +%H:%M:%S ) DUMP databases"
      echo "    of instance ${PROFILENAME}: $iDbTotal databases"
      echo "    TO BACKUP ${BACKUP_TARGETDIR}"
      echo "      ARCHIVE ${ARCHIVE_DIR}"
      echo
    
      echo "----- $( date +%H:%M:%S ) - Get database meta infos ... max $iParallel parralel curl requests sending $iChunksize database urls per process"
      seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"' | tr ',' ' '  | awk '{ sub(/-.*/, "", $2 ); print $1 "," $2  }' )
      #                                                                                             ^        ^           ^                    ^                              ^
      #                                                     db_name + update_seq in a single line --+        |           |                    |   and back: space to comma --+
      #                                                                                      delete quotes --+           |                    +-- remove string after first minus char
      #                                                                                comma to space (for awk values) --+
      # the result is database name + comma + sequence id
      # echo "$seq" | head -3
      # _users,7688
      # candidate-00649860284626638ac6fd12bf000df5,40
      # candidate-04561cddbd0fa305714b48a57929d8b4,3
    
      echo "----- $( date +%H:%M:%S ) - reading current sequence ids..."
      declare -A aSeq
      for line in $( echo "$seq" )
      do
        IFS="," read -r db seqid <<< "$line"
        aSeq+=([$db]=$seqid)
      done
    
      echo "----- $( date +%H:%M:%S ) - reading sequence ids of last backup..."
      declare -A aSeqBackup
      for line in $( cat "${seqfile}" 2>/dev/null )
      do
        IFS="," read -r db seqid <<< "$line"
        aSeqBackup+=([$db]=$seqid)
      done
    
      iTsStart=$( date +%s)
      for dbname in $( cat "$dblistfile" )
      do
        iDb+=1
        echo -n "----- $( date +%H:%M:%S ) ${PROFILENAME} -- $iDb of $iDbTotal - ${dbname} - "
    
        ARCHIVFILE=${ARCHIVE_DIR}/${dbname}.couchdbdump.gz
        SEQFILE=${ARCHIVE_DIR}/seq/__seq__${dbname}
        SECURITYFILE=${ARCHIVE_DIR}/security/__security__${dbname}.json
    
        # compare current sequence id with last backup sequence id
        sSequenceCurrent="${aSeq[$dbname]}"
        sSequenceLast="${aSeqBackup[$dbname]:-$(cat ${SEQFILE} 2>/dev/null | cut -f 1 -d '-')}"
        aSeqBackup[${dbname}]=$sSequenceLast
        if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] && [ -f "$ARCHIVFILE" ]; then
          echo "SKIP: still on sequence ${sSequenceLast}"      
        else
          OUTFILE=${BACKUP_TARGETDIR}/$(get_outfile "${dbname}").couchdbdump
          if [ -z "$sSequenceCurrent" ]; then
            echo "WARNING: unable to fetch current sequence ID - maybe the database was deleted."
          else
            echo
            echo "update_seq --+-- current [${sSequenceCurrent}]" 
            echo "             +-- backup  [${sSequenceLast}]"
            echo -n "Need to backup ... "
    
            # TODO
            couchbackup --db "${dbname}" >"${OUTFILE}".progress 2>/dev/null && mv "${OUTFILE}".progress "${OUTFILE}"
            fetchrc
    
            # $myrc is last returncode - set in fetchrc
            if [ $myrc -eq 0 ]; then
              echo -n "gzip ... "
              compress_file "$OUTFILE"
              fetchrc
              if [ $myrc -eq 0 ]; then
                iDbCount+=1
    
                aSeqBackup[${dbname}]=${sSequenceCurrent}
                # flushing cached information
                rm -f "${seqfile}" 2>/dev/null
    
                cp "${OUTFILE}"* "${ARCHIVFILE}"                             \
                  && echo "${sSequenceCurrent}">"${SEQFILE}"                 \
                  && _couchapi GET "${dbname}/_security" > "${SECURITYFILE}"
                ls -l "${ARCHIVFILE}" "${SEQFILE}" "${SECURITYFILE}"
              fi
            else
              echo "ERROR occured while dumping - abort"
            fi
            ls -l "$OUTFILE"*
            echo
          fi # if [ -z "$sSequenceCurrent" ]; then
        fi # if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] ...
      done
      iTsTotal=$( date +%s)-$iTsStart
      test $iTsTotal -eq 0 && iTsTotal=1  # prevent division by zero on very fast runs
      iDbPerSec=$iDbTotal/$iTsTotal
    
      # cache sequence ids in a file
      echo "----- $( date +%H:%M:%S ) - writing sequence ids ..."
      rm -f "${seqfile}" 2>/dev/null
      for key in "${!aSeqBackup[@]}"; do
        echo "$key,${aSeqBackup[$key]}" >> "${seqfile}"
      done
      ls -l "${seqfile}"
      echo
    
      rm -f "$dblistfile"
    
      echo "__DB__$SERVICENAME backup INFO: ${PROFILENAME} - backed up $iDbCount dbs of $iDbTotal total ... in $iTsTotal sec ($iDbPerSec databases per sec)"
    
    }
    
    # ---------- RESTORE
    #
    # example: 
    #
    # (1)
    # cd /var/iml-archive/couchdb2
    # or
    # cd /var/iml-backup/couchdb2
    #
    # (2)
    # /opt/imlbackup/client/localdump.sh restore couchdb2 measured-preview-couchdbcluster/mydb.couchdbdump.gz axel-01
    #                                    ^       ^        ^                                                   ^
    #                                    |       |        |                                                   |
    #     action: restore ---------------+       |        |                                                   |
    #     database service: couchdb2 ------------+        |                                                   |
    #     filename with instance as relative path --------+                                                   |
    #     optional: target database --------------------------------------------------------------------------+
    #
    
    # restore a single backup file; the instance and db name will be detected from the given file
    # param  string  filename of db dump (full path or relative to BACKUP_TARGETDIR)
    # param  string  optional: target database; default: detect name from import database 
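    #
    # example (illustrative; the target name "mydb_copy" is made up):
    #   restoreByFile measured-preview-couchdbcluster/mydb.couchdbdump.gz mydb_copy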
    function restoreByFile(){
      sMyfile=$1
      dbname=$2
    
      bFastMode=0 # 0 = delete db first and import | 1 = create and import (on empty instance only)
    
      echo
      h2 "analyze dump $sMyfile"
    
      # COUCHDB_INSTANCE=$(echo $sMyfile | sed "s#${BACKUP_TARGETDIR}##g" | sed "s#\./##g" | sed "s#^/##g" | cut -f 1 -d "/")
      # echo "detected COUCHDB_INSTANCE   : [${COUCHDB_INSTANCE}]"
      # if [ -z "$COUCHDB_INSTANCE" ]; then
      #   echo "ERROR: Name of the instance was not detected."
      #   echo "       For couchdb restore you should cd to the ${BACKUP_TARGETDIR} or ${ARCHIVE_DIR}"
      #   exit 1
      # fi
    
      local _sourceDB="$( guessDB $sMyfile | sed 's#.couchdbdump.gz$##' )"
      echo "detected source database    : [${_sourceDB}]"
    
      if [ -z "$dbname" ]; then
        dbname="$_sourceDB"
        echo "using the same as target    : [${dbname}]"
      else
        echo "using db schema from param 2: [${dbname}]"
      fi
    
      echo
    
      # loadInstance $COUCHDB_INSTANCE
      
      if [ $bFastMode -eq 0 ]; then
        echo connect $couchdbhost on port $couchdbport with user $couchdbuser
        curl --head -X GET $COUCH_URL 2>/dev/null | grep "^HTTP.* 200 " >/dev/null
        if [ $? -ne 0 ]; then
            color error
            echo ERROR: couch DB instance is not available
            curl -X GET $COUCH_URL
            color reset
            exit 1
        fi
        color ok
        echo OK
        color reset
      fi
    
      echo
    
      # _getDblist | grep "^${dbname}$"
      # if [ $? -eq 0 ]; then
      #   echo DB exists ... need to drop it first
      # fi
    
      if [ $bFastMode -eq 0 ]; then
        h2 deleting database [$dbname] ...
        color cmd
        _couchapi DELETE $dbname
        fetchrc
        color reset
      fi
    
      h2 creating database [$dbname] ...
      color cmd
      _couchapi PUT $dbname
      fetchrc
      color reset
    
      h2 import file ...
      color cmd
      zcat ${sMyfile} | couchrestore --db $dbname
      fetchrc
      color reset
    
      h2 add security infos ...
      # todo: this will fail when restoring from "deleted_databases" folder
      SECURITYFILE="${ARCHIVE_DIR}/security/__security__${_sourceDB}.json"
      if [ -f "$SECURITYFILE" ]; then
        SECDATA="$( cat $SECURITYFILE )"
        color cmd
        echo "add security data: $SECDATA"
        _couchapi PUT "${dbname}/_security" "$SECDATA"
        fetchrc
        color reset
      else
        color warning 
        echo "WARNING: no security data file was found: $SECURITYFILE"
        color reset
      fi
    
      echo
    
    }
    
    # --------------------------------------------------------------------------------
    # MAIN
    # --------------------------------------------------------------------------------
    
    
    # ----- check requirements
    
    # --- is a couchdb here
    # j_requireProcess "couchdb"   1
    
    # --- very specific :-/ ... check available config files
    ls -1 ${CFGDIR}/* >/dev/null 2>&1
    rc=$rc+$?
    
    
    if [ $rc -eq 0 ]; then
      # echo OK: couchdb2 config was found on this system ... checking requirements for backup ...
    
      j_requireBinary  "curl"         1
      j_requireBinary  "couchbackup"  1
      j_requireBinary  "couchrestore" 1
    
      #ls ${dirPythonPackages}/couchdb/tools/dump.py ${dirPythonPackages}/couchdb/tools/load.py >/dev/null && echo "OK: python couchdb tools were found"
      #rc=$rc+$?
    
    
      if [ $rc -eq 0 ]; then
        echo
    
        if [ "$1" = "restore" ]; then
          echo
          shift 1
          restoreByFile $*
    
        else
          shift 1
    
          # remove keyword ALL which is used for localdump.sh to loop over all db types
          test "$1" = "ALL" && shift 1
    
          doBackup $*
        fi
    
      else
        color error
        echo ERROR: Couchdb is here but I am missing things for the backup :-/
        color reset
      fi
    
    else
      rc=0
      echo "__DB__$SERVICENAME SKIP: couchdb2 config does not seem to be here"
    fi
    
    
    echo "__DB__$SERVICENAME INFO: $0 $* [$SERVICENAME] final returncode rc=$rc"
    
    # --------------------------------------------------------------------------------