Skip to content
Snippets Groups Projects
Select Git revision
  • 0431c09a975d38dd605d31078a69794a18a0679e
  • master default protected
  • 7771-harden-postgres-backup
  • pgsql-dump-with-snapshots
  • update-colors
  • update-docs-css
  • usb-repair-stick
  • desktop-notification
  • 7000-corrections
  • db-detector
10 results

couchdb2.sh

Blame
  • couchdb2.sh 16.42 KiB
    #!/bin/bash
    # ================================================================================
    #
    # LOCALDUMP :: COUCHDB2 - using nodejs tools couchbackup and couchrestore
    # https://github.com/cloudant/couchbackup
    #
    # Backup:
    # - creates a gzipped plain text backup (JSON) of each database
    # - writes the sequence id of each database into a text file
    # - stores an extra file with the security infos of each database
    # - the latest backup set is written to the archive
    #
    # --------------------------------------------------------------------------------
    # ah - Axel Hahn <axel.hahn@iml.unibe.ch>
    # ds - Daniel Schueler <daniel.schueler@iml.unibe.ch>
    #
    # 2019-11-13  .....  v1.0  initial version with backup and restore (single DB)
    # 2020-05-19  .....  v1.1  backup a single or multiple couchdb instances by globbing param
    #                          ./localdump.sh backup couchdb2 demo
    # 2021-10-11  .....  v1.2  added fastmode in restore: no test connect, do not 
    #                          delete DB before create request
    # 2022-01-20         v1.3  fixes with shellcheck
    # 2022-03-17         v1.4  WIP: add lines with prefix __DB__
    # 2022-04-07         v1.5  check archive file, not only seq file
    # 2022-04-14         v1.6  backup security infos (no restore yet)
    # 2022-04-21         v1.7  restore security infos
    # 2022-10-07  ah     v1.8  unescape regex with space to prevent "grep: warning: stray \ before white space"
    # 2023-06-06  ah     v1.9  show a warning if the sequence id was not fetched
    # 2023-06-12  ah     v1.10 skip couchdb dump if no sequence id was detected (=db deleted since fetching list of all dbs)
    # 2023-06-26  ah     v1.11 speed up detection of changed databases
    # 2023-06-27  ah     v1.12 enable tmp file for dblist again (faster); speedup loops in backup
    # ================================================================================
    
    # BACKUP_TARGETDIR has to be provided by the caller. If it is empty this
    # file was started on its own: show an error and stop.
    [ -n "$BACKUP_TARGETDIR" ] || {
      echo "ERROR: you cannot start $(basename "$0") directly"
      rc=$rc+1
      exit 1
    }
    
    # --------------------------------------------------------------------------------
    # CONFIG
    # --------------------------------------------------------------------------------
    
    # Directory that contains one <instance>.config file for each couchdb instance.
    # The files are sourced as shell code (see getInstances and loadInstance) and
    # have to set COUCH_URL.
    CFGDIR=~/.iml_backup/couchdb2
    
    # UNUSED
    # dirPythonPackages=/usr/lib/python2.7/site-packages
    
    # Archive base directory of this database type: the output of
    # "_j_getvar <jobfile> dir-dbarchive" followed by the subdir "couchdb2".
    # NOTE(review): _j_getvar is defined outside of this file - presumably it
    # reads the value of the key "dir-dbarchive" from the job file; confirm.
    # Below ARCHIVE_DIR each instance gets its own subdir (see
    # _doBackupOfSingleInstance).
    ARCHIVE_DIR=$(_j_getvar "${JOBFILE}" dir-dbarchive)/couchdb2
    
    # --------------------------------------------------------------------------------
    # FUNCTIONS
    # --------------------------------------------------------------------------------
    
    # make a couch api request with curl; the response is written to stdout
    #
    # global  string  COUCH_URL  base url; the relative url is appended as it is
    # param   string  method ... one of GET|PUT|POST|DELETE
    # param   string  relative url, i.e. _all_dbs or _stats
    # param   string  optional: data for POST|PUT requests
    function _couchapi(){
      local method=$1
      local apiurl=$2
      local data=$3

      # The arguments are collected in an array: each item is handed over to
      # curl as a single word, so request data with spaces (JSON) stays intact.
      local -a aParams=( -X "${method}" "${COUCH_URL}${apiurl}" )
      if [ -n "$data" ]; then
        aParams+=( -d "${data}" )
      fi

      # everything curl writes to stderr is discarded
      curl "${aParams[@]}" 2>/dev/null
    }
    
    # get the names of all databases of the current instance, one per line
    #
    # The JSON array returned by _all_dbs is split at each double quote; lines
    # that consist of brackets and commas only (this includes an empty list "[]")
    # are removed. Names containing _replicator or _global_changes are skipped.
    function _getDblist(){
      _couchapi GET _all_dbs \
        | sed 's#"#\n#g' \
        | grep -Ev '^[][,]*$' \
        | grep -v -e _replicator -e _global_changes
    }
    
    # get value update_seq of given couchdb name
    # Only the part before the first "-" of the update_seq string is returned.
    #
    # param  string  name of the database
    function _getDbSeq(){
      # put each key of the JSON response on its own line, pick the line with
      # update_seq and cut its value: 4th field when splitting at double quotes
      _couchapi GET "$1" \
        | sed 's#,"#\n"#g' \
        | grep update_seq \
        | cut -f 4 -d '"' \
        | cut -f 1 -d '-'
    }
    
    
    
    # count the lines of "ps -ef" that contain "curl"
    # (lines that contain "grep" are not counted)
    function curlCount(){
      ps -ef | awk '/curl/ && !/grep/ { n++ } END { print n+0 }'
    }
    
    # wait until the number of curl processes (see curlCount) is not higher
    # than a given limit
    #
    # param   integer  optional: max count of curl processes to continue; default: 0
    # return  0 as soon as the limit is reached
    function wait4curlcount(){
      local -i iContinue=${1:-0}
      local -i iCount

      # Poll in a loop, not by recursion: a long waiting time must not
      # increase the depth of nested function calls.
      iCount=$( curlCount )
      while [ $iCount -gt $iContinue ]
      do
        iCount=$( curlCount )
      done

      return 0
    }
    
    # Request the database infos (GET <COUCH_URL><database>) of all databases
    # of a list. Several urls are sent with a single curl process and several
    # curl processes are running in the background at the same time.
    # The responses of all requests are written to stdout.
    #
    # global  string   COUCH_URL
    # param   integer  max number of database urls per curl process
    # param   integer  limit of running curl processes: after starting a new one
    #                  the function waits while more than this number of own
    #                  curl processes are running
    # param   string   filename with database names; one name per line
    function reqCombined(){
      local -i iChunksize=$1
      local -i iParallel=$2
      local dblistfile="$3"

      local -i iCounter=0
      local -a aDbs=()
      local -a aCurlArgs=()
      local -a aPids=()
      local mydb

      mapfile -t aDbs < "$dblistfile"

      for mydb in "${aDbs[@]}"
      do
          test -n "$mydb" || continue

          iCounter+=1
          # "-:" is the short form of the curl option --next; it separates
          # the urls that are handled by the same curl process
          test ${#aCurlArgs[@]} -gt 0 && aCurlArgs+=( "-:" )
          aCurlArgs+=( "${COUCH_URL}${mydb}" )

          if [ $iCounter -ge $iChunksize ]; then

              curl -s "${aCurlArgs[@]}" &
              aPids+=( $! )

              # Wait while too many requests are running. Only the background
              # jobs of this shell are counted - curl processes of other users
              # or scripts on the same system cannot block the backup.
              while [ "$( jobs -rp | wc -l )" -gt $iParallel ]
              do
                  sleep 0.1
              done

              iCounter=0
              aCurlArgs=()
          fi

      done

      # send the remaining urls
      if [ ${#aCurlArgs[@]} -gt 0 ]; then
          curl -s "${aCurlArgs[@]}" &
          aPids+=( $! )
      fi

      # wait for the end of all started requests
      test ${#aPids[@]} -gt 0 && wait "${aPids[@]}"
      return 0
    }
    
    
    
    # ---------- CONFIG/ INSTANCES
    
    # get valid configured instances
    # It prints the name (basename of the config file up to its first dot) of
    # each matching config file that can be sourced without error.
    # Remark: the config files are sourced into the current shell.
    #
    # global  string  CFGDIR
    # param   string  optional: filter; it matches "<CFGDIR>/*<filter>*.config"
    function getInstances(){
      local mycfg
      local _name
      local -i iFound=0

      # $1 is not quoted by intention: the filter can contain globbing chars.
      # The shell expands the pattern itself - the output of ls is not parsed,
      # so file names with spaces are kept as a single item.
      for mycfg in "${CFGDIR}"/*${1}*.config
      do
        # without a matching file the pattern is kept as it is - skip it
        test -e "$mycfg" || continue
        iFound+=1

        if . "$mycfg"; then
          _name=${mycfg##*/}
          echo "${_name%%.*}"
        fi
      done

      if [ $iFound -eq 0 ]; then
        echo "WARNING: no config file matches ${CFGDIR}/*${1}*.config" >&2
      fi
    }
    
    
    # load the config of an existing instance
    # see getInstances to get valid names
    # The config file <CFGDIR>/<name>.config is sourced into the current shell.
    # The script is stopped with exitcode 1 if the file cannot be sourced or
    # if it does not set COUCH_URL.
    #
    # global  string  CFGDIR
    # global  string  COUCH_URL  it is reset before loading the config
    # param   string  name of the instance to load
    function loadInstance(){
      COUCH_URL=
      if ! . "${CFGDIR}/${1}.config"; then
        color error
        # quoted: the instance name is shown as given (no word splitting, no globbing)
        echo "ERROR: invalid instance: $1 - the config file cannot be sourced"
        color reset
        exit 1
      fi
      if [ -z "${COUCH_URL}" ]; then
        color error
        echo "ERROR: invalid instance: $1 - the config file has no COUCH_URL"
        color reset
        exit 1
      fi

    }
    
    
    # ---------- BACKUP
    
    # backup with loop over instances
    # For each instance its config is loaded and the connection is tested: the
    # backup of the instance is started if a HEAD request to COUCH_URL returns
    # http status 200. Otherwise an error is shown and rc is incremented.
    #
    # global  integer  rc                return code; incremented on error
    # global  string   COUCHDB_INSTANCE  set to the name of the current instance
    # param 1  string  globbing filter to config files
    function doBackup(){
      # "$1" is quoted: the filter must reach getInstances unchanged and must
      # not be expanded against the files of the current directory
      for COUCHDB_INSTANCE in $(getInstances "$1")
      do
        loadInstance "$COUCHDB_INSTANCE"

          echo "--- instance: $COUCHDB_INSTANCE"
          if curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 "; then
            echo OK, connected.
            sleep 2
            _doBackupOfSingleInstance

          else
            # arithmetic expansion works with and without integer attribute of rc
            rc=$(( rc + 1 ))
            color error
            echo "ERROR: couch DB instance is not available or canot be accessed with these credentials in config file"
            # repeat curl to show the error message
            curl -X GET "$COUCH_URL"
            color reset
          fi

        echo
        echo "--- $(date) done."
        echo
      done
    }
    
    # make backup of all databases in a couchdb instance
    #
    # Steps:
    # (1) create missing directories below backup target and archive
    # (2) fetch the list of the current databases into a temp file
    # (3) move archived dumps of databases that do not exist anymore into the
    #     subdir "deleted_databases" (with their sequence and security file)
    # (4) fetch the current sequence id of all databases
    # (5) dump each database that has another sequence id than in its last
    #     backup or that has no archive file yet; unchanged ones are skipped
    #
    # The helpers create_targetdir, get_outfile, compress_file, fetchrc and
    # color are not defined in this file.
    # The variables dbname, OUTFILE, ARCHIVFILE, SEQFILE, SECURITYFILE and
    # sSequence* are not local - as before (NOTE(review): helper functions
    # outside of this file might read them; confirm before making them local).
    #
    # global: ARCHIVE_DIR
    # global: BACKUP_TARGETDIR
    # global: COUCH_URL
    # global: COUCHDB_INSTANCE
    # global: SERVICENAME
    # global: rc ... incremented if the temp file cannot be created
    function _doBackupOfSingleInstance(){

      create_targetdir
      local ARCHIVE_DIR2="${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/deleted_databases"
      local _dir
      for _dir in "${BACKUP_TARGETDIR}/${COUCHDB_INSTANCE}" "${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/seq" "${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/security" \
                  "${ARCHIVE_DIR2}"                         "${ARCHIVE_DIR2}/seq"                    "${ARCHIVE_DIR2}/security"
      do
        test -d "$_dir" || (echo "creating $_dir" ; mkdir -p "$_dir" )
      done

      echo "--- $( date ) Get list of all databases"
      # The list of current databases is stored in a temp file. mktemp creates
      # the file with a name that cannot be predicted by other users.
      local dblistfile
      if ! dblistfile=$( mktemp "/tmp/dblist_${COUCHDB_INSTANCE}.XXXXXX" ); then
        color error
        echo "ERROR: unable to create a temporary file for the list of databases"
        color reset
        rc=$(( rc + 1 ))
        return 1
      fi
      _getDblist >"${dblistfile}"

      local -a aDbs=()
      mapfile -t aDbs < "${dblistfile}"

      typeset -i iDbTotal
      iDbTotal=$( wc -l < "$dblistfile" )
      typeset -i iDb=0        # counter for number of database in the loop
      typeset -i iDbCount=0   # counter for backed up databases
      echo "${COUCHDB_INSTANCE} has $iDbTotal databases"

      # detect deleted databases:
      echo
      echo "--- $( date ) MOVE deleted databases "
      echo "... into ${ARCHIVE_DIR2}"
      echo
      local dumpfile
      while IFS= read -r -d '' dumpfile
      do
          # extract database name: get basename and cut extension
          dbname=${dumpfile##*/}
          dbname=${dbname%.couchdbdump.gz}

          # Compare complete lines as fixed strings: a prefix match would keep
          # the deleted database "foo" as long as a database "foobar" exists.
          if ! grep -Fxq -- "${dbname}" "${dblistfile}"; then
                  SEQFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/seq/__seq__${dbname}
                  SECURITYFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/security/__security__${dbname}.json
                  echo "DELETED $dbname ... $( ls -l "${dumpfile}" | cut -f 5- -d ' ' )"
                  mv "${dumpfile}"     "${ARCHIVE_DIR2}"
                  if [ -f "${SEQFILE}" ]; then
                    mv "${SEQFILE}"      "${ARCHIVE_DIR2}/seq/"
                  fi
                  if [ -f "${SECURITYFILE}" ]; then
                    mv "${SECURITYFILE}" "${ARCHIVE_DIR2}/security/"
                  fi
          fi
      done < <( find "${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/" -maxdepth 1 -type f -name "*.couchdbdump.gz" -print0 )

      echo
      echo "--- $( date ) DUMP databases"
      echo "    of instance ${COUCHDB_INSTANCE}: $iDbTotal databases"
      echo "    TO BACKUP ${BACKUP_TARGETDIR}/${COUCHDB_INSTANCE}"
      echo "      ARCHIVE ${ARCHIVE_DIR}/${COUCHDB_INSTANCE}"
      echo

      # TODO: optimze creation of hash with database and sequence id
      local iChunksize=100
      local iParallel=6
      echo "--- $( date ) - Get database meta infos ... max $iParallel parralel curl requests sending $iChunksize database urls per connection"
      # one line per database: <database name>,<update_seq>
      local seq
      seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"')
      echo "$seq" | head

      # hash with database name as key and sequence id as value
      declare -A aSeq=()
      local db
      local seqvalue

      echo "--- $( date ) - reading sequence ids..."
      while IFS="," read -r db seqvalue
      do
        # skip lines without database name (i.e. an error response or no
        # data at all); an empty key is an invalid array subscript
        test -n "$db" || continue
        # the sequence id is the part before the first "-"
        aSeq["$db"]=${seqvalue%%-*}
      done <<< "$seq"

      for dbname in "${aDbs[@]}"
      do
        test -n "$dbname" || continue

        iDb+=1
        echo -n "----- $(date) ${COUCHDB_INSTANCE} -- $iDb of $iDbTotal - ${dbname} - "
        OUTFILE=${BACKUP_TARGETDIR}/${COUCHDB_INSTANCE}/$(get_outfile "${dbname}").couchdbdump
        ARCHIVFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/${dbname}.couchdbdump.gz
        SEQFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/seq/__seq__${dbname}
        SECURITYFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/security/__security__${dbname}.json

        sSequenceCurrent="${aSeq[$dbname]}"
        sSequenceLast=$( cut -f 1 -d '-' "${SEQFILE}" 2>/dev/null )

        if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] && [ -f "$ARCHIVFILE" ]; then
          echo "SKIP: still on sequence ${sSequenceLast}"

          # add security file for already existing databases
          if [ ! -f "${SECURITYFILE}" ]; then
            echo "INFO: creating missing security file ${SECURITYFILE}"
            _couchapi GET "${dbname}/_security" > "${SECURITYFILE}"
          fi

        else
          if [ -z "$sSequenceCurrent" ]; then
            echo "WARNING: unable to fetch current sequence ID - maybe the database was deleted."
          else
            echo
            echo "update_seq --+-- current [${sSequenceCurrent}]"
            echo "             +-- backup  [${sSequenceLast}]"
            echo -n "Need to backup ... "

            # dump into a *.progress file first; it is renamed after a
            # successful dump only. fetchrc has to follow directly.
            couchbackup --db "${dbname}" >"${OUTFILE}".progress 2>/dev/null && mv "${OUTFILE}".progress "${OUTFILE}"
            fetchrc

            # $myrc is last returncode - set in fetchrc
            if [ $myrc -eq 0 ]; then
              echo -n "gzip ... "
              compress_file "$OUTFILE"
              fetchrc
              if [ $myrc -eq 0 ]; then
                iDbCount+=1
                # update the archive: dump file, sequence id and security infos
                cp "${OUTFILE}"* "${ARCHIVFILE}"                             \
                  && echo "${sSequenceCurrent}">"${SEQFILE}"                 \
                  && _couchapi GET "${dbname}/_security" > "${SECURITYFILE}"
                ls -l "${ARCHIVFILE}" "${SEQFILE}" "${SECURITYFILE}"
              fi
            else
              echo "ERROR occured while dumping - abort"
            fi
            ls -l "$OUTFILE"*
            echo
          fi # if [ -z "$sSequenceCurrent" ]; then
        fi # if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] ...
      done

      rm -f "$dblistfile"

      echo "__DB__$SERVICENAME backup INFO: ${COUCHDB_INSTANCE} - backed up $iDbCount dbs of $iDbTotal total"

    }
    
    # ---------- RESTORE
    #
    # example: 
    #
    # (1)
    # cd /var/iml-archive/couchdb2
    # or
    # cd /var/iml-backup/couchdb2
    #
    # (2)
    # /opt/imlbackup/client/localdump.sh restore couchdb2 measured-preview-couchdbcluster/mydb.couchdbdump.gz axel-01
    #                                    ^       ^        ^                                                   ^
    #                                    |       |        |                                                   |
    #     action: restore ---------------+       |        |                                                   |
    #     database service: couchdb2 ------------+        |                                                   |
    #     filename with instance as relative path --------+                                                   |
    #     optional: target database --------------------------------------------------------------------------+
    #
    
    # restore a single backup file; the instance and db name will be detected from file
    #
    # Steps: detect instance and source database from the filename, load the
    # config of the instance, test the connection, delete the target database
    # (skipped in fast mode), create it, import the dump and put the security
    # infos that were stored by the backup.
    # The script is stopped with exitcode 1 if the instance cannot be detected
    # or if the connection test fails.
    # The helpers h2, color, fetchrc and guessDB are not defined in this file.
    #
    # global  string  ARCHIVE_DIR
    # global  string  BACKUP_TARGETDIR
    # param   string  filename of db dump (full path or relative to BACKUP_TARGETDIR)
    # param   string  optional: target database; default: detect name from import database
    function restoreByFile(){
      sMyfile=$1
      dbname=$2

      bFastMode=0 # 0 = delete db first and import | 1 = create and import (on empty instance only)

      echo
      h2 "analyze dump $sMyfile"

      # the instance is the first directory of the relative filename
      COUCHDB_INSTANCE=$(echo "$sMyfile" | sed "s#${BACKUP_TARGETDIR}##g" | sed "s#\./##g" | sed "s#^/##g" | cut -f 1 -d "/")
      echo "detected COUCHDB_INSTANCE   : [${COUCHDB_INSTANCE}]"
      if [ -z "$COUCHDB_INSTANCE" ]; then
        echo "ERROR: Name of the instance was not detected."
        echo "       For couchdb restore you should cd to the ${BACKUP_TARGETDIR} or ${ARCHIVE_DIR}"
        exit 1
      fi

      local _sourceDB
      _sourceDB="$( guessDB "$sMyfile" | sed 's#\.couchdbdump\.gz$##' )"
      echo "detected source database    : [${_sourceDB}]"

      if [ -z "$dbname" ]; then
        dbname="$_sourceDB"
        echo "using the same as target    : [${dbname}]"
      else
        echo "using db schema from param 2: [${dbname}]"
      fi

      echo

      loadInstance "$COUCHDB_INSTANCE"

      if [ $bFastMode -eq 0 ]; then
        echo "connect $couchdbhost on port $couchdbport with user $couchdbuser"
        if ! curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 " >/dev/null; then
            color error
            echo "ERROR: couch DB instance is not available"
            curl -X GET "$COUCH_URL"
            color reset
            exit 1
        fi
        color ok
        echo OK
        color reset
      fi

      echo

      # Remark: the texts for h2 are quoted. An unquoted [name] is a globbing
      # pattern that would be replaced by a matching file of the current dir.
      if [ $bFastMode -eq 0 ]; then
        h2 "deleting database [$dbname] ..."
        color cmd
        _couchapi DELETE "$dbname"
        fetchrc
        color reset
      fi

      h2 "creating database [$dbname] ..."
      color cmd
      _couchapi PUT "$dbname"
      fetchrc
      color reset

      h2 "import file ..."
      color cmd
      zcat "${sMyfile}" | couchrestore --db "$dbname"
      fetchrc
      color reset

      h2 "add security infos ..."
      # The backup moves the security file of a deleted database into the
      # subdir deleted_databases/security/ - use it for dumps of that folder.
      # NOTE(review): assumes that guessDB detects the database name for a
      # file below deleted_databases too - confirm.
      local _secdir="${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/security"
      case "$sMyfile" in
        *deleted_databases/*)
          _secdir="${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/deleted_databases/security"
          ;;
      esac
      SECURITYFILE=${_secdir}/__security__${_sourceDB}.json

      if [ -r "$SECURITYFILE" ]; then
        SECDATA="$( cat "$SECURITYFILE" )"
        color cmd
        echo "add security data: $SECDATA"
        _couchapi PUT "${dbname}/_security" "$SECDATA"
        fetchrc
        color reset
      else
        # do not send a PUT request without data
        echo "WARNING: security file ${SECURITYFILE} was not found - the security infos were not restored"
      fi

      echo

    }
    
    # --------------------------------------------------------------------------------
    # MAIN
    # --------------------------------------------------------------------------------
    
    
    # ----- check requirements
    
    # --- is a couchd here
    # j_requireProcess "couchdb"   1
    
    # --- very specific :-/ ... check available config files
    # The exitcode of ls is added to rc: it is not zero if CFGDIR has no entry.
    # The arithmetic expansion works with and without integer attribute of rc.
    ls -1 "${CFGDIR}"/* >/dev/null 2>&1
    rc=$(( rc + $? ))


    if [ "$rc" -eq 0 ]; then
      echo "OK: couchdb2 config was found on this system ... checking requirements for backup ..."

      # NOTE(review): j_requireBinary is defined outside of this file; the
      # following check of rc suggests that it increments rc if a binary is
      # missing - confirm.
      j_requireBinary  "curl"         1
      j_requireBinary  "couchbackup"  1
      j_requireBinary  "couchrestore" 1

      #ls ${dirPythonPackages}/couchdb/tools/dump.py ${dirPythonPackages}/couchdb/tools/load.py >/dev/null && echo "OK: python couchdb tools were found"
      #rc=$rc+$?


      if [ "$rc" -eq 0 ]; then
        echo

        # "$@" hands over each remaining parameter as it was given: a filename
        # or a filter with spaces is not split into several arguments
        if [ "$1" = "restore" ]; then
          echo
          shift 1
          restoreByFile "$@"

        else
          shift 1

          # remove keyword ALL which is used for localdump.sh to loop over all db types
          test "$1" = "ALL" && shift 1

          doBackup "$@"
        fi

      else
        color error
        echo "ERROR: Couchdb is here but I am missing things for the backup :-/"
        color reset
      fi

    else
      rc=0
      echo "__DB__$SERVICENAME SKIP: couchdb2 config does not seem to be here"
    fi


    echo "__DB__$SERVICENAME INFO: $0 $* [$SERVICENAME] final returncode rc=$rc"
    
    # --------------------------------------------------------------------------------