#!/bin/bash
# ================================================================================
#
# LOCALDUMP :: COUCHDB2 - using nodejs tools couchbackup and couchrestore
# https://github.com/cloudant/couchbackup
#
# Backup:
# - creates gzipped plain text backups (JSON) from each scheme
# - write sequence id into a text file
# - store extra file with security infos
# - latest backup set is written to archive
#
# --------------------------------------------------------------------------------
# ah - Axel Hahn <axel.hahn@iml.unibe.ch>
# ds - Daniel Schueler <daniel.schueler@iml.unibe.ch>
#
# 2019-11-13  .....  v1.0  initial version with backup and restore (single DB)
# 2020-05-19  .....  v1.1  backup a single or multiple couchdb instances by globbing param
#                          ./localdump.sh backup couchdb2 demo
# 2021-10-11  .....  v1.2  added fastmode in restore: no test connect, do not 
#                          delete DB before create request
# 2022-01-20         v1.3  fixes with shellcheck
# 2022-03-17         v1.4  WIP: add lines with prefix __DB__
# 2022-04-07         v1.5  check archive file, not only seq file
# 2022-04-14         v1.6  backup security infos (no restore yet)
# 2022-04-21         v1.7  restore security infos
# 2022-10-07  ah     v1.8  unescape regex with space to prevent "grep: warning: stray \ before white space"
# 2023-06-06  ah     v1.9  show a warning if the sequence id was not fetched
# 2023-06-12  ah     v1.10 skip couchdb dump if no sequence id was detected (=db deleted since fetching list of all dbs)
# 2023-06-26  ah     v1.11 speed up detection of changed databases
# 2023-06-27  ah     v1.12 enable tmp file for dblist again (faster); speedup loops in backup
# 2023-06-28  ah     v1.13 optimize backup move OUTFILE; measure time; cache backed up sequence ids
# ================================================================================

# Guard: this file needs BACKUP_TARGETDIR, which is expected to be provided by
# the calling script (presumably localdump.sh - verify). Without it we stop.
[ -n "$BACKUP_TARGETDIR" ] || {
  echo "ERROR: you cannot start $(basename "$0") directly"
  rc=$rc+1
  exit 1
}

# --------------------------------------------------------------------------------
# CONFIG
# --------------------------------------------------------------------------------

# directory below the home of the current user; it holds one *.config file
# per couchdb instance
CFGDIR=~/".iml_backup/couchdb2"

# UNUSED
# dirPythonPackages=/usr/lib/python2.7/site-packages

# now set in localdump.sh
# ARCHIVE_DIR=$(_j_getvar "${JOBFILE}" dir-dbarchive)/couchdb2

# --------------------------------------------------------------------------------
# FUNCTIONS
# --------------------------------------------------------------------------------

# make a couch api request with curl; the response body is written to stdout
#
# global: COUCH_URL (read) ... base url; the relative url is appended as is
#
# param  string  method ... one of GET|PUT|POST|DELETE
# param  string  relative url, i.e. _all_dbs or _stats
# param  string  optional: data for POST|PUT requests
function _couchapi(){
  local method=$1
  local apiurl=$2
  local data=$3

  # The curl arguments are collected in an array: each value stays a single
  # argument, even if the data contains spaces or glob characters (JSON).
  local -a aParams=( -X "${method}" "${COUCH_URL}${apiurl}" )
  if [ -n "$data" ]; then
    aParams+=( -d "${data}" )
  fi

  # stderr of curl (progress meter, connect errors) is discarded
  curl "${aParams[@]}" 2>/dev/null
}

# get the names of all databases - one name per line on stdout
#
# The JSON array returned by _all_dbs is split at the double quotes; the
# remaining array syntax ("[", "," and "]") is dropped. An empty list "[]"
# is dropped too, so an instance without databases results in no output.
# Names containing _replicator or _global_changes are left out.
# NOTE(review): this is a substring filter - a database like "my_replicator"
# would be skipped as well; confirm if that is intended.
function _getDblist(){
  _couchapi GET _all_dbs \
    | tr '"' '\n' \
    | grep -vxF -e '[' -e ',' -e ']' -e '[]' \
    | grep -vF -e '_replicator' -e '_global_changes'
}

# UNUSED
# get value update_seq of given couchdb name
# function _getDbSeq(){
#   # _couchapi GET $1 | sed 's#,\"#\n"#g' | egrep -v "^(\[|\,|\])$" | grep update_seq | cut -f 4 -d '"'
#   _couchapi GET "$1" | sed 's#,\"#\n"#g' | grep -Ev "^(\[|\,|\])$" | grep update_seq | cut -f 4 -d '"' | cut -f 1 -d '-'
# }

# count active curl processes
# It prints the number of lines in the output of "ps -ef" that contain "curl";
# lines containing "grep" are not counted.
function curlCount(){
  ps -ef | awk '!/grep/ && /curl/ { iFound++ } END { print iFound+0 }'
}

# wait until not more than N curl processes exist
# It polls curlCount in a loop and returns as soon as the count is <= N.
#
# param  integer  optional: maximum count of curl processes; default: 0
function wait4curlcount(){
  local -i iContinue=${1:-0}
  local -i iCount

  # a loop instead of a recursive call: the call stack does not grow,
  # no matter how long the curl processes need
  while :
  do
    iCount=$( curlCount )
    (( iCount > iContinue )) || break
  done
  return 0
}

# optimized curl requests to get metadata from all databases
# used in _doBackupOfSingleInstance
# The responses of all curl processes are written to stdout.
#
# Each curl process gets up to iChunksize urls ${COUCH_URL}<database>. After
# starting a process in background it waits (wait4curlcount) until the count
# of curl processes is not above iParallel. At the end it waits until no curl
# process is left.
#
# global: COUCH_URL (read)
#
# param  integer  iChunksize  count urls per curl command
# param  integer  iParallel   count of curl processes
# param  string   dblistfile  path+file to list of database (one name per line)
function reqCombined(){
  local -i iChunksize=$1
  local -i iParallel=$2
  local dblistfile="$3"

  local -i iCounter=0
  local -a aDbs=()
  local -a aUrls=()
  local mydb

  # read the list line by line; the filename may contain spaces
  mapfile -t aDbs < "$dblistfile"

  for mydb in "${aDbs[@]}"
  do
      test -n "$mydb" || continue

      iCounter+=1
      # "-:" (curl option --next) separates the urls inside one curl call
      test ${#aUrls[@]} -gt 0 && aUrls+=( "-:" )
      aUrls+=( "${COUCH_URL}${mydb}" )

      if [ $iCounter -ge $iChunksize ]; then

          curl -s "${aUrls[@]}" &

          # wait until count of curl processes is not above the maximum
          wait4curlcount $iParallel

          iCounter=0
          aUrls=()
      fi

  done

  # send the remaining urls of the last (incomplete) chunk
  if [ ${#aUrls[@]} -gt 0 ]; then
    curl -s "${aUrls[@]}" &
  fi

  wait4curlcount 0
}



# ---------- CONFIG/ INSTANCES

# get valid configured instances
# It prints one name per line: the filename without path up to the first dot
# of each file ${CFGDIR}/*<filter>*.config that can be sourced without error.
# Remark: the config files are sourced in the current shell.
#
# global: CFGDIR (read)
#
# param  string  optional: filter for the config filenames; globbing is allowed
function getInstances(){
  local mycfg
  local sName

  # $1 stays unquoted on purpose: it may contain a glob pattern
  # shellcheck disable=SC2231
  for mycfg in "${CFGDIR}"/*${1}*.config
  do
    # without any matching file the loop gets the pattern itself - skip it
    test -f "$mycfg" || continue

    if . "$mycfg"; then
      sName=${mycfg##*/}
      printf '%s\n' "${sName%%.*}"
    fi
  done
}


# load the config of an existing instance
# see getInstances to get valid names
#
# It sources ${CFGDIR}/<name>.config in the current shell. The script exits
# with exitcode 1 if the file cannot be sourced or if COUCH_URL is empty
# after sourcing it.
#
# global: CFGDIR (read); COUCH_URL (reset before sourcing)
#
# param  string  name of the instance to load
function loadInstance(){
  COUCH_URL=
  if ! . "${CFGDIR}/${1}.config"; then
    color error
    echo "ERROR: invalid instance: $1 - the config file cannot be sourced"
    color reset
    exit 1
  fi
  if [ -z "${COUCH_URL}" ]; then
    color error
    echo "ERROR: invalid instance: $1 - the config file has no COUCH_URL"
    color reset
    exit 1
  fi

}


# ---------- BACKUP

# backup of the currently loaded couchdb instance
# It sends a HEAD request to COUCH_URL first. With a status 200 the backup of
# all databases is started; otherwise rc is raised and the response of a
# second request is shown.
#
# global: COUCH_URL, PROFILENAME (read); rc (raised on error)
#
# param 1  string  globbing filter to config files (not read in this function)
function doBackup(){

  echo "--- instance: $PROFILENAME"

  # grep is not silenced: the matching status line is shown in the output
  if ! curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 "; then
    rc=$rc+1
    color error
    echo "ERROR: couch DB instance is not available or canot be accessed with these credentials in config file"
    # the request is sent again - this time to make the response visible
    curl -X GET "$COUCH_URL"
    color reset
  else
    echo OK, connected.
    sleep 1
    _doBackupOfSingleInstance
  fi

  printf '\n%s\n\n' "--- $(date) done."
}

# make backup of all databases in a couchdb instance
#
# Steps:
# (1) fetch the list of all databases into a temp file
# (2) move archived dumps (with seq + security file) of databases that are
#     not in that list anymore to ${ARCHIVE_DIR}/deleted_databases
# (3) fetch update_seq of all databases and read the sequence ids of the
#     last backup (cache file; fallback: seq file of the database)
# (4) per database: skip it if the sequence id is unchanged and an archive
#     file exists; otherwise dump with couchbackup, compress, copy to the
#     archive and store sequence id + security infos
# (5) write the cache file with the sequence ids and show a summary
#
# couchbackup is called without an url.
# NOTE(review): presumably it reads the exported COUCH_URL - confirm.
#
# global: COUCH_URL
# global: PROFILENAME
# global: ARCHIVE_DIR, BACKUP_TARGETDIR, SERVICENAME (read)
# global: myrc (read; it is set in fetchrc)
function _doBackupOfSingleInstance(){

  create_targetdir
  local ARCHIVE_DIR2="${ARCHIVE_DIR}/deleted_databases"
  local _dir
  for _dir in "${ARCHIVE_DIR}"      "${ARCHIVE_DIR}/seq"  "${ARCHIVE_DIR}/security" \
              "${ARCHIVE_DIR2}"     "${ARCHIVE_DIR2}/seq" "${ARCHIVE_DIR2}/security"
  do
    test -d "$_dir" || { echo "creating $_dir" ; mkdir -p "$_dir" ; }
  done

  local iChunksize=100
  local iParallel=6

  local dblistfile
  local seqfile
  local seq
  local db
  local seqid
  local key
  local dbname
  local dumpfile
  local sSequenceCurrent
  local sSequenceLast
  local OUTFILE
  local ARCHIVFILE
  local SEQFILE
  local SECURITYFILE
  local -i iTsStart
  local -i iTsTotal
  local -i iDbPerSec
  local -i iDbTotal
  local -i iDb=0        # counter for number of database in the loop
  local -i iDbCount=0   # counter for backed up databases
  local -a aDbs=()
  local -A aSeq=()        # current sequence ids: [database]=id
  local -A aSeqBackup=()  # sequence ids of the last backup: [database]=id

  # unpredictable name for the temp file; fallback is the fixed filename
  dblistfile=$( mktemp "${TMPDIR:-/tmp}/dblist_${PROFILENAME}.XXXXXX" 2>/dev/null ) \
    || dblistfile="/tmp/dblist_${PROFILENAME}.txt"

  # this is just a caching file of the sequence id of the last backup and can be safely deleted.
  seqfile="${ARCHIVE_DIR}/seq/all_seqids_of_last_backups_cache.txt"

  echo "--- $( date ) Get list of all databases"
  _getDblist >"${dblistfile}"

  iDbTotal=$( wc -l < "$dblistfile")
  mapfile -t aDbs < "$dblistfile"
  echo "${PROFILENAME} has $iDbTotal databases"

  # detect deleted databases:
  echo
  echo "--- $( date ) MOVE deleted databases "
  echo "... into ${ARCHIVE_DIR2}"
  echo
  while IFS= read -r -d '' dumpfile
  do
      # extract database name: get basename and cut extension
      dbname=${dumpfile##*/}
      dbname=${dbname%.couchdbdump.gz}

      # exact match of the complete line: "db" must not match "db1"
      if ! grep -Fxq -- "${dbname}" "${dblistfile}"; then
              SEQFILE=${ARCHIVE_DIR}/seq/__seq__${dbname}
              SECURITYFILE=${ARCHIVE_DIR}/security/__security__${dbname}.json
              echo "DELETED $dbname ... $( ls -l "${dumpfile}" | cut -f 5- -d ' ' )"
              mv "${dumpfile}"     "${ARCHIVE_DIR2}"
              test -f "${SEQFILE}"      && mv "${SEQFILE}"      "${ARCHIVE_DIR2}/seq/"
              test -f "${SECURITYFILE}" && mv "${SECURITYFILE}" "${ARCHIVE_DIR2}/security/"
      fi
  done < <( find "${ARCHIVE_DIR}/" -maxdepth 1 -type f -name "*.couchdbdump.gz" -print0 )

  echo
  echo "--- $( date ) DUMP databases"
  echo "    of instance ${PROFILENAME}: $iDbTotal databases"
  echo "    TO BACKUP ${BACKUP_TARGETDIR}"
  echo "      ARCHIVE ${ARCHIVE_DIR}"
  echo

  echo "----- $( date ) - Get database meta infos ... max $iParallel parralel curl requests sending $iChunksize database urls per process"
  seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"' | tr ',' ' '  | awk '{ sub(/-.*/, "", $2 ); print $1 "," $2  }' )
  #                                                                                             ^        ^           ^                    ^                              ^
  #                                                     db_name + update_seq in a single line --+        |           |                    |   and back: space to comma --+
  #                                                                                      delete quotes --+           |                    +-- remove string after first minus char
  #                                                                                comma to space (for awk values) --+
  # the result is ... echo "$seq" | head -3
  # _users,7688
  # candidate-00649860284626638ac6fd12bf000df5,40
  # candidate-04561cddbd0fa305714b48a57929d8b4,3

  echo "----- $( date ) - reading current sequence ids..."
  while IFS="," read -r db seqid
  do
    # an empty $seq results in a single empty line
    test -n "$db" || continue
    aSeq["$db"]=$seqid
  done <<< "$seq"

  echo "----- $( date ) - reading sequence ids of last backup..."
  if [ -r "${seqfile}" ]; then
    while IFS="," read -r db seqid
    do
      test -n "$db" || continue
      aSeqBackup["$db"]=$seqid
    done < "${seqfile}"
  fi

  iTsStart=$( date +%s)
  for dbname in "${aDbs[@]}"
  do
    test -n "$dbname" || continue
    iDb+=1
    echo -n "----- $(date) ${PROFILENAME} -- $iDb of $iDbTotal - ${dbname} - "

    ARCHIVFILE=${ARCHIVE_DIR}/${dbname}.couchdbdump.gz
    SEQFILE=${ARCHIVE_DIR}/seq/__seq__${dbname}
    SECURITYFILE=${ARCHIVE_DIR}/security/__security__${dbname}.json

    sSequenceCurrent="${aSeq[$dbname]}"

    # value from cache file; fallback: the seq file of the database
    sSequenceLast="${aSeqBackup[$dbname]:-$(cat "${SEQFILE}" 2>/dev/null | cut -f 1 -d '-')}"

    aSeqBackup["${dbname}"]=$sSequenceLast
    if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] && [ -f "$ARCHIVFILE" ]; then
      echo "SKIP: still on sequence ${sSequenceLast}"

      # add security file for already existing databases
      if [ ! -f "${SECURITYFILE}" ]; then
        echo "INFO: creating missing security file ${SECURITYFILE}"
        _couchapi GET "${dbname}/_security" > "${SECURITYFILE}"
      fi

    else
      OUTFILE=${BACKUP_TARGETDIR}/$(get_outfile "${dbname}").couchdbdump
      if [ -z "$sSequenceCurrent" ]; then
        echo "WARNING: unable to fetch current sequence ID - maybe the database was deleted."
      else
        echo
        echo "update_seq --+-- current [${sSequenceCurrent}]"
        echo "             +-- backup  [${sSequenceLast}]"
        echo -n "Need to backup ... "

        # run the dump: it is written to a *.progress file first and renamed
        # on success only - an aborted dump never looks like a complete one
        couchbackup --db "${dbname}" >"${OUTFILE}".progress 2>/dev/null && mv "${OUTFILE}".progress "${OUTFILE}"
        fetchrc

        # $myrc is last returncode - set in fetchrc
        if [ $myrc -eq 0 ]; then
          echo -n "gzip ... "
          compress_file "$OUTFILE"
          fetchrc
          if [ $myrc -eq 0 ]; then
            iDbCount+=1

            aSeqBackup["${dbname}"]=${sSequenceCurrent}
            # flushing cached information
            rm -f "${seqfile}" 2>/dev/null

            cp "${OUTFILE}"* "${ARCHIVFILE}"                             \
              && echo "${sSequenceCurrent}">"${SEQFILE}"                 \
              && _couchapi GET "${dbname}/_security" > "${SECURITYFILE}"
            ls -l "${ARCHIVFILE}" "${SEQFILE}" "${SECURITYFILE}"
          fi
        else
          echo "ERROR occured while dumping - abort"
        fi
        ls -l "$OUTFILE"*
        echo
      fi # if [ -z "$sSequenceCurrent" ]; then
    fi # if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] ...
  done
  iTsTotal=$( date +%s)-$iTsStart

  # a loop that was finished in less than a second has a duration of 0:
  # do not divide by zero, show the total count of databases instead
  if (( iTsTotal > 0 )); then
    iDbPerSec=$iDbTotal/$iTsTotal
  else
    iDbPerSec=$iDbTotal
  fi

  echo "----- $( date ) - writing sequence ids ..."
  rm -f "${seqfile}" 2>/dev/null
  for key in "${!aSeqBackup[@]}"; do
    echo "$key,${aSeqBackup[$key]}" >> "${seqfile}"
  done
  ls -l "${seqfile}"
  echo

  rm -f "$dblistfile"

  echo "__DB__$SERVICENAME backup INFO: ${PROFILENAME} - backed up $iDbCount dbs of $iDbTotal total ... in $iTsTotal sec ($iDbPerSec databases per sec)"

}

# ---------- RESTORE
#
# example: 
#
# (1)
# cd /var/iml-archive/couchdb2
# or
# cd /var/iml-backup/couchdb2
#
# (2)
# /opt/imlbackup/client/localdump.sh restore couchdb2 measured-preview-couchdbcluster/mydb.couchdbdump.gz axel-01
#                                    ^       ^        ^                                                   ^
#                                    |       |        |                                                   |
#     action: restore ---------------+       |        |                                                   |
#     database service: couchdb2 ------------+        |                                                   |
#     filename with instance as relative path --------+                                                   |
#     optional: target database --------------------------------------------------------------------------+
#

# restore a single backup file; the name of the source database will be
# detected from the filename (guessDB)
#
# Steps: check the connection to COUCH_URL - delete the target database -
# create it - import the dump with couchrestore - put the security infos.
# The script exits with exitcode 1 if the connection check fails.
#
# The security file is read from ${ARCHIVE_DIR}/security/; if it does not
# exist there then ${ARCHIVE_DIR}/deleted_databases/security/ is used, where
# the backup moves the files of deleted databases. Without a security file
# a warning is shown and no security infos are sent.
#
# global: COUCH_URL, ARCHIVE_DIR (read)
#
# param  string  filename of db dump (full path or relative to BACKUP_TARGETDIR)
# param  string  optional: target database; default: detect name from import database
function restoreByFile(){
  local sMyfile=$1
  local dbname=$2

  local bFastMode=0 # 0 = delete db first and import | 1 = create and import (on empty instance only)
  local _sourceDB
  local SECURITYFILE
  local SECURITYFILE2
  local SECDATA

  echo
  h2 "analyze dump $sMyfile"

  _sourceDB="$( guessDB "$sMyfile" | sed 's#\.couchdbdump\.gz$##' )"
  echo "detected source database    : [${_sourceDB}]"

  if [ -z "$dbname" ]; then
    dbname="$_sourceDB"
    echo "using the same as target    : [${dbname}]"
  else
    echo "using db schema from param 2: [${dbname}]"
  fi

  echo

  if [ $bFastMode -eq 0 ]; then
    echo "connect $couchdbhost on port $couchdbport with user $couchdbuser"
    if ! curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 " >/dev/null; then
        color error
        echo "ERROR: couch DB instance is not available"
        curl -X GET "$COUCH_URL"
        color reset
        exit 1
    fi
    color ok
    echo OK
    color reset
  fi

  echo

  # the headlines are quoted: an unquoted [name] is a glob pattern that can
  # be replaced by matching filenames of the current directory
  if [ $bFastMode -eq 0 ]; then
    h2 "deleting database [$dbname] ..."
    color cmd
    _couchapi DELETE "$dbname"
    fetchrc
    color reset
  fi

  h2 "creating database [$dbname] ..."
  color cmd
  _couchapi PUT "$dbname"
  fetchrc
  color reset

  h2 "import file ..."
  color cmd
  zcat "${sMyfile}" | couchrestore --db "$dbname"
  fetchrc
  color reset

  h2 "add security infos ..."
  SECURITYFILE="${ARCHIVE_DIR}/security/__security__${_sourceDB}.json"
  SECURITYFILE2="${ARCHIVE_DIR}/deleted_databases/security/__security__${_sourceDB}.json"
  if [ ! -f "$SECURITYFILE" ] && [ -f "$SECURITYFILE2" ]; then
    SECURITYFILE="$SECURITYFILE2"
  fi

  if [ -r "$SECURITYFILE" ]; then
    SECDATA="$( cat "$SECURITYFILE" )"
    color cmd
    echo "add security data: $SECDATA"
    _couchapi PUT "${dbname}/_security" "$SECDATA"
    fetchrc
    color reset
  else
    color error
    echo "WARNING: no security file was found for [${_sourceDB}] - security infos were not restored"
    color reset
  fi

  echo

}

# --------------------------------------------------------------------------------
# MAIN
# --------------------------------------------------------------------------------


# ----- check requirements

# --- is a couchd here
# j_requireProcess "couchdb"   1

# --- very specific :-/ ... check available config files
# any entry below CFGDIR? The exitcode of ls is added to rc.
ls -1 "${CFGDIR}"/* >/dev/null 2>&1
rc=$rc+$?


if [ "$rc" -eq 0 ]; then
  # echo OK: couchdb2 config was found on this system ... checking requirements for backup ...

  j_requireBinary  "curl"         1
  j_requireBinary  "couchbackup"  1
  j_requireBinary  "couchrestore" 1
  # jq is used by the backup to read db_name and update_seq of the databases
  # NOTE(review): assumes j_requireBinary handles jq like the binaries above
  j_requireBinary  "jq"           1

  #ls ${dirPythonPackages}/couchdb/tools/dump.py ${dirPythonPackages}/couchdb/tools/load.py >/dev/null && echo "OK: python couchdb tools were found"
  #rc=$rc+$?


  if [ "$rc" -eq 0 ]; then
    echo

    # "$@" keeps each parameter as a single word, i.e. a filename with spaces
    if [ "$1" = "restore" ]; then
      echo
      shift 1
      restoreByFile "$@"

    else
      shift 1

      # remove keyword ALL which is used for localdump.sh to loop over all db types
      test "$1" = "ALL" && shift 1

      doBackup "$@"
    fi

  else
    color error
    echo "ERROR: Couchdb is here but I am missing things for the backup :-/"
    color reset
  fi

else
  # no config means there is nothing to backup here: this is not an error
  rc=0
  echo "__DB__$SERVICENAME SKIP: couchdb2 config does not seem to be here"
fi


echo "__DB__$SERVICENAME INFO: $0 $* [$SERVICENAME] final returncode rc=$rc"

# --------------------------------------------------------------------------------