Skip to content
Snippets Groups Projects
Commit 27457793 authored by Hahn Axel (hahn)'s avatar Hahn Axel (hahn)
Browse files

speed up detection of changed databases

parent b3b2551a
No related branches found
No related tags found
1 merge request!108Speedup couchdb backup
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
# 2022-10-07 ah v1.8 unescape regex with space to prevent "grep: warning: stray \ before white space" # 2022-10-07 ah v1.8 unescape regex with space to prevent "grep: warning: stray \ before white space"
# 2023-06-06 ah v1.9 show a warning if the sequence id was not fetched # 2023-06-06 ah v1.9 show a warning if the sequence id was not fetched
# 2023-06-12 ah v1.10 skip couchdb dump if no sequence id was detected (=db deleted since fetching list of all dbs) # 2023-06-12 ah v1.10 skip couchdb dump if no sequence id was detected (=db deleted since fetching list of all dbs)
# 2023-06-26 ah v1.11 speed up detection of changed databases
# ================================================================================ # ================================================================================
if [ -z "$BACKUP_TARGETDIR" ]; then if [ -z "$BACKUP_TARGETDIR" ]; then
...@@ -85,6 +86,55 @@ function _getDbSeq(){ ...@@ -85,6 +86,55 @@ function _getDbSeq(){
} }
# Count the curl processes that are currently running on this host.
#
# Only processes whose executable name is exactly "curl" are counted. Matching
# the substring "curl" anywhere in a "ps -ef" line would also count unrelated
# processes (an editor opened on "curl.log", "curl-config", a user name, ...).
# The count is system wide: curl processes of other users are included.
#
# Arguments: none
# Outputs:   number of curl processes as a plain integer on stdout
# Returns:   exit status of the awk stage (0 in normal operation)
function curlCount(){
  # "-o comm=" prints the command name only and suppresses the header line.
  # Some platforms print the full path of the executable, so the directory
  # part is removed before comparing.
  ps -e -o comm= | awk '
    {
      sub(/^[ \t]+/, "")
      sub(/[ \t]+$/, "")
      sub(/^.*\//, "")
      if ($0 == "curl") n++
    }
    END { print n + 0 }
  '
}
# Block until at most N curl processes are running.
#
# The number of running processes is polled with curlCount. Polling is done in
# a loop with a short pause instead of recursion, so a long running curl can
# neither exhaust the function call stack nor keep a CPU busy spawning "ps".
#
# Arguments: $1 - maximum number of curl processes that may still be running
#                 when the function returns (default: 0 = wait for all)
# Outputs:   nothing
# Returns:   0
function wait4curlcount(){
  local -i iContinue=${1:-0}
  local -i iCount

  iCount=$( curlCount )
  while (( iCount > iContinue )); do
    sleep 0.1
    iCount=$( curlCount )
  done
  return 0
}
# Request the database URL of every database in $dblist, combining several
# URLs into one curl call and running several curl calls in parallel.
#
# Globals:   dblist    (read) - whitespace separated list of database names
#            COUCH_URL (read) - url prefix that is put in front of each name
# Arguments: $1 - chunk size: number of database urls per curl call
#            $2 - maximum number of curl processes that may be running before
#                 the next chunk is started (passed to wait4curlcount)
# Outputs:   the responses printed by curl on stdout; because the calls run in
#            parallel the order of the responses is not defined
# Returns:   0 - failures of single curl calls are not reported
function reqCombined(){
  local -i iChunksize=${1:-0}
  local -i iParallel=${2:-0}
  local -i iCounter=0
  local -a aArgs=()
  local -a aPids=()
  local mydb

  # word splitting of $dblist is intended: one database name per word
  for mydb in $dblist
  do
    # "-:" is the short form of curl's --next: it separates the requests
    # that are handled by a single curl invocation
    if (( ${#aArgs[@]} > 0 )); then
      aArgs+=( "-:" )
    fi
    aArgs+=( "${COUCH_URL}${mydb}" )
    iCounter+=1

    if (( iCounter >= iChunksize )); then
      curl -s "${aArgs[@]}" &
      aPids+=( "$!" )
      # wait while more than $iParallel curl processes are running
      wait4curlcount "$iParallel"
      iCounter=0
      aArgs=()
    fi
  done

  # send the remaining urls that did not fill up a complete chunk
  if (( ${#aArgs[@]} > 0 )); then
    curl -s "${aArgs[@]}" &
    aPids+=( "$!" )
  fi

  # wait for the curl jobs started here; other curl processes on the
  # system must not delay the return
  if (( ${#aPids[@]} > 0 )); then
    wait "${aPids[@]}"
  fi
  return 0
}
# ---------- CONFIG/ INSTANCES # ---------- CONFIG/ INSTANCES
# get valid configured instances # get valid configured instances
...@@ -168,15 +218,13 @@ function _doBackupOfSingleInstance(){ ...@@ -168,15 +218,13 @@ function _doBackupOfSingleInstance(){
echo echo
# get a list of current databases # get a list of current databases
dblist=/tmp/couch_list_${COUCHDB_INSTANCE}.txt dblist=$( _getDblist )
_getDblist > "$dblist"
ls -l "$dblist"
# detect deleted databases: # detect deleted databases:
for dumpfile in $( find "${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/" -maxdepth 1 -type f -name "*.couchdbdump.gz" ) for dumpfile in $( find "${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/" -maxdepth 1 -type f -name "*.couchdbdump.gz" )
do do
dbname=$( basename $dumpfile | sed "s#\.couchdbdump\.gz##g" ) dbname=$( basename $dumpfile | sed "s#\.couchdbdump\.gz##g" )
if ! grep "^${dbname}" "$dblist" >/dev/null; then if ! grep "^${dbname}" <<< "$dblist" >/dev/null; then
SEQFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/seq/__seq__${dbname} SEQFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/seq/__seq__${dbname}
SECURITYFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/security/__security__${dbname}.json SECURITYFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/security/__security__${dbname}.json
echo "DELETED $dbname ... $( ls -l ${dumpfile} | cut -f 5- -d ' ' )" echo "DELETED $dbname ... $( ls -l ${dumpfile} | cut -f 5- -d ' ' )"
...@@ -188,7 +236,7 @@ function _doBackupOfSingleInstance(){ ...@@ -188,7 +236,7 @@ function _doBackupOfSingleInstance(){
# done | tee /tmp/couch_archive_${COUCHDB_INSTANCE}.txt # done | tee /tmp/couch_archive_${COUCHDB_INSTANCE}.txt
echo echo
typeset -i iDbTotal=$( cat "$dblist" | wc -l ) typeset -i iDbTotal=$( echo "$dblist" | wc -l )
typeset -i iDb=0 typeset -i iDb=0
typeset -i iDbCount=0 typeset -i iDbCount=0
...@@ -198,16 +246,36 @@ function _doBackupOfSingleInstance(){ ...@@ -198,16 +246,36 @@ function _doBackupOfSingleInstance(){
echo " ARCHIVE ${ARCHIVE_DIR}/${COUCHDB_INSTANCE}" echo " ARCHIVE ${ARCHIVE_DIR}/${COUCHDB_INSTANCE}"
echo echo
for dbname in $( cat "$dblist" ) local iChunksize=100
local iParallel=10
echo "--- $( date ) - Get database meta infos ... max $iParallel parralel curl requests sending $iChunksize database urls per connection"
seq=$( reqCombined 100 6 | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"')
declare -A aSeq
echo "--- $( date ) - reading sequence ids..."
for line in $( echo "$seq" )
do do
iDb=$iDb+1 IFS="," read -r db seqvalue <<< "$line"
IFS="-" read -r seqid dummy <<< "$seqvalue"
# seqid=$( echo $seqvalue | cut -f 1 -d '-')
aSeq+=([$db]=$seqid)
done
for dbname in $( echo "$dblist" )
do
iDb+=1
echo -n "----- $(date) ${COUCHDB_INSTANCE} -- $iDb of $iDbTotal - ${dbname} - " echo -n "----- $(date) ${COUCHDB_INSTANCE} -- $iDb of $iDbTotal - ${dbname} - "
OUTFILE=${BACKUP_TARGETDIR}/${COUCHDB_INSTANCE}/$(get_outfile "${dbname}").couchdbdump OUTFILE=${BACKUP_TARGETDIR}/${COUCHDB_INSTANCE}/$(get_outfile "${dbname}").couchdbdump
ARCHIVFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/${dbname}.couchdbdump.gz ARCHIVFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/${dbname}.couchdbdump.gz
SEQFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/seq/__seq__${dbname} SEQFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/seq/__seq__${dbname}
SECURITYFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/security/__security__${dbname}.json SECURITYFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/security/__security__${dbname}.json
sSequenceCurrent=$(_getDbSeq "${dbname}") # sSequenceCurrent=$(_getDbSeq "${dbname}")
sSequenceCurrent="${aSeq[$dbname]}"
sSequenceLast=$(cat "${SEQFILE}" 2>/dev/null | cut -f 1 -d '-') sSequenceLast=$(cat "${SEQFILE}" 2>/dev/null | cut -f 1 -d '-')
# sSequenceLast=`cat ${SEQFILE} 2>/dev/null | tr -d '\n'` # sSequenceLast=`cat ${SEQFILE} 2>/dev/null | tr -d '\n'`
...@@ -231,6 +299,7 @@ function _doBackupOfSingleInstance(){ ...@@ -231,6 +299,7 @@ function _doBackupOfSingleInstance(){
echo "update_seq --+-- current [${sSequenceCurrent}]" echo "update_seq --+-- current [${sSequenceCurrent}]"
echo " +-- backup [${sSequenceLast}]" echo " +-- backup [${sSequenceLast}]"
echo -n "Need to backup ... " echo -n "Need to backup ... "
couchbackup --db "${dbname}" >"${OUTFILE}".progress 2>/dev/null && mv "${OUTFILE}".progress "${OUTFILE}" couchbackup --db "${dbname}" >"${OUTFILE}".progress 2>/dev/null && mv "${OUTFILE}".progress "${OUTFILE}"
fetchrc fetchrc
...@@ -254,7 +323,6 @@ function _doBackupOfSingleInstance(){ ...@@ -254,7 +323,6 @@ function _doBackupOfSingleInstance(){
fi # if [ -z "$sSequenceCurrent" ]; then fi # if [ -z "$sSequenceCurrent" ]; then
fi # if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] ... fi # if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] ...
done done
rm -f "$dblist"
echo "__DB__$SERVICENAME backup INFO: ${COUCHDB_INSTANCE} - backed up $iDbCount dbs of $iDbTotal total" echo "__DB__$SERVICENAME backup INFO: ${COUCHDB_INSTANCE} - backed up $iDbCount dbs of $iDbTotal total"
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment