diff --git a/plugins/localdump/couchdb2.sh b/plugins/localdump/couchdb2.sh index 997989337beefdf6f257bee6224a49751b4ba0fb..1daa1098d569c0301318787dbb24cef19e5ef73e 100755 --- a/plugins/localdump/couchdb2.sh +++ b/plugins/localdump/couchdb2.sh @@ -80,13 +80,12 @@ function _getDblist(){ _couchapi GET _all_dbs | sed 's#\"#\n#g' | grep -Ev "^(\[|\,|\])$" | grep -v _replicator | grep -v _global_changes } +# UNUSED # get value update_seq of given couchdb name -function _getDbSeq(){ - # _couchapi GET $1 | sed 's#,\"#\n"#g' | egrep -v "^(\[|\,|\])$" | grep update_seq | cut -f 4 -d '"' - _couchapi GET "$1" | sed 's#,\"#\n"#g' | grep -Ev "^(\[|\,|\])$" | grep update_seq | cut -f 4 -d '"' | cut -f 1 -d '-' -} - - +# function _getDbSeq(){ +# # _couchapi GET $1 | sed 's#,\"#\n"#g' | egrep -v "^(\[|\,|\])$" | grep update_seq | cut -f 4 -d '"' +# _couchapi GET "$1" | sed 's#,\"#\n"#g' | grep -Ev "^(\[|\,|\])$" | grep update_seq | cut -f 4 -d '"' | cut -f 1 -d '-' +# } # active curl prozesses function curlCount(){ @@ -104,6 +103,13 @@ function wait4curlcount(){ test $iCount -gt $iContinue && wait4curlcount $iContinue } +# optimized curl requests to get metadata from all databases +# used in _doBackupOfSingleInstance +# it returns a JSON line for each database +# +# param integer iChunksize count urls per curl command +# param integer iParallel count of curl processes +# param string dblistfile path+file to list of database function reqCombined(){ typeset -i local iChunksize; iChunksize=$1 typeset -i local iParallel; iParallel=$2 @@ -112,7 +118,6 @@ function reqCombined(){ typeset -i iCounter=0 cmdline= - # for mydb in $dblist for mydb in $( cat $dblistfile ) do @@ -123,7 +128,8 @@ function reqCombined(){ if [ $iCounter -ge $iChunksize ]; then curl -s $cmdline & - # warte, solange Zahl curl Prozesse kleiner Maximum + + # wait until the count of curl processes no longer exceeds the maximum wait4curlcount $iParallel iCounter=0 @@ -185,7 +191,7 @@ function doBackup(){ echo "--- instance: 
$COUCHDB_INSTANCE" if curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 "; then echo OK, connected. - sleep 2 + sleep 1 _doBackupOfSingleInstance else @@ -216,15 +222,18 @@ function _doBackupOfSingleInstance(){ test -d "$_dir" || (echo "creating $_dir" ; mkdir -p "$_dir" ) done + local iChunksize=100 + local iParallel=6 + + local sSequenceCurrent + local sSequenceLast + echo "--- $( date ) Get list of all databases" - # get a list of current databases - # dblist=$( _getDblist ) local dblistfile dblistfile="/tmp/dblist_${COUCHDB_INSTANCE}" _getDblist >"${dblistfile}" - # typeset -i iDbTotal=$( echo "$dblist" | wc -l ) - typeset -i iDbTotal=$( cat "$dblistfile" | wc -l ) + typeset -i iDbTotal=$( wc -l < "$dblistfile") typeset -i iDb=0 # counter for number of database in the loop typeset -i iDbCount=0 # counter for backed up databases echo "${COUCHDB_INSTANCE} has $iDbTotal databases" @@ -242,7 +251,6 @@ function _doBackupOfSingleInstance(){ dbname=${dbname/%.couchdbdump.gz//} dbname=${dbname/\/} - # if ! grep "^${dbname}" <<< "$dblist" >/dev/null; then if ! grep "^${dbname}" "${dblistfile}" >/dev/null; then SEQFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/seq/__seq__${dbname} SECURITYFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/security/__security__${dbname}.json @@ -252,7 +260,6 @@ function _doBackupOfSingleInstance(){ mv "${SECURITYFILE}" "${ARCHIVE_DIR2}/security/" fi done - # done | tee /tmp/couch_archive_${COUCHDB_INSTANCE}.txt echo echo "--- $( date ) DUMP databases" @@ -261,36 +268,24 @@ function _doBackupOfSingleInstance(){ echo " ARCHIVE ${ARCHIVE_DIR}/${COUCHDB_INSTANCE}" echo - # TODO: optimze creation of hash with database and sequence id - local iChunksize=100 - local iParallel=6 - echo "--- $( date ) - Get database meta infos ... 
max $iParallel parralel curl requests sending $iChunksize database urls per connection" - seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"') - # seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"' | tr ',' ' ' | awk '{ sub(/-.*/, "\1", $2 ); print $1 " " $2 }' ) - echo "$seq" | head - + echo "----- $( date ) - Get database meta infos ... max $iParallel parralel curl requests sending $iChunksize database urls per process" + seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"' | tr ',' ' ' | awk '{ sub(/-.*/, "", $2 ); print $1 "," $2 }' ) + # ^ ^ ^ ^ ^ + # db_name + update_seq in a single line --+ | | | and back: space to comma --+ + # delete quotes --+ | +-- remove string after first minus char + # comma to space (for awk values) --+ + # the result is ... echo "$seq" | head -3 + # _users,7688 + # candidate-00649860284626638ac6fd12bf000df5,40 + # candidate-04561cddbd0fa305714b48a57929d8b4,3 + + echo "----- $( date ) - reading sequence ids..." declare -A aSeq - - echo "--- $( date ) - reading sequence ids..." for line in $( echo "$seq" ) do - IFS="," read -r db seqvalue <<< "$line" - IFS="-" read -r seqid dummy <<< "$seqvalue" - # seqid=$( echo $seqvalue | cut -f 1 -d '-') + IFS="," read -r db seqid <<< "$line" aSeq+=([$db]=$seqid) done - # for line in $( echo "$seq" ) - # do - # IFS=" " read -r db seqid <<< "$line" - # echo "... $line ... 
$db - $seqid" - # aSeq+=([$db]=$seqid) - # done - - dbname="_users" - - # echo "Seq ID von $dbname: ${aSeq[$dbname]}" - # echo - # exit for dbname in $( cat "$dblistfile" ) do @@ -303,13 +298,8 @@ function _doBackupOfSingleInstance(){ # sSequenceCurrent=$(_getDbSeq "${dbname}") sSequenceCurrent="${aSeq[$dbname]}" - sSequenceLast=$(cat "${SEQFILE}" 2>/dev/null | cut -f 1 -d '-') -# sSequenceLast=`cat ${SEQFILE} 2>/dev/null | tr -d '\n'` - # echo - # echo "update_seq --+-- current [${sSequenceCurrent}]" - # echo " +-- backup [${sSequenceLast}]" if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] && [ -f "$ARCHIVFILE" ]; then echo "SKIP: still on sequence ${sSequenceLast}"