Skip to content
Snippets Groups Projects
Commit f86ace8e authored by Hahn Axel (hahn)'s avatar Hahn Axel (hahn)
Browse files

couchdb2 backup: more performance tuning

parent 88b13d36
Branches
No related tags found
1 merge request!110couchdb2 backup: more performance tuning
......@@ -80,13 +80,12 @@ function _getDblist(){
_couchapi GET _all_dbs | sed 's#\"#\n#g' | grep -Ev "^(\[|\,|\])$" | grep -v _replicator | grep -v _global_changes
}
# UNUSED
# get the value of "update_seq" for the given couchdb database name
function _getDbSeq(){
    # Fetch metadata of database "$1" and print the numeric part of its
    # "update_seq" value (the text before the first "-" of the sequence id).
    _couchapi GET "$1" \
        | sed 's#,\"#\n"#g' \
        | grep -Ev "^(\[|\,|\])$" \
        | grep update_seq \
        | cut -f 4 -d '"' \
        | cut -f 1 -d '-'
}
# function _getDbSeq(){
# # _couchapi GET $1 | sed 's#,\"#\n"#g' | egrep -v "^(\[|\,|\])$" | grep update_seq | cut -f 4 -d '"'
# _couchapi GET "$1" | sed 's#,\"#\n"#g' | grep -Ev "^(\[|\,|\])$" | grep update_seq | cut -f 4 -d '"' | cut -f 1 -d '-'
# }
# count of active curl processes
function curlCount(){
......@@ -104,6 +103,13 @@ function wait4curlcount(){
test $iCount -gt $iContinue && wait4curlcount $iContinue
}
# optimized curl requests to get metadata from all databases
# used in _doBackupOfSingleInstance
# it returns a JSON line for each database
#
# param integer iChunksize count urls per curl command
# param integer iParallel count of curl processes
# param string dblistfile path+file to the list of databases
function reqCombined(){
typeset -i local iChunksize; iChunksize=$1
typeset -i local iParallel; iParallel=$2
......@@ -112,7 +118,6 @@ function reqCombined(){
typeset -i iCounter=0
cmdline=
# for mydb in $dblist
for mydb in $( cat $dblistfile )
do
......@@ -123,7 +128,8 @@ function reqCombined(){
if [ $iCounter -ge $iChunksize ]; then
curl -s $cmdline &
# warte, solange Zahl curl Prozesse kleiner Maximum
# wait until the count of curl processes is lower than the maximum
wait4curlcount $iParallel
iCounter=0
......@@ -185,7 +191,7 @@ function doBackup(){
echo "--- instance: $COUCHDB_INSTANCE"
if curl --head -X GET "$COUCH_URL" 2>/dev/null | grep "^HTTP.* 200 "; then
echo OK, connected.
sleep 2
sleep 1
_doBackupOfSingleInstance
else
......@@ -216,15 +222,18 @@ function _doBackupOfSingleInstance(){
test -d "$_dir" || (echo "creating $_dir" ; mkdir -p "$_dir" )
done
local iChunksize=100
local iParallel=6
local sSequenceCurrent
local sSequenceLast
echo "--- $( date ) Get list of all databases"
# get a list of current databases
# dblist=$( _getDblist )
local dblistfile
dblistfile="/tmp/dblist_${COUCHDB_INSTANCE}"
_getDblist >"${dblistfile}"
# typeset -i iDbTotal=$( echo "$dblist" | wc -l )
typeset -i iDbTotal=$( cat "$dblistfile" | wc -l )
typeset -i iDbTotal=$( wc -l < "$dblistfile")
typeset -i iDb=0 # counter for number of database in the loop
typeset -i iDbCount=0 # counter for backed up databases
echo "${COUCHDB_INSTANCE} has $iDbTotal databases"
......@@ -242,7 +251,6 @@ function _doBackupOfSingleInstance(){
dbname=${dbname/%.couchdbdump.gz//}
dbname=${dbname/\/}
# if ! grep "^${dbname}" <<< "$dblist" >/dev/null; then
if ! grep "^${dbname}" "${dblistfile}" >/dev/null; then
SEQFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/seq/__seq__${dbname}
SECURITYFILE=${ARCHIVE_DIR}/${COUCHDB_INSTANCE}/security/__security__${dbname}.json
......@@ -252,7 +260,6 @@ function _doBackupOfSingleInstance(){
mv "${SECURITYFILE}" "${ARCHIVE_DIR2}/security/"
fi
done
# done | tee /tmp/couch_archive_${COUCHDB_INSTANCE}.txt
echo
echo "--- $( date ) DUMP databases"
......@@ -261,36 +268,24 @@ function _doBackupOfSingleInstance(){
echo " ARCHIVE ${ARCHIVE_DIR}/${COUCHDB_INSTANCE}"
echo
# TODO: optimze creation of hash with database and sequence id
local iChunksize=100
local iParallel=6
echo "--- $( date ) - Get database meta infos ... max $iParallel parralel curl requests sending $iChunksize database urls per connection"
seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"')
# seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"' | tr ',' ' ' | awk '{ sub(/-.*/, "\1", $2 ); print $1 " " $2 }' )
echo "$seq" | head
echo "----- $( date ) - Get database meta infos ... max $iParallel parallel curl requests sending $iChunksize database urls per process"
seq=$( reqCombined $iChunksize $iParallel "$dblistfile" | jq -r ' [ .db_name, .update_seq ] | @csv ' | tr -d '"' | tr ',' ' ' | awk '{ sub(/-.*/, "", $2 ); print $1 "," $2 }' )
# ^ ^ ^ ^ ^
# db_name + update_seq in a single line --+ | | | and back: space to comma --+
# delete quotes --+ | +-- remove string after first minus char
# comma to space (for awk values) --+
# the result is ... echo "$seq" | head -3
# _users,7688
# candidate-00649860284626638ac6fd12bf000df5,40
# candidate-04561cddbd0fa305714b48a57929d8b4,3
echo "----- $( date ) - reading sequence ids..."
declare -A aSeq
echo "--- $( date ) - reading sequence ids..."
for line in $( echo "$seq" )
do
IFS="," read -r db seqvalue <<< "$line"
IFS="-" read -r seqid dummy <<< "$seqvalue"
# seqid=$( echo $seqvalue | cut -f 1 -d '-')
IFS="," read -r db seqid <<< "$line"
aSeq+=([$db]=$seqid)
done
# for line in $( echo "$seq" )
# do
# IFS=" " read -r db seqid <<< "$line"
# echo "... $line ... $db - $seqid"
# aSeq+=([$db]=$seqid)
# done
dbname="_users"
# echo "Seq ID von $dbname: ${aSeq[$dbname]}"
# echo
# exit
for dbname in $( cat "$dblistfile" )
do
......@@ -303,13 +298,8 @@ function _doBackupOfSingleInstance(){
# sSequenceCurrent=$(_getDbSeq "${dbname}")
sSequenceCurrent="${aSeq[$dbname]}"
sSequenceLast=$(cat "${SEQFILE}" 2>/dev/null | cut -f 1 -d '-')
# sSequenceLast=`cat ${SEQFILE} 2>/dev/null | tr -d '\n'`
# echo
# echo "update_seq --+-- current [${sSequenceCurrent}]"
# echo " +-- backup [${sSequenceLast}]"
if [ "${sSequenceCurrent}" = "${sSequenceLast}" ] && [ -f "$ARCHIVFILE" ]; then
echo "SKIP: still on sequence ${sSequenceLast}"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment