#!/usr/bin/env bash
#/ Usage: ghe-backup-storage
#/ Take an online, incremental snapshot of all Alambic Storage data using the
#/ calculated routes method.
#/
#/ Note: This command typically isn't called directly. It's invoked by
#/ ghe-backup.
set -e

# Bring in the backup configuration
# shellcheck source=share/github-backup-utils/ghe-backup-config
. "$( dirname "${BASH_SOURCE[0]}" )/ghe-backup-config"

bm_start "$(basename $0)"

# Set up remote host and root backup snapshot directory based on config
host="$GHE_HOSTNAME"
backup_dir="$GHE_SNAPSHOT_DIR/storage"

# Verify rsync is available.
if ! rsync --version 1>/dev/null 2>&1; then
  log_error "rsync not found." 1>&2
  exit 1
fi

# Perform a host-check and establish GHE_REMOTE_XXX variables.
ghe_remote_version_required "$host"

# Split host:port into parts
port=$(ssh_port_part "$GHE_HOSTNAME")
host=$(ssh_host_part "$GHE_HOSTNAME")

# Add user / -l option
user="${host%@*}"
[ "$user" = "$host" ] && user="admin"

hostnames=$host
ssh_config_file_opt=
tempdir=$(mktemp -d -t backup-utils-backup-XXXXXX)
remote_tempdir=$(ghe-ssh "$GHE_HOSTNAME" -- mktemp -d -t backup-utils-backup-XXXXXX)
routes_list=$tempdir/routes_list
remote_routes_list=$remote_tempdir/remote_routes_list
opts="$GHE_EXTRA_SSH_OPTS"

# storage server hostnames under cluster
if [ "$GHE_BACKUP_STRATEGY" = "cluster" ]; then
  ssh_config_file="$tempdir/ssh_config"
  ssh_config_file_opt="-F $ssh_config_file"
  opts="$opts -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no"
  hostnames=$(ghe-cluster-find-nodes "$GHE_HOSTNAME" "storage-server")
  ghe-ssh-config "$GHE_HOSTNAME" "$hostnames" > "$ssh_config_file"
fi

# Replica hostnames for HA
if ghe-ssh "$GHE_HOSTNAME" -- "[ -f '$GHE_REMOTE_ROOT_DIR/etc/github/repl-state' ]"; then
  ha_replica_hosts=$(ghe-ssh "$GHE_HOSTNAME" ghe-cluster-nodes --replica)
fi

# Make sure root backup dir exists if this is the first run
mkdir -p "$backup_dir"

# Removes the remote sync-in-progress file on exit, re-enabling GC operations
# on the remote instance.
cleanup() {
  # Enable remote maintenance operations
  for hostname in $hostnames; do
    ghe-gc-enable $ssh_config_file_opt $hostname:$port || {
      log_warn "Re-enable gc on $hostname failed, please manually delete $SYNC_IN_PROGRESS_FILE" 1>&2
    }
  done

  # Enable remote GC operations for HA replica
  for replica_host in $ha_replica_hosts; do
    echo "set -o pipefail; ssh $replica_host -- 'sudo rm -f $SYNC_IN_PROGRESS_FILE'" | ghe-ssh "$host" /bin/bash || {
      echo "Re-enable gc on $replica_host failed, please manually delete $SYNC_IN_PROGRESS_FILE" 1>&2
    }
  done

  ghe-ssh "$GHE_HOSTNAME" -- rm -rf $remote_tempdir
  rm -rf $tempdir
}
trap 'cleanup' EXIT INT

# Disable remote maintenance operations
for hostname in $hostnames; do
  ghe-gc-disable $ssh_config_file_opt $hostname:$port
done

# Disable remote GC operations for HA replica
# gc_disable is a function defined in ghe-backup-config
# gc_disable is called on the replica node via the primary node, because replica node is not expected to be reachable from backup host. But replica node is expected to be reachable from primary node.
for replica_host in $ha_replica_hosts; do
  echo "set -o pipefail; ssh $replica_host -- '$(declare -f gc_disable); gc_disable \"$SYNC_IN_PROGRESS_FILE\" \"$GHE_GIT_COOLDOWN_PERIOD\"'" | ghe-ssh "$host" /bin/bash || {
    echo "Disable gc on $replica_host failed" 1>&2
  }
done

# If we have a previous increment and it is not empty, avoid transferring existing files via rsync's
# --link-dest support. This also decreases physical space usage considerably.
if [ -d "$GHE_DATA_DIR/current/storage" ] && [ "$(ls -A $GHE_DATA_DIR/current/storage)" ]; then
  link_dest="--link-dest=../../current/storage"
fi

# Calculate sync routes. This will store the healthy object paths for each node
#
# This gets a repo path and stores the path in the $node.sync file
# a/nw/a8/3f/02/100000855 storage-server-node1 >> storage-server-node1.sync
# a/nw/a8/bc/8d/100000880 storage-server-node3 >> storage-server-node3.sync
# a/nw/a5/06/81/100000659 storage-server-node2 >> storage-server-node2.sync
# ...
#one route per line.
#
# NOTE: The route generation is performed on the appliance as it is considerably
# more performant than performing over an SSH pipe.
#
bm_start "$(basename $0) - Generating routes"
echo "github-env ./bin/storage-cluster-backup-routes > $remote_routes_list" | ghe-ssh "$GHE_HOSTNAME" -- /bin/bash
ghe-ssh "$GHE_HOSTNAME" -- cat $remote_routes_list | ghe_debug
bm_end "$(basename $0) - Generating routes"

bm_start "$(basename $0) - Fetching routes"
ghe-ssh "$GHE_HOSTNAME" -- gzip -c $remote_routes_list | gzip -d > $routes_list
cat $routes_list | ghe_debug
bm_end "$(basename $0) - Fetching routes"

bm_start "$(basename $0) - Processing routes"
if [ "$GHE_BACKUP_STRATEGY" != "cluster" ]; then
  server=$host
fi
cat $routes_list | awk -v tempdir="$tempdir" -v server="$server" '{ for(i=2;i<=NF;i++){ server != "" ? host=server : host=$i; print $1 > (tempdir"/"host".rsync") }}'
ghe_debug "\n$(find "$tempdir" -maxdepth 1 -name '*.rsync')"
bm_end "$(basename $0) - Processing routes"

if [ -z "$(find "$tempdir" -maxdepth 1 -name '*.rsync')" ]; then
  log_warn "no routes found, skipping storage backup ..."
  exit 0
else
  increment-progress-total-count 2
fi

# rsync all the storage objects
bm_start "$(basename $0) - Storage object sync"
for file_list in $tempdir/*.rsync; do
  hostname=$(basename $file_list .rsync)
  storage_user=$(ghe-ssh $ssh_config_file_opt $hostname:$port -- stat -c %U /data/user/storage || echo git)

  object_num=$(cat $file_list | wc -l)
  ghe_verbose "* Transferring $object_num objects from $hostname"
  log_rsync "BEGIN: storage rsync" 1>&3
  ghe-rsync -avr \
  -e "ssh -q $opts -p $port $ssh_config_file_opt -l $user" \
  $link_dest "$@" \
  --rsync-path="sudo -u $storage_user rsync" \
  --files-from="$file_list" \
  --ignore-missing-args \
  --size-only \
  "$hostname:$GHE_REMOTE_DATA_USER_DIR/storage/" \
  "$backup_dir" 1>&3 &
  log_rsync "END: storage rsync" 1>&3
done

for pid in $(jobs -p); do
  wait $pid
done
bm_end "$(basename $0) - Storage object sync"

if [[ "$GHE_ROUTE_VERIFICATION" == "true" ]]; then
  bm_start "$(basename $0) - Verifying Routes"

  cat $tempdir/*.rsync | uniq | sort | uniq > $tempdir/source_routes
  (cd $backup_dir/ && find * -mindepth 3 -maxdepth 3 -type f -print | uniq | sort | uniq) > $tempdir/destination_routes

  git --no-pager diff --unified=0 --no-prefix -- $tempdir/source_routes $tempdir/destination_routes || echo "Warning: One or more storage objects were not found on the source appliance."

  increment-progress-total-count 1
  bm_end "$(basename $0) - Verifying Routes"
fi

bm_end "$(basename $0)"
