#!/bin/sh

# Abort on the first unhandled command failure.
set -e
#set -x

# Short name of this script. It is not referenced anywhere in this file;
# presumably the helpers sourced below use it -- confirm before removing.
# "$0" is quoted so that an invocation path containing whitespace still works.
ME=$(basename "$0")

# Shared helper library. Functions called in this script but not defined in
# it are expected to come from this file.
. /usr/bin/oci-cluster-upgrade-funcs

# Print the command-line synopsis and terminate the script with status 1.
# The text goes to stderr: usage() is only reached on a misuse error, and
# stdout should stay clean for callers that capture or pipe it.
usage () {
	printf 'Usage: %s CLUSTER_NAME\n' "$0" >&2
	exit 1
}

# Exactly one positional argument is accepted; with any other count the usage
# text is printed and the script exits.
[ "$#" -eq 1 ] || usage

# Name of the cluster to upgrade. Not read again in this file; presumably
# consumed by the sourced helpers -- confirm.
CLUSTER_NAME="${1}"

#########################
### Utility functions ###
#########################
# Stop RabbitMQ on one node, then disable and mask its systemd unit.
#
# $1: node descriptor in "IP,HOSTNAME" form. The first comma-separated field
#     is the ssh target, the second one is only used in the log message.
#
# Does nothing (and returns 0) unless the global HAS_RABBIT equals "yes".
stop_and_mask_rabbit () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field.
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	[ "${HAS_RABBIT}" = "yes" ] || return 0

	green_echo "-> Stopping rabbitMQ on ${HOST_NAME}"
	sshi "root@${HOST_IP}" "rabbitmqctl stop_app && systemctl stop rabbitmq-server.service && systemctl disable rabbitmq-server.service && systemctl mask rabbitmq-server.service"
}

# Remove /var/lib/rabbitmq/mnesia on one node.
#
# $1: node descriptor in "IP,HOSTNAME" form.
#
# Does nothing (and returns 0) unless the global HAS_RABBIT equals "yes".
rm_var_lib_rabbitmq_mnesia () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field.
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	[ "${HAS_RABBIT}" = "yes" ] || return 0

	green_echo "-> Deleting /var/lib/rabbitmq/mnesia on ${HOST_NAME}"
	sshi "root@${HOST_IP}" "rm -rf /var/lib/rabbitmq/mnesia"
}

# Run /root/reset-rabbitmq-credentials on one node.
#
# $1: node descriptor in "IP,HOSTNAME" form.
#
# If the reset script does not exist on the node yet, oci-puppet is run there
# first (presumably that is what installs the script -- confirm).
# Does nothing (and returns 0) unless the global HAS_RABBIT equals "yes".
reset_rabbit_user_db () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field.
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	[ "${HAS_RABBIT}" = "yes" ] || return 0

	green_echo "-> Reinstalling rabbitmq user db on ${HOST_NAME}"
	sshi "root@${HOST_IP}" "if ! [ -e /root/reset-rabbitmq-credentials ] ; then oci-puppet ; fi"
	sshi "root@${HOST_IP}" "/root/reset-rabbitmq-credentials"
}

# Make one node join the RabbitMQ cluster of another node.
#
# $1: node descriptor in "IP,HOSTNAME" form (the node that joins).
# $2: name of the node to cluster with, handed verbatim to
#     oci-auto-join-rabbitmq-cluster on the remote side.
#
# Does nothing (and returns 0) unless the global HAS_RABBIT equals "yes".
rejoin_rabbit_cluster () {
	local HOST HOST_IP HOST_NAME NODE_TO_CLUSTER_WITH
	HOST=${1}
	NODE_TO_CLUSTER_WITH=${2}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field.
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	[ "${HAS_RABBIT}" = "yes" ] || return 0

	green_echo "-> Getting ${HOST_NAME} to rejoin rabbitmq cluster ${NODE_TO_CLUSTER_WITH}"
	case "${USE_QUORUM_QUEUES}" in
		yes)
			sshi "root@${HOST_IP}" "oci-auto-join-rabbitmq-cluster ${NODE_TO_CLUSTER_WITH}"
			;;
		*)
			# NOTE(review): --no-ha-queues is passed when quorum queues
			# are NOT in use. Check against oci-auto-join-rabbitmq-cluster
			# that this polarity is the intended one.
			sshi "root@${HOST_IP}" "oci-auto-join-rabbitmq-cluster ${NODE_TO_CLUSTER_WITH} --no-ha-queues"
			;;
	esac
}

# Reboot one node and block until wait_for_ssh returns for it.
#
# $1: node descriptor in "IP,HOSTNAME" form.
reboot_host () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field.
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	green_echo "-> Rebooting ${HOST_NAME}"
	# A non-zero status from the reboot command is tolerated on purpose, so
	# that it does not abort the script under "set -e".
	sshi "root@${HOST_IP}" "reboot" || :
	# Fixed pause before polling for ssh again.
	sleep 10
	wait_for_ssh ${HOST_IP}
}

# Unmask, enable and start the RabbitMQ systemd unit on one node, then start
# the RabbitMQ application.
#
# $1: node descriptor in "IP,HOSTNAME" form.
#
# Does nothing (and returns 0) unless the global HAS_RABBIT equals "yes".
start_and_unmask_rabbit () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field.
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	[ "${HAS_RABBIT}" = "yes" ] || return 0

	green_echo "-> Starting rabbitMQ on ${HOST_NAME}"
	sshi "root@${HOST_IP}" "systemctl unmask rabbitmq-server.service && systemctl enable rabbitmq-server.service && systemctl start rabbitmq-server.service && rabbitmqctl start_app"
}

#########################
### Upgrade functions ###
#########################
# Dist-upgrade a group of nodes that may run RabbitMQ: one node first, then
# all the others.
#
# $1: descriptor ("IP,HOSTNAME") of the node to upgrade first.
# $2: the remaining nodes, as a single string handed to iterate_on_hosts.
#
# Sequence:
#   1. First node: stop rabbit, fix grub, switch release, dist-upgrade,
#      reboot, wipe mnesia, start rabbit, reset the rabbit user db.
#   2. Other nodes: the same steps, except that instead of resetting the
#      user db each of them rejoins the rabbit cluster of the first node.
# All rabbit-specific steps are skipped unless HAS_RABBIT equals "yes".
#
# iterate_on_hosts is called with -p for every step except reboot, rabbit
# start and cluster rejoin (presumably -p means "in parallel" -- confirm in
# oci-cluster-upgrade-funcs).
dist_upgrade_nodes_with_rabbit (){
	# FIRST_NODE_HOSTNAME is declared local so that it does not leak into
	# the global namespace.
	local FIRST_NODE OTHER_NODES FIRST_NODE_HOSTNAME
	FIRST_NODE=${1}
	OTHER_NODES="${2}"

	# --- First node ---
	if [ "${HAS_RABBIT}" = "yes" ] ; then
		stop_and_mask_rabbit "${FIRST_NODE}"
	fi
	fix_grub_install_devices "${FIRST_NODE}"
	switch_to_release "${FIRST_NODE}" bullseye zed bookworm zed
	distupgrade_everyone "${FIRST_NODE}"
	reboot_host "${FIRST_NODE}"
	if [ "${HAS_RABBIT}" = "yes" ] ; then
		rm_var_lib_rabbitmq_mnesia "${FIRST_NODE}"
		start_and_unmask_rabbit "${FIRST_NODE}"
		reset_rabbit_user_db "${FIRST_NODE}"
		# Rabbit on the other nodes is stopped only now, once the first
		# node is upgraded and running again.
		iterate_on_hosts -p "${OTHER_NODES}" stop_and_mask_rabbit
	fi

	# --- All other nodes ---
	iterate_on_hosts -p "${OTHER_NODES}" fix_grub_install_devices
	iterate_on_hosts -p "${OTHER_NODES}" switch_to_release bullseye zed bookworm zed
	iterate_on_hosts -p "${OTHER_NODES}" distupgrade_everyone
	iterate_on_hosts "${OTHER_NODES}" reboot_host
	if [ "${HAS_RABBIT}" = "yes" ] ; then
		iterate_on_hosts -p "${OTHER_NODES}" rm_var_lib_rabbitmq_mnesia
		iterate_on_hosts "${OTHER_NODES}" start_and_unmask_rabbit
		# Hostname is the second comma-separated field of the descriptor.
		FIRST_NODE_HOSTNAME=${FIRST_NODE#*,}
		FIRST_NODE_HOSTNAME=${FIRST_NODE_HOSTNAME%%,*}
		iterate_on_hosts "${OTHER_NODES}" rejoin_rabbit_cluster "${FIRST_NODE_HOSTNAME}"
	fi
}

# OCI does not install grub properly, and there is no device where to install
# grub by default. This is to be fixed, but in the meantime, let's fix this
# at least in the virtualized PoC.
#
# $1: node descriptor in "IP,HOSTNAME" form.
#
# When the node's DMI system-product-name is "qemu-oci", the grub-pc
# install_devices debconf answer is preseeded with the QEMU SCSI disk.
# On every node, zstd is installed if dpkg does not know about it yet.
fix_grub_install_devices () {
	# SYSTEM_PRODUCT_NAME is declared local so that it does not leak into
	# the global namespace.
	local HOST HOST_IP HOST_NAME SYSTEM_PRODUCT_NAME
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field.
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	SYSTEM_PRODUCT_NAME=$(sshi "root@${HOST_IP}" "dmidecode -s system-product-name")
	if [ "${SYSTEM_PRODUCT_NAME}" = "qemu-oci" ] ; then
		green_echo "-> Fixing grub installe devices on ${HOST_NAME}"
		sshi "root@${HOST_IP}" "echo 'grub-pc grub-pc/install_devices multiselect /dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_drive-scsi0-0-0-0' | debconf-set-selections"
		sshi "root@${HOST_IP}" "echo 'grub-pc grub-pc/install_devices seen true' | debconf-set-selections"
	fi
	sshi "root@${HOST_IP}" "if ! dpkg -W zstd 1>/dev/null 2>/dev/null ; then echo '-> Installing zstd' ; DEBIAN_FRONTEND=noninteractive apt-get install zstd -y -o Dpkg::Options::=--force-confold --allow-downgrades ; fi"
}

# Upgrade one node that needs no service-specific handling: fix grub, switch
# the release, dist-upgrade, then reboot.
#
# $1: node descriptor in "IP,HOSTNAME" form; it is handed unchanged to every
#     step, only the hostname part is used here (for the log message).
standard_dist_upgrade_node () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field.
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	green_echo "-> Performing upgrade to bookworm on ${HOST_NAME}"

	fix_grub_install_devices ${HOST}
	switch_to_release ${HOST} bullseye zed bookworm zed
	distupgrade_everyone ${HOST}
	reboot_host ${HOST}
}

# Set the Ceph OSD flags noout, norebalance, norecover and nodeep-scrub by
# running "ceph osd set ..." on the given node.
#
# $1: node descriptor in "IP,HOSTNAME" form; only the IP part is used.
ceph_disable_rebalance () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field (not used below).
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	sshi "root@${HOST_IP}" "ceph osd set noout && ceph osd set norebalance && ceph osd set norecover && ceph osd set nodeep-scrub"
}

# Clear the Ceph OSD flags noout, norebalance, norecover and nodeep-scrub by
# running "ceph osd unset ..." on the given node.
#
# $1: node descriptor in "IP,HOSTNAME" form; only the IP part is used.
ceph_enable_rebalance () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field (not used below).
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	sshi "root@${HOST_IP}" "ceph osd unset noout && ceph osd unset norebalance && ceph osd unset norecover && ceph osd unset nodeep-scrub"
}

# Run "ceph-bluestore-tool repair" on every OSD found on one node.
#
# $1: node descriptor in "IP,HOSTNAME" form.
#
# OSDs are discovered from the remote "df" output: the last column of every
# line containing "ceph" is taken as an OSD path, and the OSD id is whatever
# follows the first "-" in the basename of that path (e.g. ".../ceph-12"
# gives 12).
ceph_upgrade_osd () {
	# All working variables, including the loop variable, are local so that
	# nothing leaks into (or clobbers) the caller's namespace.
	local HOST HOST_IP HOST_NAME CEPH_OSD_PATH CEPH_OSD_ID OSD_MOUNT
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field.
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	green_echo "-> Launching OSD repair on ${HOST_NAME}"
	CEPH_OSD_PATH=$(sshi "root@${HOST_IP}" "df | grep ceph | awk '{print \$NF}'" 2>/dev/null)
	# Deliberately unquoted: the list is split into one word per path.
	for OSD_MOUNT in ${CEPH_OSD_PATH} ; do
		CEPH_OSD_ID=$(basename "${OSD_MOUNT}" | cut -d- -f2)
		green_echo "-> Reparing CEPH OSD with ID ${CEPH_OSD_ID}"
		# The remote steps are chained with ';' rather than '&&', so each
		# one runs whatever the status of the previous one: in particular
		# "noup" is unset and the OSD restarted even if the repair fails.
		sshi "root@${HOST_IP}" "systemctl stop ceph-osd@${CEPH_OSD_ID} ; sleep 10 ; ceph osd set noup ; sleep 3 ; ceph-bluestore-tool repair --path /var/lib/ceph/osd/ceph-${CEPH_OSD_ID} ; sleep 5 ; ceph osd unset noup ; sleep 3 ; systemctl start ceph-osd@${CEPH_OSD_ID} ; sleep 10"
	done
}

# Post-upgrade Ceph housekeeping, run through one node:
#   - require the "pacific" OSD release,
#   - set mon auth_allow_insecure_global_id_reclaim to false,
#   - enable msgr2 on the monitors,
#   - enable the "rbd" application on every pool listed by
#     "ceph osd lspools" except device_health_metrics.
#
# $1: node descriptor in "IP,HOSTNAME" form.
ceph_upgrade_fix_errors () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# First comma-separated field.
	HOST_IP=${HOST%%,*}
	# Second comma-separated field.
	HOST_NAME=${HOST#*,}
	HOST_NAME=${HOST_NAME%%,*}

	green_echo "-> Fixing HEALTH_WARN stuff on ${HOST_NAME}"
	sshi "root@${HOST_IP}" "ceph osd require-osd-release pacific && ceph config set mon auth_allow_insecure_global_id_reclaim false && ceph mon enable-msgr2"
	# The escaped \$ signs keep the expansions for the remote shell.
	sshi "root@${HOST_IP}" "for i in \$(ceph osd lspools | awk '{print \$2}' | grep -v device_health_metrics) ; do ceph osd pool application enable \$i rbd ; done"
}


# Helper for ceph_upgrade: dist-upgrade the nodes of one Ceph cluster.
#   $1: one monitor node ("IP,HOSTNAME") used to run the ceph commands
#   $2: all monitor nodes of the cluster (may be empty)
#   $3: all OSD nodes of the cluster (may be empty)
# The OSD flags are set before the monitors are upgraded and cleared after
# the OSD nodes are upgraded.
# NOTE(review): with monitors but no OSD nodes the flags are never cleared
# here; with OSD nodes but no monitors $1 is probably empty. Confirm that
# such layouts cannot occur.
_ceph_upgrade_cluster_nodes () {
	if [ -n "${2}" ] ; then
		ceph_disable_rebalance ${1}
		iterate_on_hosts "${2}" standard_dist_upgrade_node
	fi
	if [ -n "${3}" ] ; then
		iterate_on_hosts "${3}" standard_dist_upgrade_node
		ceph_enable_rebalance ${1}
	fi
}

# Helper for ceph_upgrade: repair the OSDs of one Ceph cluster, then run the
# post-upgrade fixes through one of its monitors.
#   $1: one monitor node ("IP,HOSTNAME")
#   $2: all OSD nodes of the cluster (may be empty, then nothing is done)
_ceph_repair_cluster_osds () {
	if [ -n "${2}" ] ; then
		iterate_on_hosts "${2}" ceph_upgrade_osd
		ceph_upgrade_fix_errors ${1}
	fi
}

# Upgrade the two Ceph clusters described by the *_BILLMON / *_BILLOSD and
# *_CEPHMON / *_CEPHOSD globals.
#
# Order: dist-upgrade of the BILL* nodes, dist-upgrade of the CEPH* nodes,
# and only then the OSD repair pass, again BILL* first and CEPH* second.
# Every step is skipped when the corresponding node list is empty.
ceph_upgrade () {
	_ceph_upgrade_cluster_nodes "${ONE_BILLMON}" "${ALL_BILLMON}" "${ALL_BILLOSD}"
	_ceph_upgrade_cluster_nodes "${ONE_CEPHMON}" "${ALL_CEPHMON}" "${ALL_CEPHOSD}"
	_ceph_repair_cluster_osds "${ONE_BILLMON}" "${ALL_BILLOSD}"
	_ceph_repair_cluster_osds "${ONE_CEPHMON}" "${ALL_CEPHOSD}"
}

# Run standard_dist_upgrade_node over every remaining node group, one group
# after the other, in this order: DNS, TEMPEST, NET, VOL, COMP, SWIFTPRX,
# SWIFTSTR, SQL, SQLMSG. Groups whose ALL_* variable is empty are skipped.
upgrade_all_other_nodes (){
	local ROLE_NODES
	for ROLE_NODES in \
		"${ALL_DNS}" \
		"${ALL_TEMPEST}" \
		"${ALL_NET}" \
		"${ALL_VOL}" \
		"${ALL_COMP}" \
		"${ALL_SWIFTPRX}" \
		"${ALL_SWIFTSTR}" \
		"${ALL_SQL}" \
		"${ALL_SQLMSG}"
	do
		if [ -n "${ROLE_NODES}" ] ; then
			iterate_on_hosts "${ROLE_NODES}" standard_dist_upgrade_node
		fi
	done
}

# Rebuild a RabbitMQ cluster from scratch, then restart all services.
#
# $1: all rabbit nodes, as a single string handed to iterate_on_hosts.
# $2: the node ("IP,HOSTNAME") that is started first and that the others join.
# $3: all rabbit nodes except $2, as a single string.
#
# The rabbit part is skipped unless HAS_RABBIT equals "yes";
# oci_cluster_restart_all_services is called in every case.
restart_rabbit_cluster_and_services () {
	# The hostname variable is declared local (and spelled NODE, not HODE)
	# so that it does not leak into the global namespace.
	local ALL_RABBIT_NODES ONE_RABBIT_NODE OTHER_RABBIT_NODES ONE_RABBIT_NODE_HOSTNAME
	ALL_RABBIT_NODES="${1}"
	ONE_RABBIT_NODE="${2}"
	OTHER_RABBIT_NODES="${3}"
	# Hostname is the second comma-separated field of the descriptor.
	ONE_RABBIT_NODE_HOSTNAME=${ONE_RABBIT_NODE#*,}
	ONE_RABBIT_NODE_HOSTNAME=${ONE_RABBIT_NODE_HOSTNAME%%,*}
	if [ "${HAS_RABBIT}" = "yes" ] ; then
		# Stop all
		iterate_on_hosts -p "${ALL_RABBIT_NODES}" stop_and_mask_rabbit
		# Reset the mnesia db everywhere
		iterate_on_hosts -p "${ALL_RABBIT_NODES}" rm_var_lib_rabbitmq_mnesia
		# Start on 1st node
		iterate_on_hosts "${ONE_RABBIT_NODE}" start_and_unmask_rabbit
		# Populate with rights
		iterate_on_hosts "${ONE_RABBIT_NODE}" reset_rabbit_user_db
		# Start on all other nodes
		iterate_on_hosts -p "${OTHER_RABBIT_NODES}" start_and_unmask_rabbit
		# Re-join the cluster
		iterate_on_hosts -p "${OTHER_RABBIT_NODES}" rejoin_rabbit_cluster "${ONE_RABBIT_NODE_HOSTNAME}"
	fi

	oci_cluster_restart_all_services
}

###########################
### START OF THE SCRIPT ###
###########################
# Not defined in this file; presumably this is what fills in the ALL_*, ONE_*
# and OTHER_* node lists (and HAS_RABBIT) used below -- confirm in
# oci-cluster-upgrade-funcs.
fetch_cluster_info

# Puppet is disabled for the whole duration of the upgrade and re-enabled by
# the last step of the script. Because of "set -e", a failing step in between
# stops the script before that last step is reached.
green_echo "===> Disabling puppet everywhere but on Ceph nodes"
iterate_on_hosts -p "${ALL_NODES_BUT_CEPH}" disable_puppet

# Upgrade controller nodes (always done, unlike the messaging nodes below)
dist_upgrade_nodes_with_rabbit ${ONE_CTRL} "${OTHER_CTRL}"
# Restart the rabbit cluster on controllers, then restart all services
restart_rabbit_cluster_and_services "${ALL_CTRL}" "${ONE_CTRL}" "${OTHER_CTRL}"

# Upgrade messaging nodes, only if the cluster has any
if [ -n "${ALL_MSG}" ] ; then
	dist_upgrade_nodes_with_rabbit ${ONE_MSG} "${OTHER_MSG}"
	# Restart the rabbit cluster on messaging nodes, then restart all services
	restart_rabbit_cluster_and_services "${ALL_MSG}" "${ONE_MSG}" "${OTHER_MSG}"
fi

# Ceph clusters next, then every remaining node role
ceph_upgrade
upgrade_all_other_nodes

green_echo "===> Enabling puppet everywhere but on Ceph nodes"
iterate_on_hosts -p "${ALL_NODES_BUT_CEPH}" enable_puppet
