#!/bin/bash
# SPDX-License-Identifier: GPL-3.0+
# Copyright (C) 2017 Omar Sandoval
#
# Hotplug CPUs online and offline while we do IO.

. tests/block/rc
. common/cpuhotplug

# Test metadata. These variables are not used by this file's own functions;
# presumably they are read by the test harness that sources this file
# (TODO confirm against the harness).
#
# One-line human-readable summary of what the test does.
DESCRIPTION="do IO while hotplugging CPUs"
# NOTE(review): presumably marks the test as honouring the TIMEOUT variable
# that test_device() reads -- confirm.
TIMED=1
# NOTE(review): presumably declares that the test may run on zoned block
# devices -- confirm.
CAN_BE_ZONED=1

# Prerequisite check for this test.
# Succeeds only when both the CPU-hotplug check and the fio check succeed.
# The fio check is not run at all if the CPU-hotplug check fails, and the
# failing check's exit status is returned unchanged.
requires() {
	# Bare "return" propagates the status of the failed check.
	_have_cpu_hotplug || return

	_have_fio
}

# Run a random-IO fio job against TEST_DEV in the background and, for as long
# as that job is alive, keep offlining and onlining randomly chosen
# hotpluggable CPUs.
#
# Globals read:    TEST_NAME, TEST_DEV, TMPDIR, TIMEOUT,
#                  HOTPLUGGABLE_CPUS, ALL_CPUS
# Globals written: TIMEOUT (set to 900 when unset or empty),
#                  FIO_PERF_FIELDS
# Outputs:         progress and failure messages on stdout
test_device() {
	echo "Running ${TEST_NAME}"

	# Use a much smaller IO size on rotational devices.
	local size
	if _test_dev_is_rotational; then
		size="32m"
	else
		size="1g"
	fi

	# Start fio job. When using the default cpu clocksource, fio does some
	# sched_setaffinity() calls, which may race with our hotplugging. Use
	# the clock_gettime clocksource to avoid that.
	_run_fio_rand_io --filename="$TEST_DEV" --size="$size" --clocksource=clock_gettime &
	# Remember the job's PID right away so the polling loop below does not
	# depend on "$!" still referring to fio.
	local fio_pid=$!

	local online_cpus=()
	local offline_cpus=()
	local offlining=1
	local max_offline=${#HOTPLUGGABLE_CPUS[@]}
	local o="$TMPDIR/offline_cpu_out"
	local cpu idx start_time end_time timeout

	# If every CPU is hotpluggable, always leave at least one online.
	if [[ ${#HOTPLUGGABLE_CPUS[@]} -eq ${#ALL_CPUS[@]} ]]; then
		(( max_offline-- ))
	fi

	# Sort the hotpluggable CPUs by their current state.
	for cpu in "${HOTPLUGGABLE_CPUS[@]}"; do
		if (( "$(cat "/sys/devices/system/cpu/cpu${cpu}/online")" )); then
			online_cpus+=("$cpu")
		else
			offline_cpus+=("$cpu")
		fi
	done

	# Hotplug CPUs while the fio job is running. In bad situations fio may
	# take very long to complete its IO, so bound the loop with a timeout
	# to avoid blocking the overall test run.
	# NOTE(review): on every "break" below fio may still be running when
	# the perf report is generated; nothing here stops or waits for it.
	start_time=$(date +%s)
	timeout=${TIMEOUT:=900}
	while sleep .2; kill -0 "$fio_pid" 2> /dev/null; do
		# Flip direction at the limits: start onlining once the offline
		# quota is reached, and resume offlining once every hotpluggable
		# CPU is back online. Never resume offlining when the quota is
		# zero, since there is nothing we are allowed to offline.
		if (( offlining && ${#offline_cpus[@]} >= max_offline )); then
			offlining=0
		elif (( !offlining && max_offline > 0 &&
			${#online_cpus[@]} == ${#HOTPLUGGABLE_CPUS[@]} )); then
			offlining=1
		fi

		if (( offlining && ${#online_cpus[@]} > 0 )); then
			idx=$((RANDOM % ${#online_cpus[@]}))
			if _offline_cpu "${online_cpus[$idx]}" > "$o" 2>&1; then
				offline_cpus+=("${online_cpus[$idx]}")
				unset "online_cpus[$idx]"
				# Re-pack the array so indices stay contiguous.
				online_cpus=("${online_cpus[@]}")
			elif [[ $(<"$o") =~ "No space left on device" ]]; then
				# ENOSPC means CPU offline failure due to IRQ
				# vector shortage. Keep current number of
				# offline CPUs as maximum CPUs to offline.
				max_offline=${#offline_cpus[@]}
				offlining=0
			else
				echo "Failed to offline CPU: $(<"$o")"
				break
			fi
		fi

		# The emptiness guard avoids a division by zero in the modulo
		# when no CPU is offline (e.g. ENOSPC on the very first offline
		# attempt, or an offline quota of zero).
		if (( !offlining && ${#offline_cpus[@]} > 0 )); then
			idx=$((RANDOM % ${#offline_cpus[@]}))
			# Only move the CPU to the online list if onlining really
			# succeeded; otherwise the bookkeeping would drift from
			# the actual system state.
			if ! _online_cpu "${offline_cpus[$idx]}"; then
				echo "Failed to online CPU ${offline_cpus[$idx]}"
				break
			fi
			online_cpus+=("${offline_cpus[$idx]}")
			unset "offline_cpus[$idx]"
			offline_cpus=("${offline_cpus[@]}")
		fi

		# Give fio 15 seconds of slack beyond the configured timeout.
		end_time=$(date +%s)
		if (( end_time - start_time > timeout + 15 )); then
			echo "fio did not finish after $timeout seconds!"
			break
		fi
	done

	FIO_PERF_FIELDS=("read iops")
	_fio_perf_report

	echo "Test complete"
}
