#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Run a series of tests on remote systems under KVM.
#
# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
#	 kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
#
# Copyright (C) 2021 Facebook, Inc.
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
#
# xref: /linux/tools/testing/selftests/rcutorture/bin/kvm-remote.sh (revision 1260ed77798502de9c98020040d2995008de10cc)

# Remember how we were invoked so that it can be recorded in remote-log.
scriptname=$0
args="$*"

# All pathnames below are relative to the top of the kernel source tree.
if ! test -d tools/testing/selftests/rcutorture/bin
then
	echo "$scriptname" must be run from top-level directory of kernel source tree.
	exit 1
fi

# Make the rcutorture helper scripts (kvm.sh, kvm-again.sh, ...) reachable
# through PATH, then pull in the shared shell functions.
RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
PATH="${RCUTORTURE}/bin:$PATH"; export PATH
. functions.sh

starttime="`get_starttime`"

# First argument: whitespace-separated list of remote systems.
systems="$1"
if test -z "$systems"
then
	echo "$scriptname": Empty list of systems will go nowhere good, giving up.
	exit 1
fi
shift

# Pathnames:
# T:	  /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp
# resdir: /tmp/kvm-remote.sh.NNNNNN/res
# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
# oldrun: `pwd`/tools/testing/.../res/$otherds
#
# Pathname segments:
# TD:	  kvm-remote.sh.NNNNNN
# ds:	  yyyy.mm.dd-hh.mm.ss-remote

T="`mktemp -d "${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX"`"
# Everything below derives pathnames from $T, so an empty value would
# silently redirect them to the root directory.
if test -z "$T" || ! test -d "$T"
then
	echo $scriptname: Unable to create temporary directory, giving up.
	exit 1
fi
# Remove the scratch directory on any exit path.
trap 'rm -rf "$T"' 0
TD="`basename "$T"`"

resdir="$T/res"
ds=`date +%Y.%m.%d-%H.%M.%S`-remote
rundir=$resdir/$ds
echo Results directory: $rundir
echo $scriptname $args
# Decide between a fresh build (remaining arguments start with "--" and
# are handed to kvm.sh) and re-use of an old run (next argument is the
# path of that run and the rest are handed to kvm-again.sh).  Either way,
# kvm-again.sh --dryrun then populates $rundir.
if echo "$1" | grep -q '^--'
then
	# Fresh build.  Create a datestamp unless the caller supplied one.
	datestamp="`echo "$@" | awk -v ds="$ds" '{
		for (i = 1; i < NF; i++) {
			if ($i == "--datestamp") {
				ds = "";
				break;
			}
		}
		if (ds != "")
			print "--datestamp " ds;
	}'`"
	# $datestamp is deliberately unquoted: it is either empty or the
	# two words "--datestamp <ds>".
	kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# Report the saved status: $? here would be that of "test".
		echo $scriptname: kvm.sh failed exit code $ret
		cat $T/kvm.sh.out
		exit 2
	fi
	oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
	# An empty $oldrun would turn the paths below into "/remote-log"
	# and "/*/buildonly", so refuse to continue without it.
	if test -z "$oldrun" || ! test -d "$oldrun"
	then
		echo $scriptname: kvm.sh did not report a usable results directory, giving up.
		cat $T/kvm.sh.out
		exit 2
	fi
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	echo | tee -a "$oldrun/remote-log"
	echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
	cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
	# We are going to run this, so remove the buildonly files.
	rm -f "$oldrun"/*/buildonly
	kvm-again.sh "$oldrun" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
else
	# Re-use old run.
	oldrun="$1"
	if ! echo $oldrun | grep -q '^/'
	then
		oldrun="`pwd`/$oldrun"
	fi
	shift
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
	# Keep a local copy of the new run and log there from now on.
	cp -a "$rundir" "$RCUTORTURE/res/"
	oldrun="$RCUTORTURE/res/$ds"
fi
# Show the kvm-again.sh dry-run output and record it, along with the
# remote and local run directories, in the build-side remote-log.
echo | tee -a "$oldrun/remote-log"
echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
# Log the output itself, not just its header, as is done for kvm.sh.
cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
echo | tee -a "$oldrun/remote-log"
echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"

# Create the kvm-remote-N.sh scripts in the bin directory.
# Each line of the scenarios file is a batch: the first field is the
# batch number followed by a period, the remaining fields are passed
# through to kvm-test-1-run-batch.sh.
awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
{
	n = $1;
	sub(/\./, "", n);
	fn = dest "/kvm-remote-" n ".sh"
	print "kvm-remote-noreap.sh " rundir " &" > fn;
	scenarios = "";
	for (i = 2; i <= NF; i++)
		scenarios = scenarios " " $i;
	print "kvm-test-1-run-batch.sh" scenarios >> fn;
	print "sync" >> fn;
	print "rm " rundir "/remote.run" >> fn;
	# One output file per batch: close it so that a long scenarios
	# file cannot exhaust the open-file limit.
	close(fn);
}'
chmod +x "$T"/bin/kvm-remote-*.sh
# Package bin and res relative to the parent of $T so that the tarball
# unpacks as $TD/bin and $TD/res.  Do not run tar if the cd fails.
( cd "`dirname "$T"`" && tar -chzf "$T/binres.tgz" "$TD/bin" "$TD/res" )

# Check first to avoid the need for cleanup for system-name typos
# $systems is deliberately unquoted: it is a whitespace-separated list.
for i in $systems
do
	ssh -o BatchMode=yes "$i" getconf _NPROCESSORS_ONLN > "$T/ssh.stdout" 2> "$T/ssh.stderr"
	ret=$?
	if test "$ret" -ne 0
	then
		echo "System $i unreachable ($ret), giving up." | tee -a "$oldrun/remote-log"
		echo ' --- ssh stdout: vvv' | tee -a "$oldrun/remote-log"
		cat "$T/ssh.stdout" | tee -a "$oldrun/remote-log"
		echo ' --- ssh stdout: ^^^' | tee -a "$oldrun/remote-log"
		echo ' --- ssh stderr: vvv' | tee -a "$oldrun/remote-log"
		cat "$T/ssh.stderr" | tee -a "$oldrun/remote-log"
		echo ' --- ssh stderr: ^^^' | tee -a "$oldrun/remote-log"
		exit 4
	fi
	# Log the CPU count that the remote system reported.
	echo $i: `cat "$T/ssh.stdout"` CPUs " " `date` | tee -a "$oldrun/remote-log"
done

# Download and expand the tarball on all systems.
# A failed transfer is retried at one-minute intervals; the script gives
# up with exit code 10 once a retry fails after more than five earlier
# retries.
echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
for i in $systems
do
	echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
	cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
	ret=$?
	tries=0
	while test "$ret" -ne 0
	do
		echo Unable to download $T/binres.tgz to system $i, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
		sleep 60
		cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
		ret=$?
		if test "$ret" -ne 0
		then
			# Numeric comparison: a bare ">" is a redirection,
			# which makes the test always true and creates a
			# file named "5".
			if test "$tries" -gt 5
			then
				echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
				exit 10
			fi
		fi
		tries=$((tries+1))
	done
done

# Function to check for presence of a file on the specified system.
# Complain if the system cannot be reached, and retry after a wait.
# Currently just waits 15 minutes if a machine disappears.
#
# Usage: checkremotefile system pathname
#
# Returns 0 if the file exists, 1 if the system was reached but the file
# is absent, 255 once more than 15 ssh failures have been seen, and any
# other exit code from ssh immediately.  Messages are written to stdout
# and appended to $oldrun/remote-log.
checkremotefile () {
	local nsshfails=0
	local ret
	local sleeptime=60

	while :
	do
		ssh -o BatchMode=yes "$1" "test -f \"$2\""
		ret=$?
		if test "$ret" -eq 255
		then
			# ssh itself failed: wait and try again, up to a limit.
			echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
			nsshfails=$((nsshfails+1))
			if ((nsshfails > 15))
			then
				return 255
			fi
		elif test "$ret" -eq 0
		then
			return 0
		elif test "$ret" -eq 1
		then
			echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log"
			return 1
		else
			# Unexpected status: hand it straight back, so the
			# message must not promise a retry.
			echo " ---" Exit code $ret: ssh $1 test -f \"$2\", returning to caller. `date` | tee -a "$oldrun/remote-log"
			return $ret
		fi
		sleep $sleeptime
	done
}

# Function to start batches on idle remote $systems
#
# Usage: startbatches curbatch nbatches
#
# Batches are numbered starting at 1.  Returns the next batch to start.
# Be careful to redirect all debug output to FD 2 (stderr).
#
# The next batch number is printed on stdout, which is why diagnostics
# must stay on stderr.  Exits the script with code 11 if a batch cannot
# be launched over ssh.
startbatches () {
	local curbatch="$1"
	local nbatches="$2"
	local i
	local ret

	# Each pass through the following loop examines one system.
	for i in $systems
	do
		if test "$curbatch" -gt "$nbatches"
		then
			echo $((nbatches + 1))
			return 0
		fi
		if checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2
		then
			continue # System still running last test, skip.
		fi
		# remote.run marks the system busy; the generated batch
		# script removes it when the batch completes.
		ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
		ret=$?
		if test "$ret" -ne 0
		then
			echo ssh $i failed: exitcode $ret 1>&2
			exit 11
		fi
		echo " ----" System $i Batch `head -n "$curbatch" < "$rundir"/scenarios | tail -1` `date` 1>&2
		curbatch=$((curbatch + 1))
	done
	echo "$curbatch"
}

# Launch all the scenarios.
# One batch per line of the scenarios file.  startbatches prints the next
# batch number on stdout and its diagnostics on stderr, so capture both in
# files and copy the diagnostics to the log afterwards.
nbatches="`wc -l "$rundir"/scenarios | awk '{ print $1 }'`"
curbatch=1
while test "$curbatch" -le "$nbatches"
do
	startbatches "$curbatch" "$nbatches" > "$T/curbatch" 2> "$T/startbatches.stderr"
	curbatch="`cat "$T/curbatch"`"
	if test -s "$T/startbatches.stderr"
	then
		cat "$T/startbatches.stderr" | tee -a "$oldrun/remote-log"
	fi
	# Batches remain but every system was examined: wait before
	# polling the systems again.
	if test "$curbatch" -le "$nbatches"
	then
		sleep 30
	fi
done
echo All batches started. `date` | tee -a "$oldrun/remote-log"

# Wait for all remaining scenarios to complete and collect results.
for i in $systems
do
	echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log"
	while :
	do
		checkremotefile "$i" "$resdir/$ds/remote.run"
		ret=$?
		# Status 1 means remote.run is gone, so the last batch on
		# this system has finished.
		if test "$ret" -eq 1
		then
			echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
			# Stream the result files back as a tarball, removing
			# the remote scratch directory afterwards.  Do not
			# extract anywhere else if the cd fails.
			( cd "$oldrun" && ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
			break;
		fi
		if test "$ret" -eq 255
		then
			echo System $i persistent ssh failure, lost results `date` | tee -a "$oldrun/remote-log"
			break;
		fi
		# Still running, or an unexpected status: poll again later.
		sleep 30
	done
done

# The exit status of kvm-end-run-stats.sh would be hidden by the pipe to
# tee, so pass it through a file.
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
exit "`cat $T/exitcode`"