xref: /linux/tools/testing/selftests/net/forwarding/lib.sh (revision 91a4855d6c03e770e42f17c798a36a3c46e63de2)
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#shellcheck disable=SC2034 # SC doesn't see our uses of global variables
4
5##############################################################################
6# Topology description. p1 looped back to p2, p3 to p4 and so on.
7
# Default port map. Ports are listed one loopback pair per line: the first
# port of each line is cabled to the second.
declare -A NETIFS=(
	[p1]=veth0 [p2]=veth1
	[p3]=veth2 [p4]=veth3
	[p5]=veth4 [p6]=veth5
	[p7]=veth6 [p8]=veth7
	[p9]=veth8 [p10]=veth9
)
20
# Port that does not have a cable connected.
: "${NETIF_NO_CABLE:=eth8}"

##############################################################################
# Defines
#
# Every setting below uses ": ${VAR:=default}", so a value that is already set
# and non-empty when this file is sourced is kept as is.

# Networking utilities.
: "${PING:=ping}"
: "${PING6:=ping6}"	# Some distros just use ping.
: "${ARPING:=arping}"
: "${TROUTE6:=traceroute6}"

# Packet generator.
: "${MZ:=mausezahn}"	# Some distributions use 'mz'.
: "${MZ_DELAY:=0}"

# Host configuration tools.
: "${TEAMD:=teamd}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
: "${MCD_TABLE_NAME:=selftests}"

# Constants for netdevice bring-up:
# Default time in seconds to wait for an interface to come up before giving up
# and bailing out. Used during initial setup.
: "${INTERFACE_TIMEOUT:=600}"
# Like INTERFACE_TIMEOUT, but default for ad-hoc waiting in testing scripts.
: "${WAIT_TIMEOUT:=20}"
# Time to wait after interfaces participating in the test are all UP.
: "${WAIT_TIME:=5}"

# Whether to pause before cleanup (pre_cleanup waits for a key press).
: "${PAUSE_ON_CLEANUP:=no}"

# Whether to create virtual interfaces, and what netdevice type they should be.
: "${NETIF_CREATE:=yes}"
: "${NETIF_TYPE:=veth}"

# Constants for ping tests:
# How many packets should be sent.
: "${PING_COUNT:=10}"
# Timeout (in seconds) before ping exits regardless of how many packets have
# been sent or received
: "${PING_TIMEOUT:=5}"

# Minimum ageing_time (in centiseconds) supported by hardware
: "${LOW_AGEING_TIME:=1000}"

# Whether to check for availability of certain tools.
: "${REQUIRE_JQ:=yes}"
: "${REQUIRE_MZ:=yes}"
: "${REQUIRE_MTOOLS:=no}"
: "${REQUIRE_TEAMD:=no}"

# Whether to override MAC addresses on interfaces participating in the test.
: "${STABLE_MAC_ADDRS:=no}"

# Flags for tcpdump
: "${TCPDUMP_EXTRA_FLAGS:=}"

# Flags for TC filters.
: "${TC_FLAG:=skip_hw}"

# Whether the machine is "slow" -- i.e. might be incapable of running tests
# involving heavy traffic. This might be the case on a debug kernel, a VM, or
# e.g. a low-power board.
: "${KSFT_MACHINE_SLOW:=no}"
88
89##############################################################################
90# Find netifs by test-specified driver name
91
# Print the name of the driver bound to a netdevice.
# Arguments: $1 - netdevice name
# Outputs:   basename of the target of /sys/class/net/<dev>/device/driver;
#            nothing when that path is not a symbolic link
driver_name_get()
{
	local dev=$1; shift
	local driver_path="/sys/class/net/$dev/device/driver"

	if [[ -L $driver_path ]]; then
		# Quote both expansions so the path is never word-split.
		basename "$(realpath "$driver_path")"
	fi
}
101
# Fill NETIFS[p1], NETIFS[p2], ... with the interfaces whose driver name
# matches $NETIF_FIND_DRIVER, in the order "ip link show" lists them.
# Globals: NETIF_FIND_DRIVER (read), NETIFS (written)
netif_find_driver()
{
	local ifnames ifname driver_name
	local count=0

	ifnames=$(ip -j link show | jq -r ".[].ifname")

	# ifname is local so the caller's variable of that name is untouched.
	for ifname in $ifnames; do
		driver_name=$(driver_name_get "$ifname")
		# The right-hand side is left unquoted, as before, so
		# NETIF_FIND_DRIVER is matched as a pattern.
		if [[ -n $driver_name && $driver_name == $NETIF_FIND_DRIVER ]]; then
			count=$((count + 1))
			NETIFS[p$count]="$ifname"
		fi
	done
}
116
# Whether to find netdevices according to the driver specified by the importer
: "${NETIF_FIND_DRIVER:=}"

# A driver name was requested: drop the current port map and rebuild it from
# the interfaces bound to that driver.
if [[ -n $NETIF_FIND_DRIVER ]]; then
	unset NETIFS
	declare -A NETIFS
	netif_find_driver
fi
125
# Directory containing this file, with symbolic links resolved.
net_forwarding_dir=$(readlink -e "${BASH_SOURCE[0]}")
net_forwarding_dir=$(dirname "$net_forwarding_dir")

# forwarding.config is optional and is read only when present.
if [[ -f $net_forwarding_dir/forwarding.config ]]; then
	source "$net_forwarding_dir/forwarding.config"
fi

# The parent directory's lib.sh is sourced unconditionally.
source "$net_forwarding_dir/../lib.sh"
133
134##############################################################################
135# Sanity checks
136
# Exit with $ksft_skip when "tc -j" fails, i.e. tc has no JSON support.
check_tc_version()
{
	if ! tc -j &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing JSON support"
		exit $ksft_skip
	fi
}
145
# Probe tc-flower erspan_opts support by adding and then removing a filter.
# Arguments: $1 - netdevice to install the probe filter on
# Returns:   $ksft_skip (after printing a SKIP line) when the add fails,
#            otherwise the status of the filter removal
check_tc_erspan_support()
{
	local dev=$1; shift
	local -a probe=(dev $dev ingress pref 1 handle 1 flower
			erspan_opts 1:0:0:0)

	if ! tc filter add "${probe[@]}" &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing erspan support"
		return $ksft_skip
	fi
	tc filter del "${probe[@]}" &> /dev/null
}
159
# Old versions of tc don't understand "mpls_uc". Probe for it by adding and
# then removing a matchall filter for that protocol.
# Arguments: $1 - netdevice to install the probe filter on
# Returns:   $ksft_skip (after printing a SKIP line) when the add fails,
#            otherwise the status of the filter removal
check_tc_mpls_support()
{
	local dev=$1; shift
	local -a probe=(dev $dev ingress protocol mpls_uc pref 1 handle 1
			matchall)

	if ! tc filter add "${probe[@]}" action pipe &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing MPLS support"
		return $ksft_skip
	fi
	tc filter del "${probe[@]}"
}
174
# Old versions of tc produce invalid json output for mpls lse statistics.
# Install an extended MPLS flower filter, check that its JSON dump parses with
# jq, then remove the filter again.
# Arguments: $1 - netdevice to install the probe filter on
# Returns:   $ksft_skip (after printing a SKIP line) when the filter cannot be
#            added or its JSON dump does not parse
check_tc_mpls_lse_stats()
{
	local dev=$1; shift
	local json_status

	if ! tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \
		flower mpls lse depth 2 action continue &> /dev/null; then
		echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
		return $ksft_skip
	fi

	# Remember the parse result; the filter is removed either way.
	tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
	json_status=$?
	tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 flower

	if [[ $json_status -ne 0 ]]; then
		echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
		return $ksft_skip
	fi
}
200
# Exit with $ksft_skip unless "tc filter help" mentions "block".
check_tc_shblock_support()
{
	if ! tc filter help 2>&1 | grep block &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing shared block support"
		exit $ksft_skip
	fi
}
209
# Exit with $ksft_skip unless "tc help" mentions "chain".
check_tc_chain_support()
{
	if ! tc help 2>&1 | grep chain &> /dev/null; then
		echo "SKIP: iproute2 too old; tc is missing chain support"
		exit $ksft_skip
	fi
}
218
# Exit with $ksft_skip unless "tc actions help" mentions "hw_stats".
check_tc_action_hw_stats_support()
{
	if ! tc actions help 2>&1 | grep -q hw_stats; then
		echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
		exit $ksft_skip
	fi
}
227
# Exit with $ksft_skip unless the mqprio help text mentions "fp ".
check_tc_fp_support()
{
	if ! tc qdisc add dev lo mqprio help 2>&1 | grep -q "fp "; then
		echo "SKIP: iproute2 too old; tc is missing frame preemption support"
		exit $ksft_skip
	fi
}
236
# Exit with $ksft_skip unless "ethtool --help" mentions "lanes".
check_ethtool_lanes_support()
{
	if ! ethtool --help 2>&1 | grep lanes &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing lanes support"
		exit $ksft_skip
	fi
}
245
# Exit with $ksft_skip unless "ethtool --help" mentions "--show-mm".
check_ethtool_mm_support()
{
	if ! ethtool --help 2>&1 | grep -- '--show-mm' &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing MAC Merge layer support"
		exit $ksft_skip
	fi
}
254
# Exit with $ksft_skip unless "ethtool --help" mentions "--all-groups".
check_ethtool_counter_group_support()
{
	if ! ethtool --help 2>&1 | grep -- '--all-groups' &> /dev/null; then
		echo "SKIP: ethtool too old; it is missing standard counter group support"
		exit $ksft_skip
	fi
}
263
# Check whether ethtool reports a non-empty pMAC statistics group for a device.
# Arguments: $1 - netdevice
#            $2 - name of the standard statistics group to look up
# Returns:   0 when the group has at least one entry; non-zero otherwise,
#            including when ethtool or jq produce no usable count
check_ethtool_pmac_std_stats_support()
{
	local dev=$1; shift
	local grp=$1; shift
	local count

	# Hand the group name to jq as data instead of splicing it into the
	# filter text.
	count=$(ethtool --json -S $dev --all-groups --src pmac 2>/dev/null \
		| jq --arg grp "$grp" '.[][$grp] | length')

	# Empty or multi-line output is "unsupported", not a test(1) syntax
	# error.
	[[ $count =~ ^[0-9]+$ ]] && (( 10#$count != 0 ))
}
272
# Return $ksft_skip (after printing a SKIP line) unless the detailed bridge
# link dump contains " locked".
check_locked_port_support()
{
	bridge -d link show | grep -q " locked" && return 0

	echo "SKIP: iproute2 too old; Locked port feature not supported."
	return $ksft_skip
}
280
# Return $ksft_skip (after printing a SKIP line) unless the detailed bridge
# link dump contains "mab".
check_port_mab_support()
{
	bridge -d link show | grep -q "mab" && return 0

	echo "SKIP: iproute2 too old; MacAuth feature not supported."
	return $ksft_skip
}
288
# Skip outright when "id -u" does not report user ID 0.
if (( $(id -u) != 0 )); then
	echo "SKIP: need root privileges"
	exit $ksft_skip
fi
293
# Exit with $ksft_skip unless a netdevice is bound to the expected driver.
# Arguments: $1 - netdevice
#            $2 - expected driver name (matched as a pattern)
check_driver()
{
	local dev=$1; shift
	local expected=$1; shift
	local driver_name

	driver_name=$(driver_name_get $dev)
	[[ $driver_name != $expected ]] || return 0

	echo "SKIP: expected driver $expected for $dev, got $driver_name instead"
	exit $ksft_skip
}
305
# Run the tc JSON check only when the importer set CHECK_TC=yes.
[[ "$CHECK_TC" != "yes" ]] || check_tc_version
309
# IPv6 support was added in v3.0, so exit with $ksft_skip when the major
# version reported by "msend -v" is below 3.
check_mtools_version()
{
	local version
	local major

	version=$(msend -v)
	# Drop the "msend version " banner, leaving e.g. "3.1".
	version=${version##msend version }
	major=$(echo $version | cut -d. -f1)

	[ $major -lt 3 ] || return 0

	echo "SKIP: expected mtools version 3.0, got $version"
	exit $ksft_skip
}
324
# Check for each tool whose REQUIRE_* switch is set to "yes".
[[ "$REQUIRE_JQ" != "yes" ]] || require_command jq
[[ "$REQUIRE_MZ" != "yes" ]] || require_command $MZ
[[ "$REQUIRE_TEAMD" != "yes" ]] || require_command $TEAMD
if [[ "$REQUIRE_MTOOLS" == "yes" ]]; then
	# https://github.com/troglobit/mtools
	require_command msend
	require_command mreceive
	check_mtools_version
fi
340
341##############################################################################
342# Command line options handling
343
# Validate the remote-endpoint environment. Exits with $ksft_skip unless a
# complete LOCAL/REMOTE address pair (IPv4 or IPv6) is set and both REMOTE_TYPE
# and REMOTE_ARGS are non-empty.
check_env() {
	local v4_pair=no
	local v6_pair=no

	if [[ -n "$LOCAL_V4" && -n "$REMOTE_V4" ]]; then
		v4_pair=yes
	fi
	if [[ -n "$LOCAL_V6" && -n "$REMOTE_V6" ]]; then
		v6_pair=yes
	fi

	if [[ $v4_pair == no && $v6_pair == no ]]; then
		echo "SKIP: Invalid environment, missing or inconsistent LOCAL_V4/REMOTE_V4/LOCAL_V6/REMOTE_V6"
		echo "Please see tools/testing/selftests/drivers/net/README.rst"
		exit "$ksft_skip"
	fi

	if [[ -z "$REMOTE_TYPE" ]]; then
		echo "SKIP: Invalid environment, missing REMOTE_TYPE"
		exit "$ksft_skip"
	fi

	if [[ -z "$REMOTE_ARGS" ]]; then
		echo "SKIP: Invalid environment, missing REMOTE_ARGS"
		exit "$ksft_skip"
	fi
}
362
# Run a command on the given target.
# Arguments: $1   - target, formatted as <type>:<args>
#            $2.. - command and its arguments
# A "netns" target runs the command through "ip netns exec <args>", an "ssh"
# target through "ssh -n <args>". Any other type, including an empty target,
# runs the command locally.
__run_on()
{
	local target=$1; shift
	local type args

	IFS=':' read -r type args <<< "$target"

	if [[ $type == netns ]]; then
		# args contains the namespace name
		ip netns exec "$args" "$@"
	elif [[ $type == ssh ]]; then
		# args contains user@host
		ssh -n "$args" "$@"
	else
		# "local", and the fallback for when the interface's target is
		# not found in the TARGETS array.
		"$@"
	fi
}
388
# Run a command on whichever target hosts the given interface.
# Arguments: $1   - interface name
#            $2.. - command and its arguments
# The target is looked up in TARGETS only when DRIVER_TEST_CONFORMANT is "yes";
# otherwise the command always runs locally.
run_on()
{
	local iface=$1; shift
	local target

	case "${DRIVER_TEST_CONFORMANT}" in
	yes)
		target="${TARGETS[$iface]}"
		;;
	*)
		target="local:"
		;;
	esac

	__run_on "$target" "$@"
}
400
# Print the name(s) of the interface(s) carrying an IP address on a target.
# Arguments: $1 - target, formatted as <type>:<args>
#            $2 - IP address to look up
get_ifname_by_ip()
{
	local target=$1; shift
	local ip_addr=$1; shift

	jq -r '.[].ifname' < <(__run_on "$target" ip -j addr show to "$ip_addr")
}
408
# Whether the test is conforming to the requirements and usage described in
# drivers/net/README.rst.
: "${DRIVER_TEST_CONFORMANT:=no}"

declare -A TARGETS

# Based on DRIVER_TEST_CONFORMANT, decide if to source drivers/net/net.config
# or not. In the "yes" case, the test expects to pass the arguments through the
# variables specified in drivers/net/README.rst file. If not, fallback on
# parsing the script arguments for interface names.
if [ "${DRIVER_TEST_CONFORMANT}" = "yes" ]; then
	if [[ -f $net_forwarding_dir/../../drivers/net/net.config ]]; then
		source "$net_forwarding_dir/../../drivers/net/net.config"
	fi

	if (( NUM_NETIFS > 2)); then
		echo "SKIP: DRIVER_TEST_CONFORMANT=yes and NUM_NETIFS is bigger than 2"
		exit "$ksft_skip"
	fi

	check_env

	# Populate the NETIFS and TARGETS arrays automatically based on the
	# environment variables. The TARGETS array is indexed by the network
	# interface name keeping track of the target on which the interface
	# resides. Values will be strings of the following format -
	# <type>:<args>.
	#
	# TARGETS[eth0]="local:" - meaning that the eth0 interface is
	# accessible locally
	# TARGETS[eth1]="netns:foo" - eth1 is in the foo netns
	# TARGETS[eth2]="ssh:root@10.0.0.2" - eth2 is accessible through
	# running the 'ssh root@10.0.0.2' command.

	unset NETIFS
	declare -A NETIFS

	NETIFS[p1]="$NETIF"
	TARGETS[$NETIF]="local:"

	# Locate the name of the remote interface. Test REMOTE_V4 for being
	# non-empty rather than merely set: check_env accepts an environment
	# that only provides the IPv6 pair, and an empty-but-set REMOTE_V4
	# must not be used for the lookup in that case.
	remote_target="$REMOTE_TYPE:$REMOTE_ARGS"
	if [[ -n "$REMOTE_V4" ]]; then
		remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V4")
	else
		remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V6")
	fi
	if [[ -z "$remote_netif" ]]; then
		echo "SKIP: cannot find remote interface"
		exit "$ksft_skip"
	fi

	# TARGETS is keyed by interface name, so both ends need distinct names.
	if [[ "$NETIF" == "$remote_netif" ]]; then
		echo "SKIP: local and remote interfaces cannot have the same name"
		exit "$ksft_skip"
	fi

	NETIFS[p2]="$remote_netif"
	TARGETS[$remote_netif]="$REMOTE_TYPE:$REMOTE_ARGS"
else
	count=0
	# Prime NETIFS from the command line, but retain if none given.
	if [[ $# -gt 0 ]]; then
		unset NETIFS
		declare -A NETIFS

		while [[ $# -gt 0 ]]; do
			count=$((count + 1))
			NETIFS[p$count]="$1"
			TARGETS[$1]="local:"
			shift
		done
	fi
fi
483
484##############################################################################
485# Network interfaces configuration
486
# The importer must declare how many ports it needs ...
[[ -v NUM_NETIFS ]] || {
	echo "SKIP: importer does not define \"NUM_NETIFS\""
	exit $ksft_skip
}

# ... the port map must be at least that large ...
if (( ${#NETIFS[@]} < NUM_NETIFS )); then
	echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})"
	exit $ksft_skip
fi

# ... and its keys p1..pN must all be set, with no gaps.
for i in $(seq ${#NETIFS[@]}); do
	if [[ -z ${NETIFS[p$i]} ]]; then
		echo "SKIP: NETIFS[p$i] not given"
		exit $ksft_skip
	fi
done
503
# Create the veth pairs backing ports p1..pNUM_NETIFS. Each odd port is paired
# with the following even port. A pair whose odd-numbered interface already
# exists is left alone.
# Exits with $ksft_skip when a port has no name, and with 1 when a pair cannot
# be created.
create_netif_veth()
{
	local i peer

	for ((i = 1; i <= NUM_NETIFS; i += 2)); do
		peer=$((i + 1))

		if [[ -z ${NETIFS[p$i]} ]]; then
			echo "SKIP: Cannot create interface. Name not specified"
			exit $ksft_skip
		fi

		if ip link show dev ${NETIFS[p$i]} &> /dev/null; then
			continue
		fi

		if ! ip link add ${NETIFS[p$i]} type veth \
			peer name ${NETIFS[p$peer]}; then
			echo "Failed to create netif"
			exit 1
		fi
	done
}
528
# Create the test interfaces according to $NETIF_TYPE. Only "veth" is
# implemented; any other type prints an error and exits with status 1.
create_netif()
{
	case "$NETIF_TYPE" in
	veth) create_netif_veth
	      ;;
	# A backslash before a single quote is kept literally inside double
	# quotes, so the quotes are written without one.
	*) echo "Can not create interfaces of type '$NETIF_TYPE'"
	   exit 1
	   ;;
	esac
}
539
# Original MAC address of each interface, keyed by interface name.
declare -A MAC_ADDR_ORIG

# Save the current MAC address of ports p1..pNUM_NETIFS in MAC_ADDR_ORIG and
# give port N the address 00:01:02:03:04:<N as two hex digits>.
# Globals: NUM_NETIFS, NETIFS (read), MAC_ADDR_ORIG (written)
mac_addr_prepare()
{
	local new_addr=
	local dev=
	local i		# keep the loop counter out of the caller's scope

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		new_addr=$(printf "00:01:02:03:04:%02x" $i)

		MAC_ADDR_ORIG["$dev"]=$(run_on "$dev" \
			ip -j link show dev "$dev" | jq -e '.[].address')
		# Strip quotes
		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
		run_on "$dev" ip link set dev "$dev" address $new_addr
	done
}
557
# Put back the MAC addresses recorded in MAC_ADDR_ORIG on ports
# p1..pNUM_NETIFS.
# Globals: NUM_NETIFS, NETIFS, MAC_ADDR_ORIG (read)
mac_addr_restore()
{
	local dev=
	local i		# keep the loop counter out of the caller's scope

	for ((i = 1; i <= NUM_NETIFS; ++i)); do
		dev=${NETIFS[p$i]}
		run_on "$dev" \
			ip link set dev "$dev" address ${MAC_ADDR_ORIG["$dev"]}
	done
}
568
# Create the interfaces and pin their MAC addresses, each only when the
# corresponding switch is "yes".
if [[ "$NETIF_CREATE" == "yes" ]]; then
	create_netif
fi

if [[ "$STABLE_MAC_ADDRS" == "yes" ]]; then
	mac_addr_prepare
fi

# Every port the test needs must be visible on its target by now.
for ((i = 1; i <= NUM_NETIFS; ++i)); do
	int="${NETIFS[p$i]}"

	if ! run_on "$int" ip link show dev "$int" &> /dev/null; then
		echo "SKIP: could not find all required interfaces"
		exit $ksft_skip
	fi
done
586
587##############################################################################
588# Helpers
589
# Run a command and invert its outcome: succeed when it fails, fail when it
# succeeds.
not()
{
	! "$@"
}
595
# Print the largest of the integers given as arguments.
# Arguments: $@ - integers to compare
# Outputs:   the maximum; an empty line when called without arguments
get_max()
{
	local arr=("$@")
	# Working variables are local so callers' "max" and "cur" survive.
	local max=${arr[0]}
	local cur

	for cur in "${arr[@]}"; do
		if [[ $cur -gt $max ]]; then
			max=$cur
		fi
	done

	echo "$max"
}
609
# Filter the output of a command for lines mentioning an address.
# Arguments: $1   - address to search for
#            $2   - optional "self" or "master": second word to filter on
#            $3   - optional "-v", only after $2: invert the second filter
#            rest - command producing the output to filter
grep_bridge_fdb()
{
	local addr=$1; shift
	local word
	local flag

	case "$1" in
	self|master)
		word=$1; shift
		if [[ "$1" == "-v" ]]; then
			flag=$1; shift
		fi
		;;
	esac

	# With no word given, the second grep has an empty pattern and lets
	# every line through.
	$@ | grep $addr | grep $flag "$word"
}
625
# Succeed when the output of the given command reports "Link detected: yes".
wait_for_port_up()
{
	grep -q "Link detected: yes" < <("$@")
}
630
# Succeed when the output of the given command contains "offload".
wait_for_offload()
{
	grep -q offload < <("$@")
}
635
# Succeed when the output of the given command contains "trap".
wait_for_trap()
{
	grep -q trap < <("$@")
}
640
# Wait up to $INTERFACE_TIMEOUT iterations for an interface to come up and
# abort the test when it does not.
# Arguments: $1 - netdevice
#            $2 - settle time passed on to setup_wait_dev_with_timeout
#                 (default: $WAIT_TIME)
setup_wait_dev()
{
	local dev=$1; shift
	local wait_time=${1:-$WAIT_TIME}; shift

	if setup_wait_dev_with_timeout "$dev" $INTERFACE_TIMEOUT $wait_time; then
		return 0
	fi

	check_err 1
	log_test setup_wait_dev ": Interface $dev does not come up."
	exit 1
}
654
# Poll an interface until "ip link show" reports it in state UP.
# Arguments: $1 - netdevice
#            $2 - maximum number of polls, one second apart
#                 (default: $WAIT_TIMEOUT)
#            $3 - seconds to sleep once the interface is up
#                 (default: $WAIT_TIME)
# Returns:   0 once the interface is up, 1 when the polls are exhausted
setup_wait_dev_with_timeout()
{
	local dev=$1; shift
	local max_iterations=${1:-$WAIT_TIMEOUT}; shift
	local wait_time=${1:-$WAIT_TIME}; shift
	local attempt

	for ((attempt = 1; attempt <= $max_iterations; ++attempt)); do
		if run_on "$dev" ip link show dev "$dev" up \
			| grep 'state UP' &> /dev/null; then
			sleep $wait_time
			return 0
		fi
		sleep 1
	done

	return 1
}
675
# Wait for the first N ports to come up, then sleep $WAIT_TIME seconds.
# Arguments: $1 - number of ports (p1..pN) to wait for
setup_wait_n()
{
	local num_netifs=$1; shift
	local i=1

	while (( i <= num_netifs )); do
		# No per-device settle time; one sleep follows the loop.
		setup_wait_dev ${NETIFS[p$i]} 0
		i=$((i + 1))
	done

	# Make sure links are ready.
	sleep $WAIT_TIME
}
688
# Wait for all NUM_NETIFS ports of the test to come up.
setup_wait()
{
	local all_ports=$NUM_NETIFS

	setup_wait_n "$all_ports"
}
693
# Wait, through slowwait, until "ip link show" succeeds for a netdevice, and
# abort the test with $EXIT_STATUS when it does not within the timeout.
# Arguments: $1 - netdevice
#            $2 - timeout handed to slowwait (default: $WAIT_TIMEOUT)
wait_for_dev()
{
	local dev=$1; shift
	local timeout=${1:-$WAIT_TIMEOUT}; shift

	if slowwait $timeout ip link show dev $dev &> /dev/null; then
		return 0
	fi

	check_err 1
	log_test wait_for_dev "Interface $dev did not appear."
	exit $EXIT_STATUS
}
706
# Steps to run before tearing a test down: optionally wait for a key press
# (PAUSE_ON_CLEANUP=yes) and optionally restore the original MAC addresses
# (STABLE_MAC_ADDRS=yes).
pre_cleanup()
{
	if [[ "${PAUSE_ON_CLEANUP}" == "yes" ]]; then
		echo "Pausing before cleanup, hit any key to continue"
		read
	fi

	[[ "$STABLE_MAC_ADDRS" != "yes" ]] || mac_addr_restore
}
718
# For IPv4 and then IPv6: add a rule for the local table at preference 32765
# and delete the rule at preference 0.
vrf_prepare()
{
	local family

	for family in -4 -6; do
		ip $family rule add pref 32765 table local
		ip $family rule del pref 0
	done
}
726
# For IPv6 and then IPv4: add a rule for the local table at preference 0 and
# delete the rule at preference 32765.
vrf_cleanup()
{
	local family

	for family in -6 -4; do
		ip $family rule add pref 0 table local
		ip $family rule del pref 32765
	done
}
734
# Run vrf_prepare and register vrf_cleanup with defer.
adf_vrf_prepare()
{
	vrf_prepare

	defer vrf_cleanup
}
740
# Last table ID handed out, and the table ID assigned to each VRF by name.
__last_tb_id=0
declare -A __TB_IDS

# Assign the next table ID to a VRF and record it in __TB_IDS.
# Arguments: $1 - VRF name
# Returns:   the assigned ID as the exit status
__vrf_td_id_assign()
{
	local vrf_name=$1

	__TB_IDS[$vrf_name]=$((++__last_tb_id))
	return $__last_tb_id
}
752
# Look up the table ID recorded for a VRF in __TB_IDS.
# Arguments: $1 - VRF name
# Returns:   the recorded ID as the exit status
__vrf_td_id_lookup()
{
	local vrf_name=$1
	local tb_id=${__TB_IDS[$vrf_name]}

	return $tb_id
}
759
# Create a VRF device bound to a freshly assigned routing table and install
# unreachable default routes (IPv4 and IPv6) in that table.
# Arguments: $1 - VRF name
# Globals:   __TB_IDS (written through __vrf_td_id_assign, then read)
vrf_create()
{
	local vrf_name=$1
	local tb_id

	__vrf_td_id_assign "$vrf_name"
	# Read the ID back from the map rather than from $?: exit statuses are
	# truncated to 8 bits, so table IDs above 255 would come out wrong.
	tb_id=${__TB_IDS[$vrf_name]}

	ip link add dev $vrf_name type vrf table $tb_id
	ip -4 route add table $tb_id unreachable default metric 4278198272
	ip -6 route add table $tb_id unreachable default metric 4278198272
}
772
# Remove the unreachable default routes from a VRF's table and delete the VRF
# device.
# Arguments: $1 - VRF name
# Globals:   __TB_IDS (read)
vrf_destroy()
{
	local vrf_name=$1
	local tb_id

	# Read the ID straight from the map: passing it through an exit status
	# would truncate it to 8 bits. A name with no recorded ID yields 0,
	# which is what the exit-status lookup produced.
	tb_id=${__TB_IDS[$vrf_name]:-0}

	ip -6 route del table $tb_id unreachable default metric 4278198272
	ip -4 route del table $tb_id unreachable default metric 4278198272
	ip link del dev $vrf_name
}
785
# Add or delete a list of addresses on an interface.
# Arguments: $1   - interface name
#            $2   - "add" or "del", passed on to "ip address"
#            $3.. - address strings, one "ip address" call each
__addr_add_del()
{
	local if_name=$1
	local add_del=$2
	local array
	local addrstr	# keep the loop variable out of the caller's scope

	shift
	shift
	array=("${@}")

	for addrstr in "${array[@]}"; do
		# $addrstr stays unquoted, as before, so an entry may carry
		# several words.
		ip address $add_del $addrstr dev $if_name
	done
}
800
# Enslave an interface to a VRF, bring it up and add addresses to it.
# Arguments: $1   - interface name
#            $2   - VRF name
#            $3.. - addresses to add
__simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=$1; shift

	ip link set dev $if_name master $vrf_name
	ip link set dev $if_name up

	# Whatever is left in "$@" is the address list.
	__addr_add_del $if_name add "$@"
}
812
# Undo __simple_if_init: delete the addresses, bring the interface down and
# release it from its master.
# Arguments: $1   - interface name
#            $2.. - addresses to delete
__simple_if_fini()
{
	local if_name=$1; shift

	# Whatever is left in "$@" is the address list.
	__addr_add_del $if_name del "$@"

	ip link set dev $if_name down
	ip link set dev $if_name nomaster
}
823
# Create a VRF named "v<interface>", bring it up, and set the interface up
# inside it with the given addresses.
# Arguments: $1   - interface name
#            $2.. - addresses to add
simple_if_init()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	vrf_create $vrf_name
	ip link set dev $vrf_name up
	__simple_if_init $if_name $vrf_name "$@"
}
838
# Undo simple_if_init: deconfigure the interface, then destroy the
# "v<interface>" VRF.
# Arguments: $1   - interface name
#            $2.. - addresses to delete
simple_if_fini()
{
	local if_name=$1; shift
	local vrf_name=v$if_name

	__simple_if_fini $if_name "$@"
	vrf_destroy $vrf_name
}
852
# Run simple_if_init and register the matching simple_if_fini, with the same
# arguments, through defer.
adf_simple_if_init()
{
	simple_if_init "$@"

	defer simple_if_fini "$@"
}
858
# Create a tunnel netdevice and bring it up.
# Arguments: $1   - device name
#            $2   - link type
#            $3   - local endpoint address
#            $4   - remote endpoint address
#            $5.. - extra arguments appended to "ip link add"
tunnel_create()
{
	local name=$1; shift
	local type=$1; shift
	local local_addr=$1; shift
	local remote_addr=$1; shift

	ip link add name $name type $type \
		local $local_addr remote $remote_addr "$@"
	ip link set dev $name up
}
870
# Delete a tunnel netdevice.
# Arguments: $1 - device name
tunnel_destroy()
{
	local tundev=$1; shift

	ip link del dev $tundev
}
877
# Create the VLAN upper "<interface>.<vid>", optionally enslave it to a VRF,
# bring it up and add addresses to it.
# Arguments: $1   - lower interface name
#            $2   - VLAN ID
#            $3   - VRF to enslave the VLAN device to; empty for none
#            $4.. - addresses to add
vlan_create()
{
	local if_name=$1; shift
	local vid=$1; shift
	local vrf=$1; shift
	local name=$if_name.$vid

	ip link add name $name link $if_name type vlan id $vid
	if [[ -n "$vrf" ]]; then
		ip link set dev $name master $vrf
	fi
	ip link set dev $name up

	# Whatever is left in "$@" is the address list.
	__addr_add_del $name add "$@"
}
893
# Delete the VLAN upper "<interface>.<vid>".
# Arguments: $1 - lower interface name
#            $2 - VLAN ID
vlan_destroy()
{
	local if_name=$1; shift
	local vid=$1; shift

	ip link del dev $if_name.$vid
}
902
# Create a team device with $TEAMD and enslave the given ports to it.
# Arguments: $1   - team device name
#            $2   - runner name, placed in the teamd JSON config
#            $3.. - ports to enslave
team_create()
{
	local if_name=$1; shift
	local mode=$1; shift
	local slave	# keep the loop variable out of the caller's scope

	require_command $TEAMD
	# Build the config as one quoted word so $mode is never word-split.
	$TEAMD -t $if_name -d -c "{\"runner\": {\"name\": \"$mode\"}}"
	for slave in "$@"; do
		# Each port is taken down before it is enslaved.
		ip link set dev $slave down
		ip link set dev $slave master $if_name
		ip link set dev $slave up
	done
	ip link set dev $if_name up
}
917
# Ask $TEAMD to kill the daemon instance of a team device.
# Arguments: $1 - team device name
team_destroy()
{
	local team_dev=$1; shift

	$TEAMD -t $team_dev -k
}
924
# Print the "master" field of an interface, as reported by "ip -j link show".
# Arguments: $1 - interface name
master_name_get()
{
	local if_name=$1

	jq -r '.[]["master"]' < <(ip -j link show dev $if_name)
}
931
# Print one stats64 counter of an interface.
# Arguments: $1 - interface name
#            $2 - direction key under "stats64" (e.g. rx, tx)
#            $3 - counter key under that direction (e.g. packets, errors)
link_stats_get()
{
	local if_name=$1; shift
	local dir=$1; shift
	local stat=$1; shift

	# Hand the keys to jq as data instead of splicing them into the filter
	# text.
	ip -j -s link show dev $if_name \
		| jq --arg dir "$dir" --arg stat "$stat" \
		     '.[]["stats64"][$dir][$stat]'
}
941
# Print the number of packets transmitted by an interface.
# Arguments: $1 - interface name
link_stats_tx_packets_get()
{
	local dir=tx
	local stat=packets

	link_stats_get $1 $dir $stat
}
946
# Print the number of receive errors of an interface.
# Arguments: $1 - interface name
link_stats_rx_errors_get()
{
	local dir=rx
	local stat=errors

	link_stats_get $1 $dir $stat
}
951
# Print the value of the first "ethtool -S" counter with the given name: the
# text between the first and second colon of that line.
# Arguments: $1 - netdevice
#            $2 - counter name
ethtool_stats_get()
{
	local dev=$1; shift
	local stat=$1; shift

	# -m 1 stops at the first match.
	ethtool -S $dev | grep -m 1 "^ *$stat:" | cut -d: -f2
}
959
# Print one standard statistics counter of a netdevice, queried with ethtool
# on the target that hosts the device.
# Arguments: $1 - netdevice
#            $2 - statistics group; "pause" is read from "ethtool -I -a"
#                 instead of "ethtool -S --groups"
#            $3 - counter name
#            $4 - value for ethtool's --src option
ethtool_std_stats_get()
{
	local dev=$1; shift
	local grp=$1; shift
	local name=$1; shift
	local src=$1; shift

	if [[ "$grp" == "pause" ]]; then
		run_on "$dev" ethtool -I --json -a "$dev" --src "$src" | \
			jq --arg name "$name" '.[].statistics[$name]'
	else
		run_on "$dev" \
			ethtool --json -S "$dev" --groups "$grp" -- --src "$src" | \
			jq --arg grp "$grp" --arg name "$name" '.[][$grp][$name]'
	fi
}
977
# Evaluate a jq selector on the qdisc of a device that has the given handle.
# Arguments: $1 - netdevice
#            $2 - qdisc handle to select
#            $3 - jq expression applied to the selected qdisc
qdisc_stats_get()
{
	local dev=$1; shift
	local handle=$1; shift
	local selector=$1; shift
	local filter

	filter='.[] | select(.handle == "'"$handle"'") | '"$selector"
	tc -j -s qdisc show dev "$dev" | jq "$filter"
}
987
# Evaluate a jq selector on the qdisc of a device that has the given parent.
# Qdiscs are listed with the "invisible" keyword.
# Arguments: $1 - netdevice
#            $2 - parent to select
#            $3 - jq expression applied to the selected qdisc
qdisc_parent_stats_get()
{
	local dev=$1; shift
	local parent=$1; shift
	local selector=$1; shift
	local filter

	filter='.[] | select(.parent == "'"$parent"'") | '"$selector"
	tc -j -s qdisc show dev "$dev" invisible | jq "$filter"
}
997
# Print the second tab-separated field of the lines in
# /proc/net/dev_snmp6/<dev> that start with the given counter name.
# Arguments: $1 - netdevice
#            $2 - counter name; matched as a line prefix
ipv6_stats_get()
{
	local dev=$1; shift
	local stat=$1; shift

	# grep reads the file itself; the path is quoted against word
	# splitting.
	grep "^$stat" "/proc/net/dev_snmp6/$dev" | cut -f2
}
1005
# Print one counter from the offload statistics of an interface.
# Arguments: $1 - offload subgroup passed to "ip stats show"
#            $2 - interface name
#            $3 - direction key under stats64
#            $4 - counter key under that direction
hw_stats_get()
{
	local suite=$1; shift
	local if_name=$1; shift
	local dir=$1; shift
	local stat=$1; shift
	local filter=".[0].stats64.$dir.$stat"

	ip -j stats show dev $if_name group offload subgroup $suite |
		jq "$filter"
}
1016
# Print one statistic of a member of a nexthop group.
# Arguments: $1 - key to read from the member's group_stats entry
#            $2 - nexthop group ID
#            $3 - ID of the group member (handed to jq as JSON)
__nh_stats_get()
{
	local key=$1; shift
	local group_id=$1; shift
	local member_id=$1; shift
	local -a jq_args=(--argjson member_id "$member_id" --arg key "$key")

	ip -j -s -s nexthop show id $group_id |
	    jq "${jq_args[@]}" \
	       '.[].group_stats[] | select(.id == $member_id) | .[$key]'
}
1027
# Print the "packets" statistic of a nexthop group member.
# Arguments: $1 - nexthop group ID
#            $2 - ID of the group member
nh_stats_get()
{
	__nh_stats_get packets "$1" "$2"
}
1035
# Print the "packets_hw" statistic of a nexthop group member.
# Arguments: $1 - nexthop group ID
#            $2 - ID of the group member
nh_stats_get_hw()
{
	__nh_stats_get packets_hw "$1" "$2"
}
1043
# Print a rate with a unit suffix (bps, Kbps, Mbps or Gbps), dividing by 1024
# for each step up. Values too large for Gbps are still printed in Gbps.
# Arguments: $1 - rate in bits per second
# Requires:  bc
humanize()
{
	local speed=$1; shift
	local units=(bps Kbps Mbps Gbps)
	local last=$(( ${#units[@]} - 1 ))
	local idx=0

	# Scale only while a larger unit is left to move to; otherwise the
	# value would be divided once more than the printed unit accounts for.
	while (( idx < last )); do
		if (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
		idx=$((idx + 1))
	done

	echo "$speed${units[idx]}"
}
1058
# Print a bit rate: eight times the difference between two counter readings,
# divided by the interval (integer arithmetic).
# Arguments: $1 - first reading
#            $2 - second reading
#            $3 - interval between the readings
rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift
	local bits=$((8 * (t1 - t0)))

	echo $((bits / interval))
}
1067
# Print a packet rate: the difference between two counter readings divided by
# the interval (integer arithmetic).
# Arguments: $1 - first reading
#            $2 - second reading
#            $3 - interval between the readings
packets_rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift
	local delta=$((t1 - t0))

	echo $((delta / interval))
}
1076
# Convert a colon-separated MAC address to a hexadecimal number.
# Arguments: $1 - MAC address, e.g. de:ad:be:ef:13:37
# Outputs:   the value as 0x..., without leading zeros or a trailing newline
ether_addr_to_u64()
{
	local addr="$1"
	local weight="$((1 << 40))"
	local val=0
	local octet

	# The first octet is the most significant; each following one weighs
	# 256 times less.
	for octet in ${addr//:/ }; do
		val=$((val + weight * 0x$octet))
		weight=$((weight >> 8))
	done

	printf "0x%x" $val
}
1094
# Convert a number to a colon-separated MAC address built from its low 48
# bits.
# Arguments: $1 - value, in any form bash arithmetic accepts
# Outputs:   the address as xx:xx:xx:xx:xx:xx, without a trailing newline
u64_to_ether_addr()
{
	local val=$1
	local shift_bits
	local sep=""

	# Emit the octets from the most significant one down.
	for ((shift_bits = 40; shift_bits >= 0; shift_bits -= 8)); do
		printf "%s%02x" "$sep" $(( (val >> shift_bits) & 0xff ))
		sep=":"
	done
}
1109
# Print the first link-scope address of an interface.
# Arguments: $1 - interface name
ipv6_lladdr_get()
{
	local if_name=$1
	local filter='.[]["addr_info"][] | select(.scope == "link").local'

	ip -j addr show dev $if_name | jq -r "$filter" | head -n 1
}
1118
# Print the ageing time of a bridge in seconds.
# Arguments: $1 - bridge device name
bridge_ageing_time_get()
{
	local bridge=$1
	local filter='.[]["linkinfo"]["info_data"]["ageing_time"]'
	local ageing_time

	ageing_time=$(ip -j -d link show dev $bridge | jq "$filter")

	# Need to divide by 100 to convert to seconds.
	echo $((ageing_time / 100))
}
1129
# Saved sysctl values, keyed by sysctl name.
declare -A SYSCTL_ORIG

# Record the current value of a sysctl in SYSCTL_ORIG.
# Arguments: $1 - sysctl name
sysctl_save()
{
	local key=$1; shift
	local current

	current=$(sysctl -n $key)
	SYSCTL_ORIG[$key]=$current
}
1137
# Save the current value of a sysctl, then set a new one.
# Arguments: $1 - sysctl name
#            $2 - new value
sysctl_set()
{
	local key=$1; shift
	local value=$1; shift

	sysctl_save "$key"

	sysctl -qw $key="$value"
}
1146
# Write back the value recorded for a sysctl in SYSCTL_ORIG.
# Arguments: $1 - sysctl name
sysctl_restore()
{
	local key=$1; shift
	local saved=${SYSCTL_ORIG[$key]}

	sysctl -qw $key="$saved"
}
1153
# Enable forwarding for all interfaces, IPv4 first and then IPv6, saving the
# previous settings through sysctl_set.
forwarding_enable()
{
	local family

	for family in ipv4 ipv6; do
		sysctl_set net.$family.conf.all.forwarding 1
	done
}
1159
# Restore the saved forwarding settings, IPv6 first and then IPv4.
forwarding_restore()
{
	local family

	for family in ipv6 ipv4; do
		sysctl_restore net.$family.conf.all.forwarding
	done
}
1165
# Run forwarding_enable and register forwarding_restore with defer.
adf_forwarding_enable()
{
	forwarding_enable

	defer forwarding_restore
}
1171
# Saved MTU of each interface, keyed by interface name.
declare -A MTU_ORIG

# Record the current MTU of an interface in MTU_ORIG, then set a new one.
# Arguments: $1 - interface name
#            $2 - new MTU
mtu_set()
{
	local dev=$1; shift
	local mtu=$1; shift
	local current

	current=$(ip -j link show dev $dev | jq -e '.[].mtu')
	MTU_ORIG["$dev"]=$current

	ip link set dev $dev mtu $mtu
}
1181
# Set an interface back to the MTU recorded in MTU_ORIG.
# Arguments: $1 - interface name
mtu_restore()
{
	local dev=$1; shift
	local saved=${MTU_ORIG["$dev"]}

	ip link set dev $dev mtu $saved
}
1188
# Check that hw-tc-offload is on for the first N ports.
# Arguments: $1 - number of ports (p1..pN) to check (default: $NUM_NETIFS)
# Returns:   0 when "ethtool -k" reports "hw-tc-offload: on" for all of them,
#            1 at the first port for which it does not
tc_offload_check()
{
	local num_netifs=${1:-$NUM_NETIFS}
	local i		# keep the loop counter out of the caller's scope

	for ((i = 1; i <= num_netifs; ++i)); do
		ethtool -k ${NETIFS[p$i]} \
			| grep "hw-tc-offload: on" &> /dev/null
		if [[ $? -ne 0 ]]; then
			return 1
		fi
	done

	return 0
}
1203
# Install a pref-1 flower filter that traps traffic in hardware, falling back
# to a "continue" action when the trap filter cannot be installed.
# Arguments: $1 - netdevice
#            $2 - direction (e.g. ingress, egress)
trap_install()
{
	local dev=$1; shift
	local direction=$1; shift

	# Some devices may not support or need in-hardware trapping of traffic
	# (e.g. the veth pairs that this library creates for non-existent
	# loopbacks). Use continue instead, so that there is a filter in there
	# (some tests check counters), and so that other filters are still
	# processed.
	if ! tc filter add dev $dev $direction pref 1 \
		flower skip_sw action trap 2>/dev/null; then
		tc filter add dev $dev $direction pref 1 \
			flower action continue
	fi
}
1219
# Delete the pref-1 flower filter of a device in the given direction.
# Arguments: $1 - netdevice
#            $2 - direction (e.g. ingress, egress)
trap_uninstall()
{
	local dev=$1; shift
	local direction=$1; shift
	local -a filter_spec=(dev $dev $direction pref 1 flower)

	tc filter del "${filter_spec[@]}"
}
1227
# Add or delete an ingress flower filter that passes ICMP or ICMPv6 packets.
# Arguments: $1 - "add" or "del", passed on to "tc filter"
#            $2 - filter preference
#            $3 - protocol suffix: "" for ip/icmp, "v6" for ipv6/icmpv6
#            $4 - netdevice
#            $5 - extra flower match keys, word-split on use
__icmp_capture_add_del()
{
	local add_del=$1; shift
	local pref=$1; shift
	local vsuf=$1; shift
	local tundev=$1; shift
	local filter=$1; shift
	local l3_proto=ip$vsuf
	local l4_proto=icmp$vsuf

	tc filter $add_del dev "$tundev" ingress \
	   proto $l3_proto pref $pref \
	   flower ip_proto $l4_proto $filter \
	   action pass
}
1241
# Install the pref-100 ICMP capture filter on a device.
# Arguments: $1 - netdevice
#            $2 - extra flower match keys
icmp_capture_install()
{
	__icmp_capture_add_del add 100 "" "$1" "$2"
}
1249
# Remove the pref-100 ICMP capture filter from a device.
# Arguments: $1 - netdevice
#            $2 - extra flower match keys
icmp_capture_uninstall()
{
	__icmp_capture_add_del del 100 "" "$1" "$2"
}
1257
# Install the pref-100 ICMPv6 capture filter on a device.
# Arguments: $1 - netdevice
#            $2 - extra flower match keys
icmp6_capture_install()
{
	__icmp_capture_add_del add 100 v6 "$1" "$2"
}
1265
# Remove the pref-100 ICMPv6 capture filter from a device.
# Arguments: $1 - netdevice
#            $2 - extra flower match keys
icmp6_capture_uninstall()
{
	__icmp_capture_add_del del 100 v6 "$1" "$2"
}
1273
# Add or delete an ingress flower filter that passes 802.1q packets.
# Arguments: $1 - "add" or "del", passed on to "tc filter"
#            $2 - filter preference
#            $3 - netdevice
#            $4 - flower match keys, word-split on use
__vlan_capture_add_del()
{
	local add_del=$1; shift
	local pref=$1; shift
	local dev=$1; shift
	local filter=$1; shift
	local -a where=(dev "$dev" ingress proto 802.1q pref $pref)

	tc filter $add_del "${where[@]}" \
	   flower $filter \
	   action pass
}
1286
# Install the pref-100 802.1q capture filter on a device.
# Arguments: $1 - netdevice
#            $2 - flower match keys
vlan_capture_install()
{
	__vlan_capture_add_del add 100 "$1" "$2"
}
1294
# Remove the pref-100 802.1q capture filter from a device.
# Arguments: $1 - netdevice
#            $2 - flower match keys
vlan_capture_uninstall()
{
	__vlan_capture_add_del del 100 "$1" "$2"
}
1302
# Add or delete eight ICMP capture filters, one per DSCP value base..base+7.
# The filter for DSCP value D uses preference D + 100 and matches ip_tos
# D << 2.
# Arguments: $1 - "add" or "del"
#            $2 - netdevice
#            $3 - first DSCP value
__dscp_capture_add_del()
{
	local add_del=$1; shift
	local dev=$1; shift
	local base=$1; shift
	local dscp;
	local prio	# keep the loop variable out of the caller's scope

	for prio in {0..7}; do
		dscp=$((base + prio))
		__icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
				       "skip_hw ip_tos $((dscp << 2))"
	done
}
1316
# Install the DSCP capture filters on a device.
# Arguments: $1 - netdevice
#            $2 - first DSCP value
dscp_capture_install()
{
	local dev=$1; shift
	local first_dscp=$1; shift

	__dscp_capture_add_del add $dev $first_dscp
}
1324
# Remove the DSCP capture filters from a device.
# Arguments: $1 - netdevice
#            $2 - first DSCP value
dscp_capture_uninstall()
{
	local dev=$1; shift
	local first_dscp=$1; shift

	__dscp_capture_add_del del $dev $first_dscp
}
1332
# Print the counters of the DSCP capture filters, one "[<dscp>]=<count> " line
# per DSCP value base..base+7. The counter for DSCP value D is read from the
# rule with preference D + 100.
# Arguments: $1 - netdevice
#            $2 - first DSCP value
dscp_fetch_stats()
{
	local dev=$1; shift
	local base=$1; shift
	local prio	# keep the loop variable out of the caller's scope
	local dscp
	local t

	for prio in {0..7}; do
		dscp=$((base + prio))
		t=$(tc_rule_stats_get $dev $((dscp + 100)))
		echo "[$dscp]=$t "
	done
}
1344
# Add a clsact qdisc to a device and a pref-10000 ingress matchall filter that
# drops packets.
# Arguments: $1 - netdevice
matchall_sink_create()
{
	local dev=$1; shift

	tc qdisc add dev $dev clsact
	tc filter add dev $dev ingress pref 10000 matchall action drop
}
1355
# Default cleanup: run pre_cleanup, then defer_scopes_cleanup.
cleanup()
{
	pre_cleanup

	defer_scopes_cleanup
}
1361
# Compare the measured packet split between two paths against the configured
# weights and log the outcome as a test result.
# Arguments: $1 - test description used for the result line
#            $2 - weight of the first path
#            $3 - weight of the second path
#            $4 - packets seen on the first path
#            $5 - packets seen on the second path
# The test fails when either packet count is 0, or when the measured ratio
# deviates from the expected one by more than 15% of the expected ratio.
# Requires:  bc
multipath_eval()
{
	local desc="$1"
	local weight_rp12=$2
	local weight_rp13=$3
	local packets_rp12=$4
	local packets_rp13=$5
	local weights_ratio packets_ratio diff

	RET=0

	# Ratios are always larger-weight path over smaller-weight path.
	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
				| bc -l)
	else
		weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
				| bc -l)
	fi

	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
	       check_err 1 "Packet difference is 0"
	       # Report under the caller's description, like the regular
	       # path below does.
	       log_test "$desc"
	       log_info "Expected ratio $weights_ratio"
	       return
	fi

	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
		packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
				| bc -l)
	else
		packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
				| bc -l)
	fi

	# Absolute difference: strip a leading minus sign.
	diff=$(echo $weights_ratio - $packets_ratio | bc -l)
	diff=${diff#-}

	test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
	check_err $? "Too large discrepancy between expected and measured ratios"
	log_test "$desc"
	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
}
1404
# Run a command inside a network namespace with this library loaded.
#   $1    - namespace name
#   $2... - command and its arguments
# A bash is started through "ip netns exec" and fed a three-line script on
# stdin: it sets NUM_NETIFS=0, sources lib.sh, and then runs the command.
# Every word of the command is quoted with printf %q so that it reaches the
# inner shell as the same word.
in_ns()
{
	local name=$1; shift
	local cmdline

	cmdline=$(for word in "$@"; do printf "%q${IFS:0:1}" "$word"; done)

	printf 'NUM_NETIFS=0\nsource lib.sh\n%s\n' "$cmdline" |
		ip netns exec $name bash
}
1415
1416##############################################################################
1417# Tests
1418
# Ping an address with $PING, executed through "ip vrf exec" under the name
# that master_name_get reports for the interface.
#   $1 - interface whose master selects the VRF
#   $2 - destination address
#   $3 - optional extra ping arguments (word-split on purpose)
# Sends $PING_COUNT packets 0.1 s apart with a deadline of $PING_TIMEOUT
# seconds. All output is discarded; the exit status is that of the command.
ping_do()
{
	local if_name=$1 dip=$2 args=$3
	local vrf_name
	local -a ping_opts

	vrf_name=$(master_name_get $if_name)
	ping_opts=($args -c $PING_COUNT -i 0.1 -w $PING_TIMEOUT $dip)

	ip vrf exec $vrf_name $PING "${ping_opts[@]}" &> /dev/null
}
1431
# Test case: the ping must succeed.
#   $1 - interface handed to ping_do
#   $2 - destination address
#   $3 - optional suffix appended to the test name "ping"
# Resets RET, feeds the ping_do status to check_err and logs the result.
ping_test()
{
	local status

	RET=0

	ping_do $1 $2
	status=$?

	check_err $status
	log_test "ping$3"
}
1440
# Test case: the ping must NOT succeed.
#   $1 - interface handed to ping_do
#   $2 - destination address
#   $3 - optional suffix appended to the test name "ping fails"
# Resets RET, feeds the ping_do status to check_fail and logs the result.
ping_test_fails()
{
	local status

	RET=0

	ping_do $1 $2
	status=$?

	check_fail $status
	log_test "ping fails$3"
}
1449
# IPv6 counterpart of ping_do: ping an address with $PING6, executed through
# "ip vrf exec" under the name that master_name_get reports for the interface.
#   $1 - interface whose master selects the VRF
#   $2 - destination address
#   $3 - optional extra ping arguments (word-split on purpose)
# Sends $PING_COUNT packets 0.1 s apart with a deadline of $PING_TIMEOUT
# seconds. All output is discarded; the exit status is that of the command.
ping6_do()
{
	local if_name=$1 dip=$2 args=$3
	local vrf_name
	local -a ping_opts

	vrf_name=$(master_name_get $if_name)
	ping_opts=($args -c $PING_COUNT -i 0.1 -w $PING_TIMEOUT $dip)

	ip vrf exec $vrf_name $PING6 "${ping_opts[@]}" &> /dev/null
}
1462
# Test case: the IPv6 ping must succeed.
#   $1 - interface handed to ping6_do
#   $2 - destination address
#   $3 - optional suffix appended to the test name "ping6"
# Resets RET, feeds the ping6_do status to check_err and logs the result.
ping6_test()
{
	local status

	RET=0

	ping6_do $1 $2
	status=$?

	check_err $status
	log_test "ping6$3"
}
1471
# Test case: the IPv6 ping must NOT succeed.
#   $1 - interface handed to ping6_do
#   $2 - destination address
#   $3 - optional suffix appended to the test name "ping6 fails"
# Resets RET, feeds the ping6_do status to check_fail and logs the result.
ping6_test_fails()
{
	local status

	RET=0

	ping6_do $1 $2
	status=$?

	check_fail $status
	log_test "ping6 fails$3"
}
1480
# Test case "FDB learning": verify that a bridge port learns a source MAC,
# forwards to it once learned, ages the entry out, and stops learning when
# learning is switched off on the port.
#   $1 - bridge device
#   $2 - bridge port connected to host1_if
#   $3 - host1_if, the host interface behind that bridge port
#   $4 - host2_if, a second host interface attached to the bridge
# The test uses the fixed MAC de:ad:be:ef:13:37, sends packets with $MZ and
# counts them with a flower drop filter (handle 101) on host1_if ingress.
# Resets RET and reports through check_err / check_fail / log_test.
learning_test()
{
	local bridge=$1
	local br_port1=$2	# Connected to `host1_if`.
	local host1_if=$3
	local host2_if=$4
	local mac=de:ad:be:ef:13:37
	local ageing_time
	# jq filters shared by the repeated checks below.
	local fdb_match=".[] | select(.mac == \"$mac\")"
	local hit_match=".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)"

	RET=0

	# The MAC must not be known before the test starts.
	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_match" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# Disable unknown unicast flooding on `br_port1` to make sure
	# packets are only forwarded through the port after a matching
	# FDB entry was installed.
	bridge link set dev $br_port1 flood off

	ip link set $host1_if promisc on
	tc qdisc add dev $host1_if ingress
	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	# Unknown destination and no flooding: nothing may reach host1_if.
	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress \
		| jq -e "$hit_match" &> /dev/null
	check_fail $? "Packet reached first host when should not"

	# Let the bridge learn the MAC as a source address on `br_port1`.
	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_match" &> /dev/null
	check_err $? "Did not find FDB record when should"

	# Now that the MAC is learned, the packet must arrive at host1_if,
	# which is where the counting filter is installed.
	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
	sleep 1

	tc -j -s filter show dev $host1_if ingress \
		| jq -e "$hit_match" &> /dev/null
	check_err $? "Packet did not reach first host when should"

	# Wait for 10 seconds after the ageing time to make sure FDB
	# record was aged-out.
	ageing_time=$(bridge_ageing_time_get $bridge)
	sleep $((ageing_time + 10))

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_match" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# With learning off, the same source MAC must not create an entry.
	bridge link set dev $br_port1 learning off

	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
	sleep 1

	bridge -j fdb show br $bridge brport $br_port1 \
		| jq -e "$fdb_match" &> /dev/null
	check_fail $? "Found FDB record when should not"

	# Restore the port and host configuration touched above.
	bridge link set dev $br_port1 learning on

	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host1_if ingress
	ip link set $host1_if promisc off

	bridge link set dev $br_port1 flood on

	log_test "FDB learning"
}
1557
# Send one packet from host1_if and check whether it shows up on host2_if.
#   $1 - "true" if the packet is expected to be flooded, "false" if not
#   $2 - destination MAC of the probe packet
#   $3 - destination IP of the probe packet
#   $4 - host1_if, the sending interface
#   $5 - host2_if, the interface on which arrival is checked
# Returns 0 when the observation matches the expectation, 1 otherwise.
flood_test_do()
{
	local should_flood=$1
	local mac=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local err=0
	local hit_rc

	# Add an ACL on `host2_if` which will tell us whether the packet
	# was flooded to it or not.
	ip link set $host2_if promisc on
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
		flower dst_mac $mac action drop

	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
	sleep 1

	# jq -e succeeds only if the filter counted exactly one packet.
	tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	hit_rc=$?

	if [[ $hit_rc -ne 0 && $should_flood == "true" ]]; then
		err=1
	elif [[ $hit_rc -eq 0 && $should_flood == "false" ]]; then
		err=1
	fi

	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress
	ip link set $host2_if promisc off

	return $err
}
1591
# Test case "Unknown unicast flood": a packet to an unknown unicast MAC must
# not be seen with the port's "flood" flag off and must be seen with it on.
#   $1 - bridge port whose flood flag is toggled
#   $2 - host1_if, the sending interface
#   $3 - host2_if, the interface on which arrival is checked
# The port is left with flood on. Resets RET and logs the result.
flood_unicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local mac=de:ad:be:ef:13:37
	local ip=192.0.2.100
	local -a probe=($mac $ip $host1_if $host2_if)

	RET=0

	bridge link set dev $br_port flood off
	flood_test_do false "${probe[@]}"
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port flood on
	flood_test_do true "${probe[@]}"
	check_err $? "Packet was not flooded when should"

	log_test "Unknown unicast flood"
}
1614
# Test case "Unregistered multicast flood": a packet to the multicast MAC
# 01:00:5e:00:00:01 / group 239.0.0.1 must not be seen with the port's
# "mcast_flood" flag off and must be seen with it on.
#   $1 - bridge port whose mcast_flood flag is toggled
#   $2 - host1_if, the sending interface
#   $3 - host2_if, the interface on which arrival is checked
# The port is left with mcast_flood on. Resets RET and logs the result.
flood_multicast_test()
{
	local br_port=$1
	local host1_if=$2
	local host2_if=$3
	local mac=01:00:5e:00:00:01
	local ip=239.0.0.1
	local -a probe=($mac $ip $host1_if $host2_if)

	RET=0

	bridge link set dev $br_port mcast_flood off
	flood_test_do false "${probe[@]}"
	check_err $? "Packet flooded when should not"

	bridge link set dev $br_port mcast_flood on
	flood_test_do true "${probe[@]}"
	check_err $? "Packet was not flooded when should"

	log_test "Unregistered multicast flood"
}
1637
# Run the unknown-unicast and the unregistered-multicast flood tests, in
# that order, with the same arguments.
#   $1 - bridge port; it is connected to `host2_if`
#   $2 - host1_if
#   $3 - host2_if
flood_test()
{
	local br_port=$1 host1_if=$2 host2_if=$3

	flood_unicast_test $br_port $host1_if $host2_if
	flood_multicast_test $br_port $host1_if $host2_if
}
1648
1649__start_traffic()
1650{
1651	local pktsize=$1; shift
1652	local proto=$1; shift
1653	local h_in=$1; shift    # Where the traffic egresses the host
1654	local sip=$1; shift
1655	local dip=$1; shift
1656	local dmac=$1; shift
1657	local -a mz_args=("$@")
1658
1659	$MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \
1660		-a own -b $dmac -t "$proto" -q "${mz_args[@]}" &
1661	sleep 1
1662}
1663
# Start a background UDP stream with an explicit packet size.
#   $1    - packet size
#   $2    - h_in, the interface the traffic is sent from
#   $3    - source IP
#   $4    - destination IP
#   $5    - destination MAC
#   $6... - extra arguments handed through to __start_traffic
start_traffic_pktsize()
{
	local pktsize=$1 h_in=$2 sip=$3 dip=$4 dmac=$5

	__start_traffic $pktsize udp "$h_in" "$sip" "$dip" "$dmac" \
			"${@:6}"
}
1676
# Start a background TCP stream with an explicit packet size.
#   $1    - packet size
#   $2    - h_in, the interface the traffic is sent from
#   $3    - source IP
#   $4    - destination IP
#   $5    - destination MAC
#   $6... - extra arguments handed through to __start_traffic
start_tcp_traffic_pktsize()
{
	local pktsize=$1 h_in=$2 sip=$3 dip=$4 dmac=$5

	__start_traffic $pktsize tcp "$h_in" "$sip" "$dip" "$dmac" \
			"${@:6}"
}
1689
# Start a background UDP stream with the default packet size of 8000.
#   $1    - h_in, the interface the traffic is sent from
#   $2    - source IP
#   $3    - destination IP
#   $4    - destination MAC
#   $5... - extra arguments handed through to start_traffic_pktsize
start_traffic()
{
	local h_in=$1 sip=$2 dip=$3 dmac=$4

	start_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \
			      "${@:5}"
}
1701
# Start a background TCP stream with the default packet size of 8000.
#   $1    - h_in, the interface the traffic is sent from
#   $2    - source IP
#   $3    - destination IP
#   $4    - destination MAC
#   $5... - extra arguments handed through to start_tcp_traffic_pktsize
start_tcp_traffic()
{
	local h_in=$1 sip=$2 dip=$3 dmac=$4

	start_tcp_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \
				  "${@:5}"
}
1713
# Stop a traffic generator through kill_process.
#   $1 - optional PID or job spec; when the argument is omitted altogether,
#        the job spec "%%" (the shell's current job) is used
stop_traffic()
{
	kill_process "${1-%%}"
}
1720
# Per-interface capture state used by the tcpdump_* helpers, keyed by
# interface name: PID of the capturing tcpdump, the capture file it writes,
# and the file holding tcpdump's own stdout/stderr.
declare -A cappid capfile capout
1724
1725tcpdump_start()
1726{
1727	local if_name=$1; shift
1728	local ns=$1; shift
1729
1730	capfile[$if_name]=$(mktemp)
1731	capout[$if_name]=$(mktemp)
1732
1733	if [ -z $ns ]; then
1734		ns_cmd=""
1735	else
1736		ns_cmd="ip netns exec ${ns}"
1737	fi
1738
1739	if [ -z $SUDO_USER ] ; then
1740		capuser=""
1741	else
1742		capuser="-Z $SUDO_USER"
1743	fi
1744
1745	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
1746		-s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \
1747		> "${capout[$if_name]}" 2>&1 &
1748	cappid[$if_name]=$!
1749
1750	sleep 1
1751}
1752
# Terminate the tcpdump recorded in cappid[] for an interface and wait for
# it to exit, without any settling delay afterwards.
#   $1 - interface name used as the key into cappid[]
# The kill is prefixed with the global $ns_cmd. If the kill fails, its
# status is returned and the wait is skipped.
tcpdump_stop_nosleep()
{
	local if_name=$1
	local pid=${cappid[$if_name]}

	$ns_cmd kill "$pid" || return
	wait "$pid"
}
1760
# Stop the capture on an interface via tcpdump_stop_nosleep, then sleep for
# one second.
#   $1 - interface name
tcpdump_stop()
{
	local if_name=$1

	tcpdump_stop_nosleep "$if_name"
	sleep 1
}
1766
# Delete the two temporary files that tcpdump_start created for an
# interface: the capture file and the tcpdump output file.
#   $1 - interface name used as the key into capfile[] and capout[]
tcpdump_cleanup()
{
	local if_name=$1
	local pcap=${capfile[$if_name]}
	local outfile=${capout[$if_name]}

	rm $pcap $outfile
}
1773
# Print the packets captured for an interface by reading its capture file
# back with tcpdump (-e -nn); tcpdump's stderr is merged into stdout.
#   $1 - interface name used as the key into capfile[]
tcpdump_show()
{
	local if_name=$1
	local pcap=${capfile[$if_name]}

	tcpdump -e -nn -r $pcap 2>&1
}
1780
# Send one UDP packet (sp=2048, dp=4096) from host1_if and report whether it
# was seen on host2_if.
#   $1 - destination MAC
#   $2 - source IP
#   $3 - destination IP; anything not shaped like a dotted quad is treated
#        as IPv6
#   $4 - host1_if, the sending interface
#   $5 - host2_if, the interface on which arrival is checked
# return 0 if the packet wasn't seen on host2_if or 1 if it was
mcast_packet_test()
{
	local mac=$1
	local src_ip=$2
	local ip=$3
	local host1_if=$4
	local host2_if=$5
	local seen=0
	local tc_proto="ip"
	local mz_v6arg=""
	local v4_re='^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$'

	# basic check to see if we were passed an IPv4 address, if not assume IPv6
	if ! [[ $ip =~ $v4_re ]]; then
		tc_proto="ipv6"
		mz_v6arg="-6"
	fi

	# Add an ACL on `host2_if` which will tell us whether the packet
	# was received by it or not.
	tc qdisc add dev $host2_if ingress
	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
		flower ip_proto udp dst_mac $mac action drop

	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
	sleep 1

	# jq -e succeeds only if the filter counted exactly one packet.
	if tc -j -s filter show dev $host2_if ingress \
		| jq -e ".[] | select(.options.handle == 101) \
		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
	then
		seen=1
	fi

	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
	tc qdisc del dev $host2_if ingress

	return $seen
}
1820
# Check the bridge MDB of br0 after a membership report: the *,G entry of
# $TEST_GROUP must list every given source, and an S,G entry must exist for
# each of them. Failures are recorded through check_err.
#   $1    - report name, only used in the error message
#   $2... - source addresses expected for $TEST_GROUP
brmcast_check_sg_entries()
{
	local report=$1; shift
	local slist=("$@")
	local sarg=""
	# Loop variables are local so that a caller's own $src / $sgent is
	# not overwritten.
	local src sgent

	# Build one jq condition per expected source for the *,G entry.
	for src in "${slist[@]}"; do
		sarg="${sarg} and .source_list[].address == \"$src\""
	done
	bridge -j -d -s mdb show dev br0 \
		| jq -e ".[].mdb[] | \
			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
	check_err $? "Wrong *,G entry source list after $report report"

	for sgent in "${slist[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
	done
}
1842
# For each source, send a packet for $TEST_GROUP from $h2 and check with
# mcast_packet_test whether it arrives at $h1.
#   $1    - 1 if the traffic is expected to be forwarded, anything else if
#           it is expected to be blocked
#   $2... - source addresses to probe
# mcast_packet_test returns 1 when the packet was seen, hence check_fail is
# used when forwarding is expected and check_err when it is not.
brmcast_check_sg_fwding()
{
	local should_fwd=$1; shift
	local sources=("$@")
	# Local so that a caller's own $src / $retval is not overwritten.
	local src retval

	for src in "${sources[@]}"; do
		retval=0

		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
		retval=$?
		if [[ $should_fwd -eq 1 ]]; then
			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
		else
			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
		fi
	done
}
1860
# Check, for each source of $TEST_GROUP in the MDB of br0, whether it is in
# the blocked state: its source-list timer reads "0.00" and its S,G entry
# carries the "blocked" flag.
#   $1    - 1 if the sources are expected to be blocked, anything else if
#           they are not
#   $2... - source addresses to check
# Results go to check_err_fail, whose first argument is 0 when the jq match
# is expected to succeed (blocked) and 1 when it is expected to fail.
brmcast_check_sg_state()
{
	local is_blocked=$1; shift
	local sources=("$@")
	local should_fail=1
	# Local so that a caller's own $src is not overwritten.
	local src

	if [[ $is_blocked -eq 1 ]]; then
		should_fail=0
	fi

	for src in "${sources[@]}"; do
		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
				 .source_list[] |
				 select(.address == \"$src\") |
				 select(.timer == \"0.00\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has zero timer"

		bridge -j -d -s mdb show dev br0 \
			| jq -e ".[].mdb[] | \
				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
				 .flags[] == \"blocked\")" &>/dev/null
		check_err_fail $should_fail $? "Entry $src has blocked flag"
	done
}
1887
# Join a multicast group on an interface by starting mreceive in the
# background, run through "ip vrf exec" under the name that master_name_get
# reports for the interface.
#   $1 - interface name
#   $2 - multicast group
# The PID of the background job is stored in the global mreceive_pid, which
# mc_leave() uses. Sleeps one second after starting mreceive.
mc_join()
{
	local if_name=$1
	local group=$2
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	# We don't care about actual reception, just about joining the
	# IP multicast group and adding the L2 address to the device's
	# MAC filtering table
	ip vrf exec $vrf_name mreceive -g $group -I $if_name \
		> /dev/null 2>&1 &
	mreceive_pid=$!

	sleep 1
}
1903
# Terminate the background process whose PID mc_join() stored in the global
# mreceive_pid, and wait for it to exit. If the kill fails, its status is
# returned and the wait is skipped.
mc_leave()
{
	kill "$mreceive_pid" || return
	wait "$mreceive_pid"
}
1908
# Send one multicast packet ("-c 1") with msend, run through "ip vrf exec"
# under the name that master_name_get reports for the interface.
#   $1 - interface name
#   $2 - group argument handed to msend -g
# All output is discarded; the exit status is that of the command.
mc_send()
{
	local if_name=$1
	local groups=$2
	local vrf_name

	vrf_name=$(master_name_get $if_name)

	ip vrf exec $vrf_name msend -g $groups -I $if_name -c 1 \
		> /dev/null 2>&1
}
1918
# Start the multicast routing daemon $MCD for the test interfaces and
# schedule its teardown with defer.
#   $@ - additional interfaces to enable besides ${NETIFS[p1..pNUM_NETIFS]}
# A temporary directory receives the daemon's configuration and PID file,
# both named after $MCD_TABLE_NAME. Additional interfaces lacking the
# MULTICAST flag get it switched on, and switched off again via defer.
# Returns 1 if $MCD or $MC_CLI fails check_command.
adf_mcd_start()
{
	local -a mcd_extra_ifs=("$@")
	local mcd_table="$MCD_TABLE_NAME"
	local mcd_dir mcd_conf mcd_pidfile mcd_pid mcd_if mcd_idx

	check_command "$MCD" || return 1
	check_command "$MC_CLI" || return 1

	mcd_dir=$(mktemp -d)
	defer rm -rf "$mcd_dir"

	mcd_conf="$mcd_dir/$mcd_table.conf"
	mcd_pidfile="$mcd_dir/$mcd_table.pid"

	# One "phyint" line per topology interface ...
	for ((mcd_idx = 1; mcd_idx <= NUM_NETIFS; ++mcd_idx)); do
		echo "phyint ${NETIFS[p$mcd_idx]} enable" >> "$mcd_conf"
	done

	# ... and one per interface named by the caller.
	for mcd_if in "${mcd_extra_ifs[@]}"; do
		if ! ip_link_has_flag "$mcd_if" MULTICAST; then
			ip link set dev "$mcd_if" multicast on
			defer ip link set dev "$mcd_if" multicast off
		fi

		echo "phyint $mcd_if enable" >> "$mcd_conf"
	done

	"$MCD" -N -I "$mcd_table" -f "$mcd_conf" -P "$mcd_pidfile"

	# Wait for the PID file to exist before reading it.
	busywait "$BUSYWAIT_TIMEOUT" test -e "$mcd_pidfile"
	mcd_pid=$(cat "$mcd_pidfile")
	defer kill_process "$mcd_pid"
}
1956
# Invoke the multicast routing CLI $MC_CLI with "-I $MCD_TABLE_NAME"
# followed by the caller's arguments, unchanged.
mc_cli()
{
	"$MC_CLI" -I "$MCD_TABLE_NAME" "$@"
}
1963
# Start "<ip> monitor <type>" in the background, with its output collected
# in a temporary file under /var/run.
#   $1 - monitor type, e.g. the "stats" that hw_stats_monitor_test passes
#   $2 - optional ip command to run (default: ip)
# Prints "<pid> <tmpfile>" on stdout, the two leading arguments that
# stop_ip_monitor() takes. tmpfile and mpid are set as globals.
start_ip_monitor()
{
	local mtype=$1; shift
	local ip=${1-ip}; shift

	# start the monitor in the background
	tmpfile=$(mktemp /var/run/nexthoptestXXX)
	mpid=$( ($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null )
	sleep 0.2
	echo "$mpid $tmpfile"
}
1975
# Stop a monitor started by start_ip_monitor() and verify how many events
# it recorded.
#   $1 - PID of the monitor
#   $2 - file holding the monitor's output
#   $3 - expected number of events
#   $4 - description used in the error message
# An event is a line starting with a word character. The comparison result
# is handed to check_err; the output file is removed afterwards.
stop_ip_monitor()
{
	local mpid=$1 tmpfile=$2 el=$3 what=$4
	local lines

	sleep 0.2
	kill $mpid

	lines=$(grep '^\w' $tmpfile | wc -l)
	[ $lines -eq $el ]
	check_err $? "$what: $lines lines of events, expected $el"

	rm -rf $tmpfile
}
1990
# Test case "<type>_stats notifications": expect exactly one "ip monitor
# stats" event for each of four steps - enabling the stats, making the
# device suitable, making it unsuitable again, and disabling the stats.
#   $1 - netdevice
#   $2 - stats type; "<type>_stats" is the keyword given to "ip stats set"
#   $3 - command run to trigger the "installation" notification
#   $4 - command run to trigger the "deinstallation" notification
#   $5 - optional ip command to use (default: ip)
# Resets RET and logs the result with log_test.
hw_stats_monitor_test()
{
	local dev=$1 type=$2 make_suitable=$3 make_unsuitable=$4
	local ip=${5-ip}
	local ipmout

	RET=0

	# Expect a notification about enablement.
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev ${type}_stats on
	stop_ip_monitor $ipmout 1 "${type}_stats enablement"

	# Expect a notification about offload.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_suitable
	stop_ip_monitor $ipmout 1 "${type}_stats installation"

	# Expect a notification about loss of offload.
	ipmout=$(start_ip_monitor stats "$ip")
	$make_unsuitable
	stop_ip_monitor $ipmout 1 "${type}_stats deinstallation"

	# Expect a notification about disablement
	ipmout=$(start_ip_monitor stats "$ip")
	$ip stats set dev $dev ${type}_stats off
	stop_ip_monitor $ipmout 1 "${type}_stats disablement"

	log_test "${type}_stats notifications"
}
2023
# Print a dotted-quad IPv4 address as colon-separated two-digit hex bytes,
# e.g. 192.0.2.1 -> c0:00:02:01. No trailing newline is written.
#   $1 - IPv4 address
ipv4_to_bytes()
{
	local IP=$1; shift
	local hexbytes

	# One "%02x:" per dot-separated field, then drop the final colon.
	printf -v hexbytes '%02x:' ${IP//./ }
	printf '%s' "${hexbytes%:}"
}
2031
# Print the fully expanded form of the IPv6 address `IP': a "::" token, if
# present, is replaced by the zero groups it stands for, and every 16-bit
# group is left-padded with zeroes to 4 hexadecimal digits. The optional
# `BYTESEP' string is inserted between the two bytes of each group.
# No trailing newline is written.
#   $1 - IP
#   $2 - BYTESEP (optional)
expand_ipv6()
{
	local IP=$1; shift
	local bytesep=$1; shift

	# Mark the position of "::" with a placeholder.
	local marked_ip=${IP/::/_}
	# Colons that remain, i.e. those that are not part of "::".
	local present=${marked_ip//[^:]/}
	local all_colons=:::::::
	# Replace the placeholder with the colons missing from the full seven:
	local filled_ip=${marked_ip/_/${all_colons:${#present}}}

	# One group per line: pad, keep the last four digits (split by
	# bytesep), then join the lines with colons again.
	echo $filled_ip | tr : '\n' |
	    sed -e 's/^/0000/' \
		-e 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' |
	    tr '\n' : |
	    sed 's/:$//'
}
2053
# Print an IPv6 address as colon-separated bytes: expand_ipv6 with ":" as
# the byte separator.
#   $1 - IPv6 address
ipv6_to_bytes()
{
	expand_ipv6 "$1" :
}
2060
# Print a 16-bit value as two colon-separated hex bytes, most significant
# byte first, e.g. 258 -> 01:02. Only the four least significant hex digits
# are used. No trailing newline is written.
#   $1 - the number
u16_to_bytes()
{
	local u16=$1; shift
	local hex

	printf -v hex "%04x" $u16
	hex=${hex: -4}
	printf '%s:%s' "${hex:0:2}" "${hex:2:2}"
}
2067
# Given a mausezahn-formatted payload (colon-separated bytes given as %02x),
# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be,
# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any)
# stands for 00:00.
#   $1 - the payload template
# The sum is computed by a generated dc program. Only dc's input radix is
# switched to 16; the result is printed by dc's "p" command.
payload_template_calc_checksum()
{
	local payload=$1; shift

	{
		# Prologue: set input radix, push zero for the initial checksum.
		printf '%s\n' "16i" 0

		# Body: pad the payload with a terminating 00: in case we get
		# an odd number of bytes, turn each byte pair into "<word>+",
		# which adds the word to the checksum, and strip the extra odd
		# byte we pushed if it was left unconverted.
		echo "${payload%:}:00:" |
		    sed 's/CHECKSUM/00:00/g' |
		    tr '[:lower:]' '[:upper:]' |
		    sed 's/\(..\):\(..\):/\1\2+\n/g' |
		    sed 's/\(..\):$//'

		# Epilogue: calculate and add carry, then bit-flip and print.
		printf '%s\n' "10000 ~ +" "FFFF r - p"
	} |
	    dc |
	    tr '[:upper:]' '[:lower:]'
}
2098
# Print a payload template with every CHECKSUM keyword replaced by the two
# bytes that u16_to_bytes produces for the given checksum.
#   $1 - the payload template
#   $2 - checksum value
payload_template_expand_checksum()
{
	local payload=$1; shift
	local checksum=$1; shift
	local ckbytes

	ckbytes=$(u16_to_bytes $checksum)

	sed "s/CHECKSUM/$ckbytes/g" <<< "$payload"
}
2108
# Print the number of bytes a payload template stands for. A trailing colon
# is ignored and the CHECKSUM keyword is expanded (with a checksum of 0)
# before counting, so that it accounts for two bytes.
#   $1 - the payload template
payload_template_nbytes()
{
	local payload=$1; shift

	# One byte per line, then count the lines.
	payload_template_expand_checksum "${payload%:}" 0 |
		tr ':' '\n' | wc -l
}
2116
# Print a mausezahn payload for an IGMPv3 membership report holding one
# IS_IN group record, with the checksum filled in.
#   $1    - multicast group address
#   $2... - source addresses listed in the record
igmpv3_is_in_get()
{
	local GRP=$1; shift
	local -a sources=("$@")
	local nsources igmpv3 checksum src

	nsources=$(u16_to_bytes ${#sources[@]})

	# IS_IN ( $sources )
	igmpv3="22:"				# Type - Membership Report
	igmpv3+="00:"				# Reserved
	igmpv3+="CHECKSUM:"			# Checksum
	igmpv3+="00:00:"			# Reserved
	igmpv3+="00:01:"			# Number of Group Records
	igmpv3+="01:"				# Record Type - IS_IN
	igmpv3+="00:"				# Aux Data Len
	igmpv3+="${nsources}:"			# Number of Sources
	igmpv3+="$(ipv4_to_bytes $GRP):"	# Multicast Address
	for src in "${sources[@]}"; do		# Source Addresses
		igmpv3+="$(ipv4_to_bytes $src):"
	done

	checksum=$(payload_template_calc_checksum "$igmpv3")

	payload_template_expand_checksum "$igmpv3" $checksum
}
2145
# Print a mausezahn payload for an IGMPv2 Leave Group message, with the
# checksum filled in.
#   $1 - multicast group address
igmpv2_leave_get()
{
	local GRP=$1; shift
	local payload checksum

	payload="17:"				# Type - Leave Group
	payload+="00:"				# Max Resp Time - not meaningful
	payload+="CHECKSUM:"			# Checksum
	payload+="$(ipv4_to_bytes $GRP)"	# Group Address

	checksum=$(payload_template_calc_checksum "$payload")

	payload_template_expand_checksum "$payload" $checksum
}
2160
# Print a mausezahn payload for an MLDv2 report holding one IS_IN group
# record: an 8-byte hop-by-hop header followed by the ICMPv6 message, with
# the checksum filled in.
#   $1    - source IP of the packet; only used for the checksum
#   $2    - multicast group address
#   $3... - source addresses listed in the record
# The checksum is calculated over a pseudo-header (source IP, group as the
# destination, upper-layer length, next header 0x3a) plus the ICMPv6
# message. The hop-by-hop header is not part of the checksummed data.
mldv2_is_in_get()
{
	local SIP=$1; shift
	local GRP=$1; shift
	local -a sources=("$@")
	local hbh icmpv6 nsources len sudohdr checksum src

	nsources=$(u16_to_bytes ${#sources[@]})

	hbh="3a:"				# Next Header - ICMPv6
	hbh+="00:"				# Hdr Ext Len
	hbh+="00:00:00:00:00:00:"		# Options and Padding

	icmpv6="8f:"				# Type - MLDv2 Report
	icmpv6+="00:"				# Code
	icmpv6+="CHECKSUM:"			# Checksum
	icmpv6+="00:00:"			# Reserved
	icmpv6+="00:01:"			# Number of Group Records
	icmpv6+="01:"				# Record Type - IS_IN
	icmpv6+="00:"				# Aux Data Len
	icmpv6+="${nsources}:"			# Number of Sources
	icmpv6+="$(ipv6_to_bytes $GRP):"	# Multicast address
	for src in "${sources[@]}"; do		# Source Addresses
		icmpv6+="$(ipv6_to_bytes $src):"
	done

	len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))

	sudohdr="$(ipv6_to_bytes $SIP):"	# SIP
	sudohdr+="$(ipv6_to_bytes $GRP):"	# DIP is multicast address
	sudohdr+="${len}:"			# Upper-layer length
	sudohdr+="00:3a:"			# Zero and next-header

	checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})

	payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
2204
# Print a mausezahn payload for an MLDv1 Done message: an 8-byte hop-by-hop
# header followed by the ICMPv6 message, with the checksum filled in.
#   $1 - source IP of the packet; only used for the checksum
#   $2 - multicast group address
# The checksum is calculated over a pseudo-header (source IP, group as the
# destination, upper-layer length, next header 0x3a) plus the ICMPv6
# message. The hop-by-hop header is not part of the checksummed data.
mldv1_done_get()
{
	local SIP=$1; shift
	local GRP=$1; shift
	local hbh icmpv6 len sudohdr checksum

	hbh="3a:"				# Next Header - ICMPv6
	hbh+="00:"				# Hdr Ext Len
	hbh+="00:00:00:00:00:00:"		# Options and Padding

	icmpv6="84:"				# Type - MLDv1 Done
	icmpv6+="00:"				# Code
	icmpv6+="CHECKSUM:"			# Checksum
	icmpv6+="00:00:"			# Max Resp Delay - not meaningful
	icmpv6+="00:00:"			# Reserved
	icmpv6+="$(ipv6_to_bytes $GRP):"	# Multicast address

	len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))

	sudohdr="$(ipv6_to_bytes $SIP):"	# SIP
	sudohdr+="$(ipv6_to_bytes $GRP):"	# DIP is multicast address
	sudohdr+="${len}:"			# Upper-layer length
	sudohdr+="00:3a:"			# Zero and next-header

	checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})

	payload_template_expand_checksum "$hbh$icmpv6" $checksum
}
2239
# Warn when the lldpad service is active and, unless the user opted in via
# ALLOW_LLDPAD, skip the calling test.
#   $1 - what lldpad will likely do (inserted into the warning text)
#   $2 - what this test will do (inserted into the warning text)
# Behavior:
#   - lldpad not active: nothing is printed, returns 0.
#   - lldpad active, ALLOW_LLDPAD non-empty: the warning is printed to
#     stderr, returns 0 so that the test carries on.
#   - lldpad active, ALLOW_LLDPAD empty or unset: the warning and a hint
#     are printed, the test is logged as skipped under
#     "<calling file>:<calling function>", and the shell exits with
#     $EXIT_STATUS.
bail_on_lldpad()
{
	local reason1="$1"; shift
	local reason2="$1"; shift
	local caller=${FUNCNAME[1]}
	local src=${BASH_SOURCE[1]}

	systemctl is-active --quiet lldpad || return 0

	printf '%s\n' \
		"WARNING: lldpad is running" \
		"" \
		"lldpad will likely $reason1, and this test will" \
		"$reason2. Both are not supported at the same time," \
		"one of them is arbitrarily going to overwrite the" \
		"other. That will cause spurious failures (or, unlikely," \
		"passes) of this test." >&2

	# Explicit success: a bare "return" here would hand back the status
	# of the preceding test instead.
	[[ -z $ALLOW_LLDPAD ]] || return 0

	printf '%s\n' \
		"" \
		"If you want to run the test anyway, please set" \
		"an environment variable ALLOW_LLDPAD to a" \
		"non-empty string." >&2
	log_test_skip $src:$caller
	exit $EXIT_STATUS
}
2273
# Print the absolute value of an integer.
#   $1 - the number
absval()
{
	local v=$1; shift

	if ((v > 0)); then
		echo $((v))
	else
		echo $((-v))
	fi
}
2280
# Probe a device by stacking a temporary macvlan ("macvlan-tmp", with the
# device's MAC address + 1) on top of it and reading the device's
# promiscuity counter.
#   $1 - netdevice to probe
# Prints "no" if the counter reads 1 while the macvlan is up, "yes"
# otherwise. The device is set up, and the macvlan is deleted again.
has_unicast_flt()
{
	local dev=$1; shift
	local mac_addr tmp next_mac promisc

	mac_addr=$(mac_get $dev)
	tmp=$(ether_addr_to_u64 $mac_addr)

	ip link set $dev up
	ip link add link $dev name macvlan-tmp type macvlan mode private
	next_mac=$(u64_to_ether_addr $((tmp + 1)))
	ip link set macvlan-tmp address $next_mac
	ip link set macvlan-tmp up

	promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity')

	ip link del macvlan-tmp

	if [[ $promisc == 1 ]]; then
		echo "no"
	else
		echo "yes"
	fi
}
2299