1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3
# Current time in seconds since the epoch; it only feeds the namespace
# suffix below so that names differ between runs.
sec=$(date +%s)
# Per-run suffix: hex timestamp plus a random 6-character tag from mktemp.
rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX)
# Names of the three network namespaces used by the test.
ns1="ns1-$rndh"
ns2="ns2-$rndh"
ns3="ns3-$rndh"
# Set to "true" by -c to record a tcpdump capture for every transfer.
capture=false
# Exit code used when the test cannot run (no ip tool, netns creation failed).
ksft_skip=4
# Value handed to mptcp_connect via -t.
timeout_poll=30
# Hard limit, enforced with timeout(1), for each mptcp_connect instance.
timeout_test=$((timeout_poll * 2 + 1))
# Transfer counter; each transfer uses port 10000 + test_cnt.
test_cnt=1
# Exit status of the script: status of the last failed transfer, 0 if none.
ret=0
# Set to 1 by -b to stop at the first failing transfer.
bail=0
# Extra milliseconds added to the computed transfer time limit.
slack=50
16
# Print the command-line help for this script on stdout.
# Any arguments are ignored.
usage() {
	echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -h ]"
	echo -e "\t-b: bail out after first error, otherwise runs all testcases"
	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
	echo -e "\t-d: debug this script"
	echo -e "\t-h: show this help and exit"
}
23
# Remove the temporary files created by setup() and delete the three test
# network namespaces. setup() installs this function as the EXIT trap.
# Globals read: cout, sout, large, small, capout, ns1, ns2, ns3
cleanup()
{
	local netns

	rm -f "$cout" "$sout"
	rm -f "$large" "$small"
	rm -f "$capout"

	for netns in "$ns1" "$ns2" "$ns3"; do
		ip netns del "$netns"
	done
}
35
# Every step of this test goes through the ip tool: report a skip (not a
# failure) when it cannot be executed.
if ! ip -Version > /dev/null 2>&1; then
	echo "SKIP: Could not run test without ip tool"
	exit $ksft_skip
fi
41
#  ns1              ns2                    ns3
#     ns1eth1    ns2eth1   ns2eth3      ns3eth1
#            netem
#     ns1eth2    ns2eth2
#            netem
47
# Build the test environment: temporary payload/output files, the three
# network namespaces, the veth links between them, addresses, routes and
# the MPTCP path-manager configuration done through ./pm_nl_ctl.
# Exits with $ksft_skip when a namespace cannot be created.
# Globals read:    ns1, ns2, ns3, ksft_skip
# Globals written: large, small, sout, cout, capout, size, slack
setup()
{
	local netns

	large=$(mktemp)
	small=$(mktemp)
	sout=$(mktemp)
	cout=$(mktemp)
	capout=$(mktemp)
	size=$((2 * 2048 * 4096))

	# small payload: 20 blocks of 4096 bytes; large payload: $size bytes
	dd if=/dev/zero of="$small" bs=4096 count=20 >/dev/null 2>&1
	dd if=/dev/zero of="$large" bs=4096 count=$((size / 4096)) >/dev/null 2>&1

	trap cleanup EXIT

	for netns in "$ns1" "$ns2" "$ns3"; do
		ip netns add "$netns" || exit $ksft_skip
		ip -net "$netns" link set lo up
		ip netns exec "$netns" sysctl -q net.ipv4.conf.all.rp_filter=0
		ip netns exec "$netns" sysctl -q net.ipv4.conf.default.rp_filter=0
	done

	ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
	ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
	ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3"

	# ns1, first link: carries the default routes without explicit metric
	ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1
	ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad
	ip -net "$ns1" link set ns1eth1 up mtu 1500
	ip -net "$ns1" route add default via 10.0.1.2
	ip -net "$ns1" route add default via dead:beef:1::2

	# ns1, second link: default routes with metric 101
	ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
	ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
	ip -net "$ns1" link set ns1eth2 up mtu 1500
	ip -net "$ns1" route add default via 10.0.2.2 metric 101
	ip -net "$ns1" route add default via dead:beef:2::2 metric 101

	# register the second ns1 address as an MPTCP subflow endpoint
	ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
	ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow

	# ns2 sits between ns1 and ns3 and forwards both IPv4 and IPv6
	ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
	ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
	ip -net "$ns2" link set ns2eth1 up mtu 1500

	ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2
	ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad
	ip -net "$ns2" link set ns2eth2 up mtu 1500

	ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3
	ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad
	ip -net "$ns2" link set ns2eth3 up mtu 1500
	ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
	ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1

	ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1
	ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad
	ip -net "$ns3" link set ns3eth1 up mtu 1500
	ip -net "$ns3" route add default via 10.0.3.2
	ip -net "$ns3" route add default via dead:beef:3::2

	ip netns exec "$ns3" ./pm_nl_ctl limits 1 1

	# A debug kernel can measurably slow down the test program, while the
	# run-time limit is kept tight to ensure maximum B/W usage.
	# Use the presence of kmemleak/lockdep/kasan/prove_locking symbols as
	# a rough indication of a debug kernel and raise the maximum run-time
	# accordingly. The amount added was determined from observed run
	# times of CI builds running selftests, including kbuild.
	if grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms; then
		slack=$((slack+550))
	fi
}
120
# Poll, at most 10 times with a 0.1s pause in between, until the TCP tables
# of the given namespace show a listening socket on the given port.
# Gives up silently when the listener never shows up.
# $1: ns, $2: port
wait_local_port_listen()
{
	local listener_ns="${1}"
	local port="${2}"
	local hex_port attempt

	# /proc/net/tcp* shows the local endpoint as ADDR:PORT in hex (2nd
	# column) and the socket state in the 4th one; 0A is TCP_LISTEN.
	printf -v hex_port "%04X" "${port}"

	for attempt in {1..10}; do
		if ip netns exec "${listener_ns}" cat /proc/net/tcp* |
		   awk -v pat=":${hex_port}\$" \
			'$2 ~ pat && $4 ~ /0A/ { found = 1; exit } END { exit !found }'; then
			break
		fi
		sleep 0.1
	done
}
137
# Run one transfer between a listener in "$ns3" and a connector in "$ns1",
# then check both exit codes and compare what each side received with what
# the other side sent.
# $1: file fed to the connector, $2: file fed to the listener,
# $3: value passed to mptcp_connect via -T and printed in the result line
# Globals read:    cout, sout, capout, capture, rndh, ns1, ns3,
#                  timeout_test, timeout_poll
# Globals written: test_cnt (incremented; selects the port 10000 + test_cnt)
# Returns 0 when both peers exit with 0 and both payloads match, 1 otherwise.
do_transfer()
{
	local cin=$1
	local sin=$2
	local max_time=$3
	local port
	port=$((10000 + test_cnt))
	test_cnt=$((test_cnt + 1))

	: > "$cout"
	: > "$sout"
	: > "$capout"

	local cappid_listener cappid_connector
	if $capture; then
		local capfile="${rndh}-${port}"
		local -a capopt=(-i any -s 65535 -B 32768)

		# when running under sudo, let tcpdump write the capture
		# files as the invoking user
		if [ -n "$SUDO_USER" ]; then
			capopt+=(-Z "$SUDO_USER")
		fi

		ip netns exec "${ns3}" tcpdump "${capopt[@]}" -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
		cappid_listener=$!

		ip netns exec "${ns1}" tcpdump "${capopt[@]}" -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
		cappid_connector=$!

		# give tcpdump some time to start capturing
		sleep 1
	fi

	timeout "${timeout_test}" \
		ip netns exec "${ns3}" \
			./mptcp_connect -jt "${timeout_poll}" -l -p "$port" -T "$max_time" \
				0.0.0.0 < "$sin" > "$sout" &
	local spid=$!

	wait_local_port_listen "${ns3}" "${port}"

	timeout "${timeout_test}" \
		ip netns exec "${ns1}" \
			./mptcp_connect -jt "${timeout_poll}" -p "$port" -T "$max_time" \
				10.0.3.3 < "$cin" > "$cout" &
	local cpid=$!

	wait "$cpid"
	local retc=$?
	wait "$spid"
	local rets=$?

	if $capture; then
		sleep 1
		kill "${cappid_listener}"
		kill "${cappid_connector}"
	fi

	# each side must have received exactly what the other one sent
	cmp "$sin" "$cout" > /dev/null 2>&1
	local cmps=$?
	cmp "$cin" "$sout" > /dev/null 2>&1
	local cmpc=$?

	printf "%-16s" " max $max_time "
	if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
		echo "[ OK ]"
		cat "$capout"
		return 0
	fi

	echo " [ fail ]"
	echo "client exit code $retc, server $rets" 1>&2
	echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
	ip netns exec "${ns3}" ss -nita 1>&2 -o "sport = :$port"
	echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
	ip netns exec "${ns1}" ss -nita 1>&2 -o "dport = :$port"
	ls -l "$sin" "$cout"
	ls -l "$cin" "$sout"

	cat "$capout"
	return 1
}
224
# Shape the two ns1<->ns2 links with netem, then run one transfer in each
# direction with a time limit derived from the configured rates.
# $1: rate of the first link (mbit), $2: rate of the second link (mbit),
# $3: delay for the first link, $4: delay for the second link (0: none),
# remaining arguments: test description
# Globals read:    ns1, ns2, size, slack, small, large, bail
# Globals written: ret (status of the last failed transfer)
# Exits with that status on the first failure when bail is not 0.
run_test()
{
	local rate1=$1
	local rate2=$2
	local delay1=$3
	local delay2=$4
	local lret
	local dev
	shift 4
	local msg=$*

	# netem options for each link; the delay clause is added only when
	# a non-zero delay was requested
	local -a netem1=(rate "${rate1}mbit")
	local -a netem2=(rate "${rate2}mbit")
	if [ "$delay1" -gt 0 ]; then
		netem1+=(delay "$delay1")
	fi
	if [ "$delay2" -gt 0 ]; then
		netem2+=(delay "$delay2")
	fi

	# drop any qdisc left over from a previous run; errors are expected
	# when there is none
	for dev in ns1eth1 ns1eth2; do
		tc -n "$ns1" qdisc del dev "$dev" root >/dev/null 2>&1
	done
	for dev in ns2eth1 ns2eth2; do
		tc -n "$ns2" qdisc del dev "$dev" root >/dev/null 2>&1
	done
	tc -n "$ns1" qdisc add dev ns1eth1 root netem "${netem1[@]}"
	tc -n "$ns1" qdisc add dev ns1eth2 root netem "${netem2[@]}"
	tc -n "$ns2" qdisc add dev ns2eth1 root netem "${netem1[@]}"
	tc -n "$ns2" qdisc add dev ns2eth2 root netem "${netem2[@]}"

	# time is measured in ms, account for transfer size, aggregated link
	# speed and header overhead (10%)
	local time=$((size * 8 * 1000 * 10 / ((rate1 + rate2) * 1024 * 1024 * 9)))

	# mptcp_connect will do some sleeps to allow the mp_join handshake
	# completion (see mptcp_connect): 200ms on each side, add some slack
	time=$((time + 400 + slack))

	printf "%-60s" "$msg"
	do_transfer "$small" "$large" "$time"
	lret=$?
	if [ $lret -ne 0 ]; then
		ret=$lret
		[ "$bail" -eq 0 ] || exit $ret
	fi

	printf "%-60s" "$msg - reverse direction"
	do_transfer "$large" "$small" "$time"
	lret=$?
	if [ $lret -ne 0 ]; then
		ret=$lret
		[ "$bail" -eq 0 ] || exit $ret
	fi
}
274
# Command line handling: -b and -c only set flags read by the functions
# above, -d turns on shell tracing, -h prints the help; an unknown option
# prints the help and fails.
while getopts "bcdh" option; do
	case "$option" in
	b) bail=1 ;;
	c) capture=true ;;
	d) set -x ;;
	h)
		usage "$0"
		exit 0
		;;
	\?)
		usage "$0"
		exit 1
		;;
	esac
done

# Build the namespaces once, then run every scenario on top of them.
# run_test arguments: rate1 rate2 delay1 delay2 description
setup
run_test 10 10 0 0 "balanced bwidth"
run_test 10 10 1 50 "balanced bwidth with unbalanced delay"

# we still need some additional infrastructure to pass the following test-cases
run_test 30 10 0 0 "unbalanced bwidth"
run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"

# ret holds the status of the last failed transfer, 0 when all passed
exit $ret
306