#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
# Creates following default topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# and responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.
#
# You can check with different Originator/Link/Responder MTU eg:
# nft_flowtable.sh -o8000 -l1500 -r2000
#

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0

# Paths of the temporary transfer files. They start out empty so that
# cleanup() can run safely before the files have been created.
ns1in=""
ns2in=""
ns1out=""
ns2out=""

# Remember the current setting so cleanup() can put it back.
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)

# Run a probe command and skip the whole test if it fails.
#   $1 - command line to run (word-split on purpose, output discarded)
#   $2 - text appended to "SKIP: Could not " when the probe fails
checktool (){
	# $1 is intentionally unquoted: it carries the command and its arguments.
	if ! $1 > /dev/null 2>&1; then
		echo "SKIP: Could not $2"
		exit $ksft_skip
	fi
}

# Delete the four namespaces and the temporary files, then restore
# nf_log_all_netns if it was 0 before the test switched it on.
cleanup() {
	local i

	for i in 1 2; do
		ip netns del "ns$i"
		ip netns del "nsr$i"
	done

	rm -f "$ns1in" "$ns1out"
	rm -f "$ns2in" "$ns2out"

	# String comparison: an empty value (sysctl not readable) must not
	# trigger a '[' syntax error, it simply means nothing to restore.
	if [ "$log_netns" = "0" ]; then
		sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
	fi
}

checktool "nft --version" "run test without nft tool"
checktool "ip -Version" "run test without ip tool"
checktool "command -v nc" "run test without nc (netcat)"
# This probe has a side effect: on success the nsr1 namespace exists.
checktool "ip netns add nsr1" "create net namespace"

# From here on namespaces exist, so make sure they are removed on any exit
# path, including an interrupt while the remaining ones are being created.
trap cleanup EXIT

ip netns add ns1
ip netns add ns2

ip netns add nsr2

sysctl -q net.netfilter.nf_log_all_netns=1

ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2

ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2

for dev in lo veth0 veth1; do
	for i in 1 2; do
		ip -net "nsr$i" link set "$dev" up
	done
done

ip -net nsr1 addr add 10.0.1.1/24 dev veth0
ip -net nsr1 addr add dead:1::1/64 dev veth0

ip -net nsr2 addr add 10.0.2.1/24 dev veth1
ip -net nsr2 addr add dead:2::1/64 dev veth1

# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
# or to do PMTU discovery (send ICMP error back to originator).
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.

omtu=9000
lmtu=1500
rmtu=2000

# Print the option summary and terminate with status 1.
usage(){
	echo "nft_flowtable.sh [OPTIONS]"
	echo
	echo "MTU options"
	echo " -o originator"
	echo " -l link"
	echo " -r responder"
	exit 1
}

while getopts "o:l:r:" o
do
	case $o in
		o) omtu=$OPTARG;;
		l) lmtu=$OPTARG;;
		r) rmtu=$OPTARG;;
		*) usage;;
	esac
done

if ! ip -net nsr1 link set veth0 mtu "$omtu"; then
	exit 1
fi

ip -net ns1 link set eth0 mtu "$omtu"

if ! ip -net nsr2 link set veth1 mtu "$rmtu"; then
	exit 1
fi

ip -net ns2 link set eth0 mtu "$rmtu"

# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
ip -net nsr1 addr add 192.168.10.1/24 dev veth1
ip -net nsr1 addr add fee1:2::1/64 dev veth1

ip -net nsr2 addr add 192.168.10.2/24 dev veth0
ip -net nsr2 addr add fee1:2::2/64 dev veth0

for i in 1 2; do
	ip netns exec "nsr$i" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
	ip netns exec "nsr$i" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null

	ip -net "ns$i" link set lo up
	ip -net "ns$i" link set eth0 up
	ip -net "ns$i" addr add "10.0.$i.99/24" dev eth0
	ip -net "ns$i" route add default via "10.0.$i.1"
	ip -net "ns$i" addr add "dead:$i::99/64" dev eth0
	ip -net "ns$i" route add default via "dead:$i::1"
	if ! ip netns exec "ns$i" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
		echo "ERROR: Check Originator/Responder values (problem during address addition)"
		exit 1
	fi

	# don't set ip DF bit for first two tests
	ip netns exec "ns$i" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done

ip -net nsr1 route add default via 192.168.10.2
ip -net nsr2 route add default via 192.168.10.1

# The here-document delimiter is unquoted on purpose: $lmtu is expanded by
# the shell before the ruleset is handed to nft.
ip netns exec nsr1 nft -f - <<EOF
table inet filter {
  flowtable f1 {
     hook ingress priority 0
     devices = { veth0, veth1 }
   }

   chain forward {
      type filter hook forward priority 0; policy drop;

      # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
      meta oif "veth1" tcp dport 12345 flow offload @f1 counter

      # use packet size to trigger 'should be offloaded by now'.
      # otherwise, if 'flow offload' expression never offloads, the
      # test will pass.
      tcp dport 12345 meta length gt 200 ct mark set 1 counter

      # this turns off flow offloading internally, so expect packets again
      tcp flags fin,rst ct mark set 0 accept

      # this allows large packets from responder, we need this as long
      # as PMTUd is off.
      # This rule is deleted for the last test, when we expect PMTUd
      # to kick in and ensure all packets meet mtu requirements.
      meta length gt $lmtu accept comment something-to-grep-for

      # next line blocks connection w.o. working offload.
      # we only do this for reverse dir, because we expect packets to
      # enter slow path due to MTU mismatch of veth0 and veth1.
      tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop

      ct state established,related accept

      # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
      meta length lt 200 oif "veth1" tcp dport 12345 counter accept

      meta nfproto ipv4 meta l4proto icmp accept
      meta nfproto ipv6 meta l4proto icmpv6 accept
   }
}
EOF

if [ $? -ne 0 ]; then
	echo "SKIP: Could not load nft ruleset"
	exit $ksft_skip
fi

# test basic connectivity
# (no interactive shell is spawned on failure: that would hang automated runs)
if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
	echo "ERROR: ns1 cannot reach ns2" 1>&2
	exit 1
fi

if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
	echo "ERROR: ns2 cannot reach ns1" 1>&2
	exit 1
fi

if [ $ret -eq 0 ];then
	echo "PASS: netns routing/connectivity: ns1 can reach ns2"
fi

ns1in=$(mktemp)
ns1out=$(mktemp)
ns2in=$(mktemp)
ns2out=$(mktemp)

# Fill a file with random data of random size.
#   $1 - path of the file to (over)write
# The file gets between 0 and 8191 KiB written in 1 KiB blocks, followed by
# another 128..1151 single bytes so the total is not a multiple of the
# block size.
make_file()
{
	local name=$1
	local blocks tail_bytes

	blocks=$((RANDOM % (1024 * 8)))

	dd if=/dev/urandom of="$name" bs=1024 count="$blocks" 2> /dev/null

	tail_bytes=$((RANDOM % 1024))
	tail_bytes=$((tail_bytes + 128))
	# Append through '>>' so the data written above is kept. The former
	# 'conf=notrunc of=...' operand was rejected by dd and, with stderr
	# discarded, the tail was silently never written.
	dd if=/dev/urandom bs=1 count="$tail_bytes" 2> /dev/null >> "$name"
}

# Compare a sent file with the received copy.
#   $1 - file that was sent
#   $2 - file that was received
#   $3 - label used in the failure message
# Returns 0 when both files are identical, 1 otherwise (after listing both).
check_transfer()
{
	local in=$1
	local out=$2
	local what=$3

	if ! cmp "$in" "$out" > /dev/null 2>&1; then
		echo "FAIL: file mismatch for $what" 1>&2
		ls -l "$in"
		ls -l "$out"
		return 1
	fi

	return 0
}

# Run one bidirectional netcat transfer and verify both directions.
#   $1 - namespace running the client (sends $ns1in, stores reply in $ns1out)
#   $2 - namespace running the listener on port 12345 (sends $ns2in,
#        stores received data in $ns2out)
#   $3 - destination address the client connects to
#   $4 - destination port the client connects to
# Returns 0 if both received files match what was sent, 1 otherwise.
test_tcp_forwarding_ip()
{
	local nsa=$1
	local nsb=$2
	local dstip=$3
	local dstport=$4
	local lret=0
	local lpid cpid

	ip netns exec "$nsb" nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
	lpid=$!

	# give the listener time to bind before the client connects
	sleep 1
	ip netns exec "$nsa" nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
	cpid=$!

	sleep 3

	# stop whichever side is still running after the grace period
	if ps -p "$lpid" > /dev/null;then
		kill "$lpid"
	fi

	if ps -p "$cpid" > /dev/null;then
		kill "$cpid"
	fi

	wait

	if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
		lret=1
	fi

	if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
		lret=1
	fi

	return $lret
}

# Transfer test against the responder's real address.
#   $1 - client namespace, $2 - listener namespace
test_tcp_forwarding()
{
	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345

	return $?
}

# Transfer test against the real address and, if that passes, against
# 10.6.6.6:1666 as well (the address the NAT rulesets below DNAT to the
# responder).
#   $1 - client namespace, $2 - listener namespace
test_tcp_forwarding_nat()
{
	local lret

	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
	lret=$?

	if [ $lret -eq 0 ] ; then
		test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
		lret=$?
	fi

	return $lret
}

make_file "$ns1in"
make_file "$ns2in"

# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
if test_tcp_forwarding ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2"
else
	echo "FAIL: flow offload for ns1/ns2:" 1>&2
	ip netns exec nsr1 nft list ruleset
	ret=1
fi

# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
ip -net ns2 route del default via 10.0.2.1
ip -net ns2 route del default via dead:2::1
ip -net ns2 route add 192.168.10.1 via 10.0.2.1

# Second test:
# Same, but with NAT enabled.
ip netns exec nsr1 nft -f - <<EOF
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

if test_tcp_forwarding_nat ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2 with NAT"
else
	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
	ip netns exec nsr1 nft list ruleset
	ret=1
fi

# Third test:
# Same as second test, but with PMTU discovery enabled.
handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)

# $handle is left unquoted on purpose: it holds the text after '#' in the
# listing and has to be split into separate arguments for nft.
if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
	echo "FAIL: Could not delete large-packet accept rule"
	exit 1
fi

ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

if test_tcp_forwarding_nat ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
	ip netns exec nsr1 nft list ruleset
	# a reported FAIL must be reflected in the exit status
	ret=1
fi

# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
ip -net nsr1 link add name br0 type bridge
ip -net nsr1 addr flush dev veth0
ip -net nsr1 link set up dev veth0
ip -net nsr1 link set veth0 master br0
ip -net nsr1 addr add 10.0.1.1/24 dev br0
ip -net nsr1 addr add dead:1::1/64 dev br0
ip -net nsr1 link set up dev br0

ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null

# br0 with NAT enabled.
ip netns exec nsr1 nft -f - <<EOF
flush table ip nat
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

if test_tcp_forwarding_nat ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
else
	echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
	ip netns exec nsr1 nft list ruleset
	ret=1
fi

# Another test:
# Add bridge interface br0 to Router1, with NAT and VLAN.
# Take veth0 out of br0 and enslave a VLAN 10 sub-interface in its place.
ip -net nsr1 link set veth0 nomaster
ip -net nsr1 link set down dev veth0
ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
ip -net nsr1 link set up dev veth0
ip -net nsr1 link set up dev veth0.10
ip -net nsr1 link set veth0.10 master br0

# Move the originator's IPv4/IPv6 addresses onto a matching VLAN device.
ip -net ns1 addr flush dev eth0
ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
ip -net ns1 link set eth0 up
ip -net ns1 link set eth0.10 up
ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
ip -net ns1 route add default via 10.0.1.1
ip -net ns1 addr add dead:1::99/64 dev eth0.10

if test_tcp_forwarding_nat ns1 ns2; then
	echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
else
	echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
	ip netns exec nsr1 nft list ruleset
	ret=1
fi

# restore test topology (remove bridge and VLAN)
ip -net nsr1 link set veth0 nomaster
ip -net nsr1 link set veth0 down
ip -net nsr1 link set veth0.10 down
ip -net nsr1 link delete veth0.10 type vlan
ip -net nsr1 link delete br0 type bridge
ip -net ns1 addr flush dev eth0.10
ip -net ns1 link set eth0.10 down
ip -net ns1 link set eth0 down
ip -net ns1 link delete eth0.10 type vlan

# restore address in ns1 and nsr1
ip -net ns1 link set eth0 up
ip -net ns1 addr add 10.0.1.99/24 dev eth0
ip -net ns1 route add default via 10.0.1.1
ip -net ns1 addr add dead:1::99/64 dev eth0
ip -net ns1 route add default via dead:1::1
ip -net nsr1 addr add 10.0.1.1/24 dev veth0
ip -net nsr1 addr add dead:1::1/64 dev veth0
ip -net nsr1 link set up dev veth0

# Throw-away keys for the tunnel: hex digests of the current process list.
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
SPI1=$RANDOM
SPI2=$RANDOM

# the two directions must not share an SPI
if [ "$SPI1" -eq "$SPI2" ]; then
	SPI2=$((SPI2+1))
fi

# Install ESP tunnel-mode states and policies in one namespace.
#   $1 - namespace to configure
#   $2 - local tunnel endpoint address
#   $3 - remote tunnel endpoint address
#   $4 - local network (selector)
#   $5 - remote network (selector)
#   $6 - SPI for outgoing traffic
#   $7 - SPI for incoming traffic
# Reads the global keys KEY_AES and KEY_SHA.
do_esp() {
	local ns=$1
	local me=$2
	local remote=$3
	local lnet=$4
	local rnet=$5
	local spi_out=$6
	local spi_in=$7

	ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet"
	ip -net "$ns" xfrm state add src "$me" dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet"

	# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
	ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow
	# to fwd decrypted packets after esp processing:
	ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow
}

# Mirror-image configuration on both routers: each side's outgoing SPI is
# the other side's incoming SPI.
do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"

do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"

ip netns exec nsr1 nft delete table ip nat

# restore default routes
ip -net ns2 route del 192.168.10.1 via 10.0.2.1
ip -net ns2 route add default via 10.0.2.1
ip -net ns2 route add default via dead:2::1

if test_tcp_forwarding ns1 ns2; then
	echo "PASS: ipsec tunnel mode for ns1/ns2"
else
	echo "FAIL: ipsec tunnel mode for ns1/ns2"
	ip netns exec nsr1 nft list ruleset 1>&2
	ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
	# a reported FAIL must be reflected in the exit status
	ret=1
fi

exit $ret