aboutsummaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/net
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile3
-rw-r--r--tools/testing/selftests/net/config7
-rwxr-xr-xtools/testing/selftests/net/drop_monitor_tests.sh215
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh44
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh70
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh43
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_asymmetric.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_symmetric.sh10
-rw-r--r--tools/testing/selftests/net/ipsec.c2195
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile3
-rw-r--r--tools/testing/selftests/net/mptcp/config1
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c22
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh21
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh193
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh293
-rw-r--r--tools/testing/selftests/net/nettest.c2
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh16
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh5
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c42
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh626
22 files changed, 3773 insertions, 51 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 742c499328b2..61ae899cfc17 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+ipsec
msg_zerocopy
socket
psock_fanout
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 9491bbaa0831..ef352477cac6 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -19,6 +19,8 @@ TEST_PROGS += txtimestamp.sh
TEST_PROGS += vrf-xfrm-tests.sh
TEST_PROGS += rxtimestamp.sh
TEST_PROGS += devlink_port_split.py
+TEST_PROGS += drop_monitor_tests.sh
+TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -29,6 +31,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
+TEST_GEN_FILES += ipsec
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3b42c06b5985..4d5df8e1eee7 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -24,10 +24,13 @@ CONFIG_IP_NF_NAT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NFT_NAT=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_NETEM=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_KALLSYMS=y
+CONFIG_TRACEPOINTS=y
+CONFIG_NET_DROP_MONITOR=m
+CONFIG_NETDEVSIM=m
+CONFIG_NET_FOU=m
diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh
new file mode 100755
index 000000000000..b7650e30d18b
--- /dev/null
+++ b/tools/testing/selftests/net/drop_monitor_tests.sh
@@ -0,0 +1,215 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking drop monitor functionality.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="
+ sw_drops
+ hw_drops
+"
+
+IP="ip -netns ns1"
+TC="tc -netns ns1"
+DEVLINK="devlink -N ns1"
+NS_EXEC="ip netns exec ns1"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ fi
+}
+
+setup()
+{
+ modprobe netdevsim &> /dev/null
+
+ set -e
+ ip netns add ns1
+ $IP link add dummy10 up type dummy
+
+ $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ udevadm settle
+ local netdev=$($NS_EXEC ls ${NETDEVSIM_PATH}/devices/${DEV}/net/)
+ $IP link set dev $netdev up
+
+ set +e
+}
+
+cleanup()
+{
+ $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ ip netns del ns1
+}
+
+sw_drops_test()
+{
+ echo
+ echo "Software drops test"
+
+ setup
+
+ local dir=$(mktemp -d)
+
+ $TC qdisc add dev dummy10 clsact
+ $TC filter add dev dummy10 egress pref 1 handle 101 proto ip \
+ flower dst_ip 192.0.2.10 action drop
+
+ $NS_EXEC mausezahn dummy10 -a 00:11:22:33:44:55 -b 00:aa:bb:cc:dd:ee \
+ -A 192.0.2.1 -B 192.0.2.10 -t udp sp=12345,dp=54321 -c 0 -q \
+ -d 100msec &
+ timeout 5 dwdump -o sw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) != 0))
+ log_test $? 0 "Capturing active software drops"
+
+ rm ${dir}/packets.pcap
+
+ { kill %% && wait %%; } 2>/dev/null
+ timeout 5 dwdump -o sw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0))
+ log_test $? 0 "Capturing inactive software drops"
+
+ rm -r $dir
+
+ cleanup
+}
+
+hw_drops_test()
+{
+ echo
+ echo "Hardware drops test"
+
+ setup
+
+ local dir=$(mktemp -d)
+
+ $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action trap
+ timeout 5 dwdump -o hw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \
+ | wc -l) != 0))
+ log_test $? 0 "Capturing active hardware drops"
+
+ rm ${dir}/packets.pcap
+
+ $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action drop
+ timeout 5 dwdump -o hw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \
+ | wc -l) == 0))
+ log_test $? 0 "Capturing inactive hardware drops"
+
+ rm -r $dir
+
+ cleanup
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v devlink)" ]; then
+ echo "SKIP: Could not run test without devlink tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tshark)" ]; then
+ echo "SKIP: Could not run test without tshark tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v dwdump)" ]; then
+ echo "SKIP: Could not run test without dwdump tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v udevadm)" ]; then
+ echo "SKIP: Could not run test without udevadm tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v timeout)" ]; then
+ echo "SKIP: Could not run test without timeout tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
+tshark -G fields 2> /dev/null | grep -q net_dm
+if [ $? -ne 0 ]; then
+ echo "SKIP: tshark too old, missing net_dm dissector"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ sw_drops|sw) sw_drops_test;;
+ hw_drops|hw) hw_drops_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 22dc2f3d428b..eb693a3b7b4a 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -411,9 +411,16 @@ ipv6_fdb_grp_fcnal()
run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103"
log_test $? 2 "Route add with fdb nexthop group"
+ run_cmd "$IP nexthop del id 61"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 0 "Fdb entry after deleting a single nexthop"
+
run_cmd "$IP nexthop del id 102"
log_test $? 0 "Fdb nexthop delete"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 254 "Fdb entry after deleting a nexthop group"
+
$IP link del dev vx10
}
@@ -484,9 +491,16 @@ ipv4_fdb_grp_fcnal()
run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
log_test $? 2 "Route add with fdb nexthop group"
+ run_cmd "$IP nexthop del id 12"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 0 "Fdb entry after deleting a single nexthop"
+
run_cmd "$IP nexthop del id 102"
log_test $? 0 "Fdb nexthop delete"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 254 "Fdb entry after deleting a nexthop group"
+
$IP link del dev vx10
}
@@ -739,6 +753,36 @@ ipv6_fcnal_runtime()
run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop"
+ run_cmd "$IP nexthop add id 86 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 124 group 86/87/88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop del id 88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop del id 87"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 0 "IPv6 route using a group after removing v4 gateways"
+
+ run_cmd "$IP ro delete 2001:db8:101::1/128"
+ run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop replace id 124 group 86/87/88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop replace id 88 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop replace id 87 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 0 "IPv6 route using a group after replacing v4 gateways"
+
$IP nexthop flush >/dev/null 2>&1
#
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 75fe24bcb9cd..9c12c4fd3afc 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -5,7 +5,7 @@
# Defines
if [[ ! -v DEVLINK_DEV ]]; then
- DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
+ DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \
| jq -r '.port | keys[]' | cut -d/ -f-2)
if [ -z "$DEVLINK_DEV" ]; then
echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
@@ -117,6 +117,12 @@ devlink_reload()
declare -A DEVLINK_ORIG
+# Changing pool type from static to dynamic causes reinterpretation of threshold
+# values. They therefore need to be saved before pool type is changed, then the
+# pool type can be changed, and then the new values need to be set up. Therefore
+# instead of saving the current state implicitly in the _set call, provide
+# functions for all three primitives: save, set, and restore.
+
devlink_port_pool_threshold()
{
local port=$1; shift
@@ -126,14 +132,21 @@ devlink_port_pool_threshold()
| jq '.port_pool."'"$port"'"[].threshold'
}
-devlink_port_pool_th_set()
+devlink_port_pool_th_save()
{
local port=$1; shift
local pool=$1; shift
- local th=$1; shift
local key="port_pool($port,$pool).threshold"
DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool)
+}
+
+devlink_port_pool_th_set()
+{
+ local port=$1; shift
+ local pool=$1; shift
+ local th=$1; shift
+
devlink sb port pool set $port pool $pool th $th
}
@@ -142,8 +155,13 @@ devlink_port_pool_th_restore()
local port=$1; shift
local pool=$1; shift
local key="port_pool($port,$pool).threshold"
+ local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]}
+ if [[ -z $orig ]]; then
+ echo "WARNING: Mismatched devlink_port_pool_th_restore"
+ else
+ devlink sb port pool set $port pool $pool th $orig
+ fi
}
devlink_pool_size_thtype()
@@ -154,14 +172,20 @@ devlink_pool_size_thtype()
| jq -r '.pool[][] | (.size, .thtype)'
}
+devlink_pool_size_thtype_save()
+{
+ local pool=$1; shift
+ local key="pool($pool).size_thtype"
+
+ DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
+}
+
devlink_pool_size_thtype_set()
{
local pool=$1; shift
local thtype=$1; shift
local size=$1; shift
- local key="pool($pool).size_thtype"
- DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype
}
@@ -171,8 +195,12 @@ devlink_pool_size_thtype_restore()
local key="pool($pool).size_thtype"
local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb pool set "$DEVLINK_DEV" pool $pool \
- size ${orig[0]} thtype ${orig[1]}
+ if [[ -z ${orig[0]} ]]; then
+ echo "WARNING: Mismatched devlink_pool_size_thtype_restore"
+ else
+ devlink sb pool set "$DEVLINK_DEV" pool $pool \
+ size ${orig[0]} thtype ${orig[1]}
+ fi
}
devlink_tc_bind_pool_th()
@@ -185,6 +213,16 @@ devlink_tc_bind_pool_th()
| jq -r '.tc_bind[][] | (.pool, .threshold)'
}
+devlink_tc_bind_pool_th_save()
+{
+ local port=$1; shift
+ local tc=$1; shift
+ local dir=$1; shift
+ local key="tc_bind($port,$dir,$tc).pool_th"
+
+ DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
+}
+
devlink_tc_bind_pool_th_set()
{
local port=$1; shift
@@ -192,9 +230,7 @@ devlink_tc_bind_pool_th_set()
local dir=$1; shift
local pool=$1; shift
local th=$1; shift
- local key="tc_bind($port,$dir,$tc).pool_th"
- DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
devlink sb tc bind set $port tc $tc type $dir pool $pool th $th
}
@@ -206,8 +242,12 @@ devlink_tc_bind_pool_th_restore()
local key="tc_bind($port,$dir,$tc).pool_th"
local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb tc bind set $port tc $tc type $dir \
- pool ${orig[0]} th ${orig[1]}
+ if [[ -z ${orig[0]} ]]; then
+ echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore"
+ else
+ devlink sb tc bind set $port tc $tc type $dir \
+ pool ${orig[0]} th ${orig[1]}
+ fi
}
devlink_traps_num_get()
@@ -509,3 +549,9 @@ devlink_cpu_port_get()
echo "$DEVLINK_DEV/$cpu_dl_port_num"
}
+
+devlink_cell_size_get()
+{
+ devlink sb pool show "$DEVLINK_DEV" pool 0 -j \
+ | jq '.pool[][].cell_size'
+}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 977fc2b326a2..927f9ba49e08 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1227,3 +1227,46 @@ stop_traffic()
# Suppress noise from killing mausezahn.
{ kill %% && wait %%; } 2>/dev/null
}
+
+tcpdump_start()
+{
+ local if_name=$1; shift
+ local ns=$1; shift
+
+ capfile=$(mktemp)
+ capout=$(mktemp)
+
+ if [ -z $ns ]; then
+ ns_cmd=""
+ else
+ ns_cmd="ip netns exec ${ns}"
+ fi
+
+ if [ -z $SUDO_USER ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ $ns_cmd tcpdump -e -n -Q in -i $if_name \
+ -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
+ cappid=$!
+
+ sleep 1
+}
+
+tcpdump_stop()
+{
+ $ns_cmd kill $cappid
+ sleep 1
+}
+
+tcpdump_cleanup()
+{
+ rm $capfile $capout
+}
+
+tcpdump_show()
+{
+ tcpdump -e -n -r $capfile 2>&1
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index c33bfd7ba214..13db1cb50e57 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -31,7 +31,7 @@ mirror_test()
local t0=$(tc_rule_stats_get $dev $pref)
$MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
- -c 10 -d 100ms -t icmp type=8
+ -c 10 -d 100msec -t icmp type=8
sleep 0.5
local t1=$(tc_rule_stats_get $dev $pref)
local delta=$((t1 - t0))
diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
index a0b5f57d6bd3..0727e2012b68 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
@@ -215,10 +215,16 @@ switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
switch_destroy()
{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
@@ -359,6 +365,10 @@ ns_switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
export -f ns_switch_create
diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh
index 1209031bc794..5d97fa347d75 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh
@@ -237,10 +237,16 @@ switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
switch_destroy()
{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
@@ -402,6 +408,10 @@ ns_switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
export -f ns_switch_create
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
new file mode 100644
index 000000000000..17ced7d6ce25
--- /dev/null
+++ b/tools/testing/selftests/net/ipsec.c
@@ -0,0 +1,2195 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ipsec.c - Check xfrm on veth inside a net-ns.
+ * Copyright (c) 2018 Dmitry Safonov
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/limits.h>
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <linux/veth.h>
+#include <linux/xfrm.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define printk(fmt, ...) \
+ ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__)
+
+#define pr_err(fmt, ...) printk(fmt ": %m", ##__VA_ARGS__)
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */
+#define MAX_PAYLOAD 2048
+#define XFRM_ALGO_KEY_BUF_SIZE 512
+#define MAX_PROCESSES (1 << 14) /* /16 mask divided by /30 subnets */
+#define INADDR_A ((in_addr_t) 0x0a000000) /* 10.0.0.0 */
+#define INADDR_B ((in_addr_t) 0xc0a80000) /* 192.168.0.0 */
+
+/* /30 mask for one veth connection */
+#define PREFIX_LEN 30
+#define child_ip(nr) (4*nr + 1)
+#define grchild_ip(nr) (4*nr + 2)
+
+#define VETH_FMT "ktst-%d"
+#define VETH_LEN 12
+
+static int nsfd_parent = -1;
+static int nsfd_childa = -1;
+static int nsfd_childb = -1;
+static long page_size;
+
+/*
+ * ksft_cnt is static in kselftest, so isn't shared with children.
+ * We have to send a test result back to parent and count there.
+ * results_fd is a pipe with test feedback from children.
+ */
+static int results_fd[2];
+
+const unsigned int ping_delay_nsec = 50 * 1000 * 1000;
+const unsigned int ping_timeout = 300;
+const unsigned int ping_count = 100;
+const unsigned int ping_success = 80;
+
+static void randomize_buffer(void *buf, size_t buflen)
+{
+ int *p = (int *)buf;
+ size_t words = buflen / sizeof(int);
+ size_t leftover = buflen % sizeof(int);
+
+ if (!buflen)
+ return;
+
+ while (words--)
+ *p++ = rand();
+
+ if (leftover) {
+ int tmp = rand();
+
+ memcpy(buf + buflen - leftover, &tmp, leftover);
+ }
+
+ return;
+}
+
+static int unshare_open(void)
+{
+ const char *netns_path = "/proc/self/ns/net";
+ int fd;
+
+ if (unshare(CLONE_NEWNET) != 0) {
+ pr_err("unshare()");
+ return -1;
+ }
+
+ fd = open(netns_path, O_RDONLY);
+ if (fd <= 0) {
+ pr_err("open(%s)", netns_path);
+ return -1;
+ }
+
+ return fd;
+}
+
+static int switch_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWNET)) {
+ pr_err("setns()");
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Running the test inside a new parent net namespace to bother less
+ * about cleanup on error-path.
+ */
+static int init_namespaces(void)
+{
+ nsfd_parent = unshare_open();
+ if (nsfd_parent <= 0)
+ return -1;
+
+ nsfd_childa = unshare_open();
+ if (nsfd_childa <= 0)
+ return -1;
+
+ if (switch_ns(nsfd_parent))
+ return -1;
+
+ nsfd_childb = unshare_open();
+ if (nsfd_childb <= 0)
+ return -1;
+
+ if (switch_ns(nsfd_parent))
+ return -1;
+ return 0;
+}
+
+static int netlink_sock(int *sock, uint32_t *seq_nr, int proto)
+{
+ if (*sock > 0) {
+ seq_nr++;
+ return 0;
+ }
+
+ *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
+ if (*sock <= 0) {
+ pr_err("socket(AF_NETLINK)");
+ return -1;
+ }
+
+ randomize_buffer(seq_nr, sizeof(*seq_nr));
+
+ return 0;
+}
+
+static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh)
+{
+ return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len));
+}
+
+static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */
+ struct rtattr *attr = rtattr_hdr(nh);
+ size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size);
+
+ if (req_sz < nl_size) {
+ printk("req buf is too small: %zu < %zu", req_sz, nl_size);
+ return -1;
+ }
+ nh->nlmsg_len = nl_size;
+
+ attr->rta_len = RTA_LENGTH(size);
+ attr->rta_type = rta_type;
+ memcpy(RTA_DATA(attr), payload, size);
+
+ return 0;
+}
+
+static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ struct rtattr *ret = rtattr_hdr(nh);
+
+ if (rtattr_pack(nh, req_sz, rta_type, payload, size))
+ return 0;
+
+ return ret;
+}
+
+static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type)
+{
+ return _rtattr_begin(nh, req_sz, rta_type, 0, 0);
+}
+
+static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+ char *nlmsg_end = (char *)nh + nh->nlmsg_len;
+
+ attr->rta_len = nlmsg_end - (char *)attr;
+}
+
+static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz,
+ const char *peer, int ns)
+{
+ struct ifinfomsg pi;
+ struct rtattr *peer_attr;
+
+ memset(&pi, 0, sizeof(pi));
+ pi.ifi_family = AF_UNSPEC;
+ pi.ifi_change = 0xFFFFFFFF;
+
+ peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi));
+ if (!peer_attr)
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer)))
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns)))
+ return -1;
+
+ rtattr_end(nh, peer_attr);
+
+ return 0;
+}
+
+static int netlink_check_answer(int sock)
+{
+ struct nlmsgerror {
+ struct nlmsghdr hdr;
+ int error;
+ struct nlmsghdr orig_msg;
+ } answer;
+
+ if (recv(sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)answer.hdr.nlmsg_type);
+ return -1;
+ } else if (answer.error) {
+ printk("NLMSG_ERROR: %d: %s",
+ answer.error, strerror(-answer.error));
+ return answer.error;
+ }
+
+ return 0;
+}
+
+static int veth_add(int sock, uint32_t seq, const char *peera, int ns_a,
+ const char *peerb, int ns_b)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ const char veth_type[] = "veth";
+ struct rtattr *link_info, *info_data;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, peera, strlen(peera)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a)))
+ return -1;
+
+ link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+ if (!link_info)
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type)))
+ return -1;
+
+ info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+ if (!info_data)
+ return -1;
+
+ if (veth_pack_peerb(&req.nh, sizeof(req), peerb, ns_b))
+ return -1;
+
+ rtattr_end(&req.nh, info_data);
+ rtattr_end(&req.nh, link_info);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int ip4_addr_set(int sock, uint32_t seq, const char *intf,
+ struct in_addr addr, uint8_t prefix)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifaddrmsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWADDR;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifa_family = AF_INET;
+ req.info.ifa_prefixlen = prefix;
+ req.info.ifa_index = if_nametoindex(intf);
+
+#ifdef DEBUG
+ {
+ char addr_str[IPV4_STR_SZ] = {};
+
+ strncpy(addr_str, inet_ntoa(addr), IPV4_STR_SZ - 1);
+
+ printk("ip addr set %s", addr_str);
+ }
+#endif
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, sizeof(addr)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFA_ADDRESS, &addr, sizeof(addr)))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int link_set_up(int sock, uint32_t seq, const char *intf)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+ req.info.ifi_index = if_nametoindex(intf);
+ req.info.ifi_flags = IFF_UP;
+ req.info.ifi_change = IFF_UP;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int ip4_route_set(int sock, uint32_t seq, const char *intf,
+ struct in_addr src, struct in_addr dst)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct rtmsg rt;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ unsigned int index = if_nametoindex(intf);
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
+ req.nh.nlmsg_type = RTM_NEWROUTE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
+ req.nh.nlmsg_seq = seq;
+ req.rt.rtm_family = AF_INET;
+ req.rt.rtm_dst_len = 32;
+ req.rt.rtm_table = RT_TABLE_MAIN;
+ req.rt.rtm_protocol = RTPROT_BOOT;
+ req.rt.rtm_scope = RT_SCOPE_LINK;
+ req.rt.rtm_type = RTN_UNICAST;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, sizeof(dst)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, sizeof(src)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index)))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(sock);
+}
+
+static int tunnel_set_route(int route_sock, uint32_t *route_seq, char *veth,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ if (ip4_addr_set(route_sock, (*route_seq)++, "lo",
+ tunsrc, PREFIX_LEN)) {
+ printk("Failed to set ipv4 addr");
+ return -1;
+ }
+
+ if (ip4_route_set(route_sock, (*route_seq)++, veth, tunsrc, tundst)) {
+ printk("Failed to set ipv4 route");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int init_child(int nsfd, char *veth, unsigned int src, unsigned int dst)
+{
+ struct in_addr intsrc = inet_makeaddr(INADDR_B, src);
+ struct in_addr tunsrc = inet_makeaddr(INADDR_A, src);
+ struct in_addr tundst = inet_makeaddr(INADDR_A, dst);
+ int route_sock = -1, ret = -1;
+ uint32_t route_seq;
+
+ if (switch_ns(nsfd))
+ return -1;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) {
+ printk("Failed to open netlink route socket in child");
+ return -1;
+ }
+
+ if (ip4_addr_set(route_sock, route_seq++, veth, intsrc, PREFIX_LEN)) {
+ printk("Failed to set ipv4 addr");
+ goto err;
+ }
+
+ if (link_set_up(route_sock, route_seq++, veth)) {
+ printk("Failed to bring up %s", veth);
+ goto err;
+ }
+
+ if (tunnel_set_route(route_sock, &route_seq, veth, tunsrc, tundst)) {
+ printk("Failed to add tunnel route on %s", veth);
+ goto err;
+ }
+ ret = 0;
+
+err:
+ close(route_sock);
+ return ret;
+}
+
+#define ALGO_LEN 64
+enum desc_type {
+ CREATE_TUNNEL = 0,
+ ALLOCATE_SPI,
+ MONITOR_ACQUIRE,
+ EXPIRE_STATE,
+ EXPIRE_POLICY,
+};
+const char *desc_name[] = {
+ "create tunnel",
+ "alloc spi",
+ "monitor acquire",
+ "expire state",
+ "expire policy"
+};
+struct xfrm_desc {
+ enum desc_type type;
+ uint8_t proto;
+ char a_algo[ALGO_LEN];
+ char e_algo[ALGO_LEN];
+ char c_algo[ALGO_LEN];
+ char ae_algo[ALGO_LEN];
+ unsigned int icv_len;
+ /* unsigned key_len; */
+};
+
+enum msg_type {
+ MSG_ACK = 0,
+ MSG_EXIT,
+ MSG_PING,
+ MSG_XFRM_PREPARE,
+ MSG_XFRM_ADD,
+ MSG_XFRM_DEL,
+ MSG_XFRM_CLEANUP,
+};
+
+struct test_desc {
+ enum msg_type type;
+ union {
+ struct {
+ in_addr_t reply_ip;
+ unsigned int port;
+ } ping;
+ struct xfrm_desc xfrm_desc;
+ } body;
+};
+
+struct test_result {
+ struct xfrm_desc desc;
+ unsigned int res;
+};
+
+static void write_test_result(unsigned int res, struct xfrm_desc *d)
+{
+ struct test_result tr = {};
+ ssize_t ret;
+
+ tr.desc = *d;
+ tr.res = res;
+
+ ret = write(results_fd[1], &tr, sizeof(tr));
+ if (ret != sizeof(tr))
+ pr_err("Failed to write the result in pipe %zd", ret);
+}
+
+static void write_msg(int fd, struct test_desc *msg, bool exit_of_fail)
+{
+ ssize_t bytes = write(fd, msg, sizeof(*msg));
+
+ /* Make sure that write/read is atomic to a pipe */
+ BUILD_BUG_ON(sizeof(struct test_desc) > PIPE_BUF);
+
+ if (bytes < 0) {
+ pr_err("write()");
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+ if (bytes != sizeof(*msg)) {
+ pr_err("sent part of the message %zd/%zu", bytes, sizeof(*msg));
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+}
+
+static void read_msg(int fd, struct test_desc *msg, bool exit_of_fail)
+{
+ ssize_t bytes = read(fd, msg, sizeof(*msg));
+
+ if (bytes < 0) {
+ pr_err("read()");
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+ if (bytes != sizeof(*msg)) {
+ pr_err("got incomplete message %zd/%zu", bytes, sizeof(*msg));
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+}
+
+static int udp_ping_init(struct in_addr listen_ip, unsigned int u_timeout,
+ unsigned int *server_port, int sock[2])
+{
+ struct sockaddr_in server;
+ struct timeval t = { .tv_sec = 0, .tv_usec = u_timeout };
+ socklen_t s_len = sizeof(server);
+
+ sock[0] = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock[0] < 0) {
+ pr_err("socket()");
+ return -1;
+ }
+
+ server.sin_family = AF_INET;
+ server.sin_port = 0;
+ memcpy(&server.sin_addr.s_addr, &listen_ip, sizeof(struct in_addr));
+
+ if (bind(sock[0], (struct sockaddr *)&server, s_len)) {
+ pr_err("bind()");
+ goto err_close_server;
+ }
+
+ if (getsockname(sock[0], (struct sockaddr *)&server, &s_len)) {
+ pr_err("getsockname()");
+ goto err_close_server;
+ }
+
+ *server_port = ntohs(server.sin_port);
+
+ if (setsockopt(sock[0], SOL_SOCKET, SO_RCVTIMEO, (const char *)&t, sizeof t)) {
+ pr_err("setsockopt()");
+ goto err_close_server;
+ }
+
+ sock[1] = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock[1] < 0) {
+ pr_err("socket()");
+ goto err_close_server;
+ }
+
+ return 0;
+
+err_close_server:
+ close(sock[0]);
+ return -1;
+}
+
+static int udp_ping_send(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len)
+{
+ struct sockaddr_in server;
+ const struct sockaddr *dest_addr = (struct sockaddr *)&server;
+ char *sock_buf[buf_len];
+ ssize_t r_bytes, s_bytes;
+
+ server.sin_family = AF_INET;
+ server.sin_port = htons(port);
+ server.sin_addr.s_addr = dest_ip;
+
+ s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
+ if (s_bytes < 0) {
+ pr_err("sendto()");
+ return -1;
+ } else if (s_bytes != buf_len) {
+ printk("send part of the message: %zd/%zu", s_bytes, sizeof(server));
+ return -1;
+ }
+
+ r_bytes = recv(sock[0], sock_buf, buf_len, 0);
+ if (r_bytes < 0) {
+ if (errno != EAGAIN)
+ pr_err("recv()");
+ return -1;
+ } else if (r_bytes == 0) { /* EOF */
+ printk("EOF on reply to ping");
+ return -1;
+ } else if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
+ printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int udp_ping_reply(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len)
+{
+ struct sockaddr_in server;
+ const struct sockaddr *dest_addr = (struct sockaddr *)&server;
+ char *sock_buf[buf_len];
+ ssize_t r_bytes, s_bytes;
+
+ server.sin_family = AF_INET;
+ server.sin_port = htons(port);
+ server.sin_addr.s_addr = dest_ip;
+
+ r_bytes = recv(sock[0], sock_buf, buf_len, 0);
+ if (r_bytes < 0) {
+ if (errno != EAGAIN)
+ pr_err("recv()");
+ return -1;
+ }
+ if (r_bytes == 0) { /* EOF */
+ printk("EOF on reply to ping");
+ return -1;
+ }
+ if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
+ printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
+ return -1;
+ }
+
+ s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
+ if (s_bytes < 0) {
+ pr_err("sendto()");
+ return -1;
+ } else if (s_bytes != buf_len) {
+ printk("send part of the message: %zd/%zu", s_bytes, sizeof(server));
+ return -1;
+ }
+
+ return 0;
+}
+
+typedef int (*ping_f)(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len);
+static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from,
+ bool init_side, int d_port, in_addr_t to, ping_f func)
+{
+ struct test_desc msg;
+ unsigned int s_port, i, ping_succeeded = 0;
+ int ping_sock[2];
+ char to_str[IPV4_STR_SZ] = {}, from_str[IPV4_STR_SZ] = {};
+
+ if (udp_ping_init(from, ping_timeout, &s_port, ping_sock)) {
+ printk("Failed to init ping");
+ return -1;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_PING;
+ msg.body.ping.port = s_port;
+ memcpy(&msg.body.ping.reply_ip, &from, sizeof(from));
+
+ write_msg(cmd_fd, &msg, 0);
+ if (init_side) {
+ /* The other end sends ip to ping */
+ read_msg(cmd_fd, &msg, 0);
+ if (msg.type != MSG_PING)
+ return -1;
+ to = msg.body.ping.reply_ip;
+ d_port = msg.body.ping.port;
+ }
+
+ for (i = 0; i < ping_count ; i++) {
+ struct timespec sleep_time = {
+ .tv_sec = 0,
+ .tv_nsec = ping_delay_nsec,
+ };
+
+ ping_succeeded += !func(ping_sock, to, d_port, buf, page_size);
+ nanosleep(&sleep_time, 0);
+ }
+
+ close(ping_sock[0]);
+ close(ping_sock[1]);
+
+ strncpy(to_str, inet_ntoa(*(struct in_addr *)&to), IPV4_STR_SZ - 1);
+ strncpy(from_str, inet_ntoa(from), IPV4_STR_SZ - 1);
+
+ if (ping_succeeded < ping_success) {
+ printk("ping (%s) %s->%s failed %u/%u times",
+ init_side ? "send" : "reply", from_str, to_str,
+ ping_count - ping_succeeded, ping_count);
+ return -1;
+ }
+
+#ifdef DEBUG
+ printk("ping (%s) %s->%s succeeded %u/%u times",
+ init_side ? "send" : "reply", from_str, to_str,
+ ping_succeeded, ping_count);
+#endif
+
+ return 0;
+}
+
+static int xfrm_fill_key(char *name, char *buf,
+ size_t buf_len, unsigned int *key_len)
+{
+ /* TODO: use set/map instead */
+ if (strncmp(name, "digest_null", ALGO_LEN) == 0)
+ *key_len = 0;
+ else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0)
+ *key_len = 0;
+ else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0)
+ *key_len = 64;
+ else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0)
+ *key_len = 128;
+ else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0)
+ *key_len = 192;
+ else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0)
+ *key_len = 256;
+ else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0)
+ *key_len = 288;
+ else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0)
+ *key_len = 384;
+ else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0)
+ *key_len = 448;
+ else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0)
+ *key_len = 512;
+ else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0)
+ *key_len = 160;
+ else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0)
+ *key_len = 152;
+ else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0)
+ *key_len = 224;
+ else if (strncmp(name, "rfc4543(gcm(aes))-192", ALGO_LEN) == 0)
+ *key_len = 224;
+ else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0)
+ *key_len = 216;
+ else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0)
+ *key_len = 288;
+ else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0)
+ *key_len = 288;
+ else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0)
+ *key_len = 280;
+ else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0)
+ *key_len = 0;
+
+ if (*key_len > buf_len) {
+ printk("Can't pack a key - too big for buffer");
+ return -1;
+ }
+
+ randomize_buffer(buf, *key_len);
+
+ return 0;
+}
+
+static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz,
+ struct xfrm_desc *desc)
+{
+ struct {
+ union {
+ struct xfrm_algo alg;
+ struct xfrm_algo_aead aead;
+ struct xfrm_algo_auth auth;
+ } u;
+ char buf[XFRM_ALGO_KEY_BUF_SIZE];
+ } alg = {};
+ size_t alen, elen, clen, aelen;
+ unsigned short type;
+
+ alen = strlen(desc->a_algo);
+ elen = strlen(desc->e_algo);
+ clen = strlen(desc->c_algo);
+ aelen = strlen(desc->ae_algo);
+
+ /* Verify desc */
+ switch (desc->proto) {
+ case IPPROTO_AH:
+ if (!alen || elen || clen || aelen) {
+ printk("BUG: buggy ah desc");
+ return -1;
+ }
+ strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_AUTH;
+ break;
+ case IPPROTO_COMP:
+ if (!clen || elen || alen || aelen) {
+ printk("BUG: buggy comp desc");
+ return -1;
+ }
+ strncpy(alg.u.alg.alg_name, desc->c_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->c_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_COMP;
+ break;
+ case IPPROTO_ESP:
+ if (!((alen && elen) ^ aelen) || clen) {
+ printk("BUG: buggy esp desc");
+ return -1;
+ }
+ if (aelen) {
+ alg.u.aead.alg_icv_len = desc->icv_len;
+ strncpy(alg.u.aead.alg_name, desc->ae_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->ae_algo, alg.u.aead.alg_key,
+ sizeof(alg.buf), &alg.u.aead.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_AEAD;
+ } else {
+
+ strncpy(alg.u.alg.alg_name, desc->e_algo, ALGO_LEN - 1);
+ type = XFRMA_ALG_CRYPT;
+ if (xfrm_fill_key(desc->e_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
+ return -1;
+
+ strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN);
+ type = XFRMA_ALG_AUTH;
+ if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ }
+ break;
+ default:
+ printk("BUG: unknown proto in desc");
+ return -1;
+ }
+
+ if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
+ return -1;
+
+ return 0;
+}
+
+static inline uint32_t gen_spi(struct in_addr src)
+{
+ return htonl(inet_lnaof(src));
+}
+
+static int xfrm_state_add(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_usersa_info info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = XFRM_MSG_NEWSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill selector. */
+ memcpy(&req.info.sel.daddr, &dst, sizeof(dst));
+ memcpy(&req.info.sel.saddr, &src, sizeof(src));
+ req.info.sel.family = AF_INET;
+ req.info.sel.prefixlen_d = PREFIX_LEN;
+ req.info.sel.prefixlen_s = PREFIX_LEN;
+
+ /* Fill id */
+ memcpy(&req.info.id.daddr, &dst, sizeof(dst));
+ /* Note: zero-spi cannot be deleted */
+ req.info.id.spi = spi;
+ req.info.id.proto = desc->proto;
+
+ memcpy(&req.info.saddr, &src, sizeof(src));
+
+ /* Fill lifteme_cfg */
+ req.info.lft.soft_byte_limit = XFRM_INF;
+ req.info.lft.hard_byte_limit = XFRM_INF;
+ req.info.lft.soft_packet_limit = XFRM_INF;
+ req.info.lft.hard_packet_limit = XFRM_INF;
+
+ req.info.family = AF_INET;
+ req.info.mode = XFRM_MODE_TUNNEL;
+
+ if (xfrm_state_pack_algo(&req.nh, sizeof(req), desc))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static bool xfrm_usersa_found(struct xfrm_usersa_info *info, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ if (memcmp(&info->sel.daddr, &dst, sizeof(dst)))
+ return false;
+
+ if (memcmp(&info->sel.saddr, &src, sizeof(src)))
+ return false;
+
+ if (info->sel.family != AF_INET ||
+ info->sel.prefixlen_d != PREFIX_LEN ||
+ info->sel.prefixlen_s != PREFIX_LEN)
+ return false;
+
+ if (info->id.spi != spi || info->id.proto != desc->proto)
+ return false;
+
+ if (memcmp(&info->id.daddr, &dst, sizeof(dst)))
+ return false;
+
+ if (memcmp(&info->saddr, &src, sizeof(src)))
+ return false;
+
+ if (info->lft.soft_byte_limit != XFRM_INF ||
+ info->lft.hard_byte_limit != XFRM_INF ||
+ info->lft.soft_packet_limit != XFRM_INF ||
+ info->lft.hard_packet_limit != XFRM_INF)
+ return false;
+
+ if (info->family != AF_INET || info->mode != XFRM_MODE_TUNNEL)
+ return false;
+
+ /* XXX: check xfrm algo, see xfrm_state_pack_algo(). */
+
+ return true;
+}
+
+static int xfrm_state_check(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_usersa_info info;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } answer;
+ struct xfrm_address_filter filter = {};
+ bool found = false;
+
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(0);
+ req.nh.nlmsg_type = XFRM_MSG_GETSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.nh.nlmsg_seq = seq;
+
+ /*
+ * Add dump filter by source address as there may be other tunnels
+ * in this netns (if tests run in parallel).
+ */
+ filter.family = AF_INET;
+ filter.splen = 0x1f; /* 0xffffffff mask see addr_match() */
+ memcpy(&filter.saddr, &src, sizeof(src));
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_ADDRESS_FILTER,
+ &filter, sizeof(filter)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ while (1) {
+ if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ }
+ if (answer.nh.nlmsg_type == NLMSG_ERROR) {
+ printk("NLMSG_ERROR: %d: %s",
+ answer.error, strerror(-answer.error));
+ return -1;
+ } else if (answer.nh.nlmsg_type == NLMSG_DONE) {
+ if (found)
+ return 0;
+ printk("didn't find allocated xfrm state in dump");
+ return -1;
+ } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
+ if (xfrm_usersa_found(&answer.info, spi, src, dst, desc))
+ found = true;
+ }
+ }
+}
+
+static int xfrm_set(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst,
+ struct xfrm_desc *desc)
+{
+ int err;
+
+ err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc);
+ if (err) {
+ printk("Failed to add xfrm state");
+ return -1;
+ }
+
+ err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc);
+ if (err) {
+ printk("Failed to add xfrm state");
+ return -1;
+ }
+
+ /* Check dumps for XFRM_MSG_GETSA */
+ err = xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc);
+ err |= xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc);
+ if (err) {
+ printk("Failed to check xfrm state");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_policy_add(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst, uint8_t dir,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userpolicy_info info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrm_user_tmpl tmpl;
+
+ memset(&req, 0, sizeof(req));
+ memset(&tmpl, 0, sizeof(tmpl));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = XFRM_MSG_NEWPOLICY;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill selector. */
+ memcpy(&req.info.sel.daddr, &dst, sizeof(tundst));
+ memcpy(&req.info.sel.saddr, &src, sizeof(tunsrc));
+ req.info.sel.family = AF_INET;
+ req.info.sel.prefixlen_d = PREFIX_LEN;
+ req.info.sel.prefixlen_s = PREFIX_LEN;
+
+ /* Fill lifteme_cfg */
+ req.info.lft.soft_byte_limit = XFRM_INF;
+ req.info.lft.hard_byte_limit = XFRM_INF;
+ req.info.lft.soft_packet_limit = XFRM_INF;
+ req.info.lft.hard_packet_limit = XFRM_INF;
+
+ req.info.dir = dir;
+
+ /* Fill tmpl */
+ memcpy(&tmpl.id.daddr, &dst, sizeof(dst));
+ /* Note: zero-spi cannot be deleted */
+ tmpl.id.spi = spi;
+ tmpl.id.proto = proto;
+ tmpl.family = AF_INET;
+ memcpy(&tmpl.saddr, &src, sizeof(src));
+ tmpl.mode = XFRM_MODE_TUNNEL;
+ tmpl.aalgos = (~(uint32_t)0);
+ tmpl.ealgos = (~(uint32_t)0);
+ tmpl.calgos = (~(uint32_t)0);
+
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &tmpl, sizeof(tmpl)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_prepare(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst, proto)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src,
+ XFRM_POLICY_IN, tunsrc, tundst, proto)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_policy_del(int xfrm_sock, uint32_t seq,
+ struct in_addr src, struct in_addr dst, uint8_t dir,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userpolicy_id id;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id));
+ req.nh.nlmsg_type = XFRM_MSG_DELPOLICY;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill id */
+ memcpy(&req.id.sel.daddr, &dst, sizeof(tundst));
+ memcpy(&req.id.sel.saddr, &src, sizeof(tunsrc));
+ req.id.sel.family = AF_INET;
+ req.id.sel.prefixlen_d = PREFIX_LEN;
+ req.id.sel.prefixlen_s = PREFIX_LEN;
+ req.id.dir = dir;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_cleanup(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ if (xfrm_policy_del(xfrm_sock, (*seq)++, src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ if (xfrm_policy_del(xfrm_sock, (*seq)++, dst, src,
+ XFRM_POLICY_IN, tunsrc, tundst)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_state_del(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_usersa_id id;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ xfrm_address_t saddr = {};
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id));
+ req.nh.nlmsg_type = XFRM_MSG_DELSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ memcpy(&req.id.daddr, &dst, sizeof(dst));
+ req.id.family = AF_INET;
+ req.id.proto = proto;
+ /* Note: zero-spi cannot be deleted */
+ req.id.spi = spi;
+
+ memcpy(&saddr, &src, sizeof(src));
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SRCADDR, &saddr, sizeof(saddr)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_delete(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), src, dst, proto)) {
+ printk("Failed to remove xfrm state");
+ return -1;
+ }
+
+ if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), dst, src, proto)) {
+ printk("Failed to remove xfrm state");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_state_allocspi(int xfrm_sock, uint32_t *seq,
+ uint32_t spi, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userspi_info spi;
+ } req;
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_usersa_info info;
+ int error;
+ };
+ } answer;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.spi));
+ req.nh.nlmsg_type = XFRM_MSG_ALLOCSPI;
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ req.spi.info.family = AF_INET;
+ req.spi.min = spi;
+ req.spi.max = spi;
+ req.spi.info.id.proto = proto;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return KSFT_FAIL;
+ }
+
+ if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return KSFT_FAIL;
+ } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
+ uint32_t new_spi = htonl(answer.info.id.spi);
+
+ if (new_spi != spi) {
+ printk("allocated spi is different from requested: %#x != %#x",
+ new_spi, spi);
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+ } else if (answer.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)answer.nh.nlmsg_type);
+ return KSFT_FAIL;
+ }
+
+ printk("NLMSG_ERROR: %d: %s", answer.error, strerror(-answer.error));
+ return (answer.error) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int netlink_sock_bind(int *sock, uint32_t *seq, int proto, uint32_t groups)
+{
+ struct sockaddr_nl snl = {};
+ socklen_t addr_len;
+ int ret = -1;
+
+ snl.nl_family = AF_NETLINK;
+ snl.nl_groups = groups;
+
+ if (netlink_sock(sock, seq, proto)) {
+ printk("Failed to open xfrm netlink socket");
+ return -1;
+ }
+
+ if (bind(*sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) {
+ pr_err("bind()");
+ goto out_close;
+ }
+
+ addr_len = sizeof(snl);
+ if (getsockname(*sock, (struct sockaddr *)&snl, &addr_len) < 0) {
+ pr_err("getsockname()");
+ goto out_close;
+ }
+ if (addr_len != sizeof(snl)) {
+ printk("Wrong address length %d", addr_len);
+ goto out_close;
+ }
+ if (snl.nl_family != AF_NETLINK) {
+ printk("Wrong address family %d", snl.nl_family);
+ goto out_close;
+ }
+ return 0;
+
+out_close:
+ close(*sock);
+ return ret;
+}
+
+static int xfrm_monitor_acquire(int xfrm_sock, uint32_t *seq, unsigned int nr)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_acquire acq;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrm_user_tmpl xfrm_tmpl = {};
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_ACQUIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.acq));
+ req.nh.nlmsg_type = XFRM_MSG_ACQUIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ req.acq.policy.sel.family = AF_INET;
+ req.acq.aalgos = 0xfeed;
+ req.acq.ealgos = 0xbaad;
+ req.acq.calgos = 0xbabe;
+
+ xfrm_tmpl.family = AF_INET;
+ xfrm_tmpl.id.proto = IPPROTO_ESP;
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &xfrm_tmpl, sizeof(xfrm_tmpl)))
+ goto out_close;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.acq.aalgos != 0xfeed || req.acq.ealgos != 0xbaad
+ || req.acq.calgos != 0xbabe) {
+ printk("xfrm_user_acquire has changed %x %x %x",
+ req.acq.aalgos, req.acq.ealgos, req.acq.calgos);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
+static int xfrm_expire_state(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_expire expire;
+ int error;
+ };
+ } req;
+ struct in_addr src, dst;
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+
+ if (xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc)) {
+ printk("Failed to add xfrm state");
+ return KSFT_FAIL;
+ }
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire));
+ req.nh.nlmsg_type = XFRM_MSG_EXPIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ memcpy(&req.expire.state.id.daddr, &dst, sizeof(dst));
+ req.expire.state.id.spi = gen_spi(src);
+ req.expire.state.id.proto = desc->proto;
+ req.expire.state.family = AF_INET;
+ req.expire.hard = 0xff;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.expire.hard != 0x1) {
+ printk("expire.hard is not set: %x", req.expire.hard);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
+static int xfrm_expire_policy(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_polexpire expire;
+ int error;
+ };
+ } req;
+ struct in_addr src, dst, tunsrc, tundst;
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
+
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst, desc->proto)) {
+ printk("Failed to add xfrm policy");
+ return KSFT_FAIL;
+ }
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire));
+ req.nh.nlmsg_type = XFRM_MSG_POLEXPIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ /* Fill selector. */
+ memcpy(&req.expire.pol.sel.daddr, &dst, sizeof(tundst));
+ memcpy(&req.expire.pol.sel.saddr, &src, sizeof(tunsrc));
+ req.expire.pol.sel.family = AF_INET;
+ req.expire.pol.sel.prefixlen_d = PREFIX_LEN;
+ req.expire.pol.sel.prefixlen_s = PREFIX_LEN;
+ req.expire.pol.dir = XFRM_POLICY_OUT;
+ req.expire.hard = 0xff;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.expire.hard != 0x1) {
+ printk("expire.hard is not set: %x", req.expire.hard);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
+static int child_serv(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
+{
+ struct in_addr src, dst, tunsrc, tundst;
+ struct test_desc msg;
+ int ret = KSFT_FAIL;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
+
+ /* UDP pinging without xfrm */
+ if (do_ping(cmd_fd, buf, page_size, src, true, 0, 0, udp_ping_send)) {
+ printk("ping failed before setting xfrm");
+ return KSFT_FAIL;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_PREPARE;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+
+ if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
+ printk("failed to prepare xfrm");
+ goto cleanup;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_ADD;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+ if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
+ printk("failed to set xfrm");
+ goto delete;
+ }
+
+ /* UDP pinging with xfrm tunnel */
+ if (do_ping(cmd_fd, buf, page_size, tunsrc,
+ true, 0, 0, udp_ping_send)) {
+ printk("ping failed for xfrm");
+ goto delete;
+ }
+
+ ret = KSFT_PASS;
+delete:
+ /* xfrm delete */
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_DEL;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+
+ if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
+ printk("failed ping to remove xfrm");
+ ret = KSFT_FAIL;
+ }
+
+cleanup:
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_CLEANUP;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+ if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
+ printk("failed ping to cleanup xfrm");
+ ret = KSFT_FAIL;
+ }
+ return ret;
+}
+
+static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
+{
+ struct xfrm_desc desc;
+ struct test_desc msg;
+ int xfrm_sock = -1;
+ uint32_t seq;
+
+ if (switch_ns(nsfd_childa))
+ exit(KSFT_FAIL);
+
+ if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
+ printk("Failed to open xfrm netlink socket");
+ exit(KSFT_FAIL);
+ }
+
+ /* Check that seq sock is ready, just for sure. */
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_ACK;
+ write_msg(cmd_fd, &msg, 1);
+ read_msg(cmd_fd, &msg, 1);
+ if (msg.type != MSG_ACK) {
+ printk("Ack failed");
+ exit(KSFT_FAIL);
+ }
+
+ for (;;) {
+ ssize_t received = read(test_desc_fd, &desc, sizeof(desc));
+ int ret;
+
+ if (received == 0) /* EOF */
+ break;
+
+ if (received != sizeof(desc)) {
+ pr_err("read() returned %zd", received);
+ exit(KSFT_FAIL);
+ }
+
+ switch (desc.type) {
+ case CREATE_TUNNEL:
+ ret = child_serv(xfrm_sock, &seq, nr,
+ cmd_fd, buf, &desc);
+ break;
+ case ALLOCATE_SPI:
+ ret = xfrm_state_allocspi(xfrm_sock, &seq,
+ -1, desc.proto);
+ break;
+ case MONITOR_ACQUIRE:
+ ret = xfrm_monitor_acquire(xfrm_sock, &seq, nr);
+ break;
+ case EXPIRE_STATE:
+ ret = xfrm_expire_state(xfrm_sock, &seq, nr, &desc);
+ break;
+ case EXPIRE_POLICY:
+ ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
+ break;
+ default:
+ printk("Unknown desc type %d", desc.type);
+ exit(KSFT_FAIL);
+ }
+ write_test_result(ret, &desc);
+ }
+
+ close(xfrm_sock);
+
+ msg.type = MSG_EXIT;
+ write_msg(cmd_fd, &msg, 1);
+ exit(KSFT_PASS);
+}
+
+static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
+ struct test_desc *msg, int xfrm_sock, uint32_t *seq)
+{
+ struct in_addr src, dst, tunsrc, tundst;
+ bool tun_reply;
+ struct xfrm_desc *desc = &msg->body.xfrm_desc;
+
+ src = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ dst = inet_makeaddr(INADDR_B, child_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, grchild_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, child_ip(nr));
+
+ switch (msg->type) {
+ case MSG_EXIT:
+ exit(KSFT_PASS);
+ case MSG_ACK:
+ write_msg(cmd_fd, msg, 1);
+ break;
+ case MSG_PING:
+ tun_reply = memcmp(&dst, &msg->body.ping.reply_ip, sizeof(in_addr_t));
+ /* UDP pinging without xfrm */
+ if (do_ping(cmd_fd, buf, page_size, tun_reply ? tunsrc : src,
+ false, msg->body.ping.port,
+ msg->body.ping.reply_ip, udp_ping_reply)) {
+ printk("ping failed before setting xfrm");
+ }
+ break;
+ case MSG_XFRM_PREPARE:
+ if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst,
+ desc->proto)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to prepare xfrm");
+ }
+ break;
+ case MSG_XFRM_ADD:
+ if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to set xfrm");
+ }
+ break;
+ case MSG_XFRM_DEL:
+ if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst,
+ desc->proto)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to remove xfrm");
+ }
+ break;
+ case MSG_XFRM_CLEANUP:
+ if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
+ printk("failed to cleanup xfrm");
+ }
+ break;
+ default:
+ printk("got unknown msg type %d", msg->type);
+ };
+}
+
+static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)
+{
+ struct test_desc msg;
+ int xfrm_sock = -1;
+ uint32_t seq;
+
+ if (switch_ns(nsfd_childb))
+ exit(KSFT_FAIL);
+
+ if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
+ printk("Failed to open xfrm netlink socket");
+ exit(KSFT_FAIL);
+ }
+
+ do {
+ read_msg(cmd_fd, &msg, 1);
+ grand_child_serv(nr, cmd_fd, buf, &msg, xfrm_sock, &seq);
+ } while (1);
+
+ close(xfrm_sock);
+ exit(KSFT_FAIL);
+}
+
+static int start_child(unsigned int nr, char *veth, int test_desc_fd[2])
+{
+ int cmd_sock[2];
+ void *data_map;
+ pid_t child;
+
+ if (init_child(nsfd_childa, veth, child_ip(nr), grchild_ip(nr)))
+ return -1;
+
+ if (init_child(nsfd_childb, veth, grchild_ip(nr), child_ip(nr)))
+ return -1;
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ } else if (child) {
+ /* in parent - selftest */
+ return switch_ns(nsfd_parent);
+ }
+
+ if (close(test_desc_fd[1])) {
+ pr_err("close()");
+ return -1;
+ }
+
+ /* child */
+ data_map = mmap(0, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (data_map == MAP_FAILED) {
+ pr_err("mmap()");
+ return -1;
+ }
+
+ randomize_buffer(data_map, page_size);
+
+ if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, cmd_sock)) {
+ pr_err("socketpair()");
+ return -1;
+ }
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ } else if (child) {
+ if (close(cmd_sock[0])) {
+ pr_err("close()");
+ return -1;
+ }
+ return child_f(nr, test_desc_fd[0], cmd_sock[1], data_map);
+ }
+ if (close(cmd_sock[1])) {
+ pr_err("close()");
+ return -1;
+ }
+ return grand_child_f(nr, cmd_sock[0], data_map);
+}
+
+static void exit_usage(char **argv)
+{
+ printk("Usage: %s [nr_process]", argv[0]);
+ exit(KSFT_FAIL);
+}
+
+static int __write_desc(int test_desc_fd, struct xfrm_desc *desc)
+{
+ ssize_t ret;
+
+ ret = write(test_desc_fd, desc, sizeof(*desc));
+
+ if (ret == sizeof(*desc))
+ return 0;
+
+ pr_err("Writing test's desc failed %ld", ret);
+
+ return -1;
+}
+
+static int write_desc(int proto, int test_desc_fd,
+ char *a, char *e, char *c, char *ae)
+{
+ struct xfrm_desc desc = {};
+
+ desc.type = CREATE_TUNNEL;
+ desc.proto = proto;
+
+ if (a)
+ strncpy(desc.a_algo, a, ALGO_LEN - 1);
+ if (e)
+ strncpy(desc.e_algo, e, ALGO_LEN - 1);
+ if (c)
+ strncpy(desc.c_algo, c, ALGO_LEN - 1);
+ if (ae)
+ strncpy(desc.ae_algo, ae, ALGO_LEN - 1);
+
+ return __write_desc(test_desc_fd, &desc);
+}
+
+int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP };
+char *ah_list[] = {
+ "digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)",
+ "hmac(sha384)", "hmac(sha512)", "hmac(rmd160)",
+ "xcbc(aes)", "cmac(aes)"
+};
+char *comp_list[] = {
+ "deflate",
+#if 0
+ /* No compression backend realization */
+ "lzs", "lzjh"
+#endif
+};
+char *e_list[] = {
+ "ecb(cipher_null)", "cbc(des)", "cbc(des3_ede)", "cbc(cast5)",
+ "cbc(blowfish)", "cbc(aes)", "cbc(serpent)", "cbc(camellia)",
+ "cbc(twofish)", "rfc3686(ctr(aes))"
+};
+char *ae_list[] = {
+#if 0
+ /* not implemented */
+ "rfc4106(gcm(aes))", "rfc4309(ccm(aes))", "rfc4543(gcm(aes))",
+ "rfc7539esp(chacha20,poly1305)"
+#endif
+};
+
+const unsigned int proto_plan = ARRAY_SIZE(ah_list) + ARRAY_SIZE(comp_list) \
+ + (ARRAY_SIZE(ah_list) * ARRAY_SIZE(e_list)) \
+ + ARRAY_SIZE(ae_list);
+
+static int write_proto_plan(int fd, int proto)
+{
+ unsigned int i;
+
+ switch (proto) {
+ case IPPROTO_AH:
+ for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
+ if (write_desc(proto, fd, ah_list[i], 0, 0, 0))
+ return -1;
+ }
+ break;
+ case IPPROTO_COMP:
+ for (i = 0; i < ARRAY_SIZE(comp_list); i++) {
+ if (write_desc(proto, fd, 0, 0, comp_list[i], 0))
+ return -1;
+ }
+ break;
+ case IPPROTO_ESP:
+ for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
+ int j;
+
+ for (j = 0; j < ARRAY_SIZE(e_list); j++) {
+ if (write_desc(proto, fd, ah_list[i],
+ e_list[j], 0, 0))
+ return -1;
+ }
+ }
+ for (i = 0; i < ARRAY_SIZE(ae_list); i++) {
+ if (write_desc(proto, fd, 0, 0, 0, ae_list[i]))
+ return -1;
+ }
+ break;
+ default:
+ printk("BUG: Specified unknown proto %d", proto);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Some structures in xfrm uapi header differ in size between
+ * 64-bit and 32-bit ABI:
+ *
+ * 32-bit UABI | 64-bit UABI
+ * -------------------------------------|-------------------------------------
+ * sizeof(xfrm_usersa_info) = 220 | sizeof(xfrm_usersa_info) = 224
+ * sizeof(xfrm_userpolicy_info) = 164 | sizeof(xfrm_userpolicy_info) = 168
+ * sizeof(xfrm_userspi_info) = 228 | sizeof(xfrm_userspi_info) = 232
+ * sizeof(xfrm_user_acquire) = 276 | sizeof(xfrm_user_acquire) = 280
+ * sizeof(xfrm_user_expire) = 224 | sizeof(xfrm_user_expire) = 232
+ * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176
+ *
+ * Check the affected by the UABI difference structures.
+ */
+const unsigned int compat_plan = 4;
+static int write_compat_struct_tests(int test_desc_fd)
+{
+ struct xfrm_desc desc = {};
+
+ desc.type = ALLOCATE_SPI;
+ desc.proto = IPPROTO_AH;
+ strncpy(desc.a_algo, ah_list[0], ALGO_LEN - 1);
+
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = MONITOR_ACQUIRE;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = EXPIRE_STATE;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = EXPIRE_POLICY;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ return 0;
+}
+
+static int write_test_plan(int test_desc_fd)
+{
+ unsigned int i;
+ pid_t child;
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ }
+ if (child) {
+ if (close(test_desc_fd))
+ printk("close(): %m");
+ return 0;
+ }
+
+ if (write_compat_struct_tests(test_desc_fd))
+ exit(KSFT_FAIL);
+
+ for (i = 0; i < ARRAY_SIZE(proto_list); i++) {
+ if (write_proto_plan(test_desc_fd, proto_list[i]))
+ exit(KSFT_FAIL);
+ }
+
+ exit(KSFT_PASS);
+}
+
+static int children_cleanup(void)
+{
+ unsigned ret = KSFT_PASS;
+
+ while (1) {
+ int status;
+ pid_t p = wait(&status);
+
+ if ((p < 0) && errno == ECHILD)
+ break;
+
+ if (p < 0) {
+ pr_err("wait()");
+ return KSFT_FAIL;
+ }
+
+ if (!WIFEXITED(status)) {
+ ret = KSFT_FAIL;
+ continue;
+ }
+
+ if (WEXITSTATUS(status) == KSFT_FAIL)
+ ret = KSFT_FAIL;
+ }
+
+ return ret;
+}
+
+typedef void (*print_res)(const char *, ...);
+
+static int check_results(void)
+{
+ struct test_result tr = {};
+ struct xfrm_desc *d = &tr.desc;
+ int ret = KSFT_PASS;
+
+ while (1) {
+ ssize_t received = read(results_fd[0], &tr, sizeof(tr));
+ print_res result;
+
+ if (received == 0) /* EOF */
+ break;
+
+ if (received != sizeof(tr)) {
+ pr_err("read() returned %zd", received);
+ return KSFT_FAIL;
+ }
+
+ switch (tr.res) {
+ case KSFT_PASS:
+ result = ksft_test_result_pass;
+ break;
+ case KSFT_FAIL:
+ default:
+ result = ksft_test_result_fail;
+ ret = KSFT_FAIL;
+ }
+
+ result(" %s: [%u, '%s', '%s', '%s', '%s', %u]\n",
+ desc_name[d->type], (unsigned int)d->proto, d->a_algo,
+ d->e_algo, d->c_algo, d->ae_algo, d->icv_len);
+ }
+
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int nr_process = 1;
+ int route_sock = -1, ret = KSFT_SKIP;
+ int test_desc_fd[2];
+ uint32_t route_seq;
+ unsigned int i;
+
+ if (argc > 2)
+ exit_usage(argv);
+
+ if (argc > 1) {
+ char *endptr;
+
+ errno = 0;
+ nr_process = strtol(argv[1], &endptr, 10);
+ if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN))
+ || (errno != 0 && nr_process == 0)
+ || (endptr == argv[1]) || (*endptr != '\0')) {
+ printk("Failed to parse [nr_process]");
+ exit_usage(argv);
+ }
+
+ if (nr_process > MAX_PROCESSES || !nr_process) {
+ printk("nr_process should be between [1; %u]",
+ MAX_PROCESSES);
+ exit_usage(argv);
+ }
+ }
+
+ srand(time(NULL));
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size < 1)
+ ksft_exit_skip("sysconf(): %m\n");
+
+ if (pipe2(test_desc_fd, O_DIRECT) < 0)
+ ksft_exit_skip("pipe(): %m\n");
+
+ if (pipe2(results_fd, O_DIRECT) < 0)
+ ksft_exit_skip("pipe(): %m\n");
+
+ if (init_namespaces())
+ ksft_exit_skip("Failed to create namespaces\n");
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ ksft_exit_skip("Failed to open netlink route socket\n");
+
+ for (i = 0; i < nr_process; i++) {
+ char veth[VETH_LEN];
+
+ snprintf(veth, VETH_LEN, VETH_FMT, i);
+
+ if (veth_add(route_sock, route_seq++, veth, nsfd_childa, veth, nsfd_childb)) {
+ close(route_sock);
+ ksft_exit_fail_msg("Failed to create veth device");
+ }
+
+ if (start_child(i, veth, test_desc_fd)) {
+ close(route_sock);
+ ksft_exit_fail_msg("Child %u failed to start", i);
+ }
+ }
+
+ if (close(route_sock) || close(test_desc_fd[0]) || close(results_fd[1]))
+ ksft_exit_fail_msg("close(): %m");
+
+ ksft_set_plan(proto_plan + compat_plan);
+
+ if (write_test_plan(test_desc_fd[1]))
+ ksft_exit_fail_msg("Failed to write test plan to pipe");
+
+ ret = check_results();
+
+ if (children_cleanup() == KSFT_FAIL)
+ exit(KSFT_FAIL);
+
+ exit(ret);
+}
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index aa254aefc2c3..00bb158b4a5d 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -5,7 +5,8 @@ KSFT_KHDR_INSTALL := 1
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
+ simult_flows.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 8df5cb8f71ff..741a1c4f4ae8 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,4 +1,5 @@
CONFIG_MPTCP=y
+CONFIG_IPV6=y
CONFIG_MPTCP_IPV6=y
CONFIG_INET_DIAG=m
CONFIG_INET_MPTCP_DIAG=m
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 090620c3e10c..77bb62feb872 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -54,6 +54,7 @@ static int pf = AF_INET;
static int cfg_sndbuf;
static int cfg_rcvbuf;
static bool cfg_join;
+static bool cfg_remove;
static int cfg_wait;
static void die_usage(void)
@@ -65,8 +66,8 @@ static void die_usage(void)
fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
fprintf(stderr, "\t-p num -- use port num\n");
- fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
- fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
+ fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
+ fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1);
@@ -271,6 +272,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
if (cfg_join && first && do_w > 100)
do_w = 100;
+ if (cfg_remove && do_w > 50)
+ do_w = 50;
+
bw = write(fd, buf, do_w);
if (bw < 0)
perror("write");
@@ -281,6 +285,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
first = false;
}
+ if (cfg_remove)
+ usleep(200000);
+
return bw;
}
@@ -428,7 +435,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
}
/* leave some time for late join/announce */
- if (cfg_join)
+ if (cfg_join || cfg_remove)
usleep(cfg_wait);
close(peerfd);
@@ -686,7 +693,7 @@ static void maybe_close(int fd)
{
unsigned int r = rand();
- if (!cfg_join && (r & 1))
+ if (!(cfg_join || cfg_remove) && (r & 1))
close(fd);
}
@@ -822,13 +829,18 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) {
+ while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
cfg_mode = CFG_MODE_POLL;
cfg_wait = 400000;
break;
+ case 'r':
+ cfg_remove = true;
+ cfg_mode = CFG_MODE_POLL;
+ cfg_wait = 400000;
+ break;
case 'l':
listen_mode = true;
break;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 57d75b7f6220..2cfd87d94db8 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -14,9 +14,8 @@ capture=false
timeout=30
ipv6=true
ethtool_random_on=true
-tc_delay="$((RANDOM%400))"
+tc_delay="$((RANDOM%50))"
tc_loss=$((RANDOM%101))
-tc_reorder=""
testmode=""
sndbuf=0
rcvbuf=0
@@ -444,9 +443,9 @@ do_transfer()
duration=$(printf "(duration %05sms)" $duration)
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2
- echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
- echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
cat "$capout"
@@ -628,30 +627,32 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns4" $sender dead:beef:3::1
done
-[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss
+[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
echo -n "INFO: Using loss of $tc_loss "
test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+reorder_delay=$(($tc_delay / 4))
+
if [ -z "${tc_reorder}" ]; then
reorder1=$((RANDOM%10))
reorder1=$((100 - reorder1))
reorder2=$((RANDOM%100))
- if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
+ if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
tc_reorder="reorder ${reorder1}% ${reorder2}%"
- echo -n "$tc_reorder "
+ echo -n "$tc_reorder with delay ${reorder_delay}ms "
fi
elif [ "$tc_reorder" = "0" ];then
tc_reorder=""
-elif [ "$tc_delay" -gt 0 ];then
+elif [ "$reorder_delay" -gt 0 ];then
# reordering requires some delay
tc_reorder="reorder $tc_reorder"
- echo -n "$tc_reorder "
+ echo -n "$tc_reorder with delay ${reorder_delay}ms "
fi
echo "on ns3eth4"
-tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder
+tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
for sender in $ns1 $ns2 $ns3 $ns4;do
run_tests_lo "$ns1" "$sender" 10.0.1.1 1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f39c1129ce5f..08f53d86dedc 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,6 +8,7 @@ cin=""
cout=""
ksft_skip=4
timeout=30
+mptcp_connect=""
capture=0
TEST_COUNT=0
@@ -132,6 +133,8 @@ do_transfer()
cl_proto="$3"
srv_proto="$4"
connect_addr="$5"
+ rm_nr_ns1="$6"
+ rm_nr_ns2="$7"
port=$((10000+$TEST_COUNT))
TEST_COUNT=$((TEST_COUNT+1))
@@ -156,14 +159,44 @@ do_transfer()
sleep 1
fi
- ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
+ if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then
+ mptcp_connect="./mptcp_connect -j"
+ else
+ mptcp_connect="./mptcp_connect -r"
+ fi
+
+ ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
spid=$!
sleep 1
- ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
cpid=$!
+ if [ $rm_nr_ns1 -gt 0 ]; then
+ counter=1
+ sleep 1
+
+ while [ $counter -le $rm_nr_ns1 ]
+ do
+ ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+ sleep 1
+ let counter+=1
+ done
+ fi
+
+ if [ $rm_nr_ns2 -gt 0 ]; then
+ counter=1
+ sleep 1
+
+ while [ $counter -le $rm_nr_ns2 ]
+ do
+ ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+ sleep 1
+ let counter+=1
+ done
+ fi
+
wait $cpid
retc=$?
wait $spid
@@ -176,9 +209,9 @@ do_transfer()
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
echo " client exit code $retc, server $rets" 1>&2
- echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
- echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+ echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
cat "$capout"
@@ -219,7 +252,24 @@ run_tests()
connect_addr="$3"
lret=0
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return
+ fi
+}
+
+run_remove_tests()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ connect_addr="$3"
+ rm_nr_ns1="$4"
+ rm_nr_ns2="$5"
+ lret=0
+
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2}
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
@@ -276,6 +326,80 @@ chk_join_nr()
fi
}
+chk_add_nr()
+{
+ local add_nr=$1
+ local echo_nr=$2
+ local count
+ local dump_stats
+
+ printf "%-39s %s" " " "add"
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$add_nr" ]; then
+ echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - echo "
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$echo_nr" ]; then
+ echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
+chk_rm_nr()
+{
+ local rm_addr_nr=$1
+ local rm_subflow_nr=$2
+ local count
+ local dump_stats
+
+ printf "%-39s %s" " " "rm "
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$rm_addr_nr" ]; then
+ echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - sf "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$rm_subflow_nr" ]; then
+ echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
@@ -332,6 +456,7 @@ reset
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "unused signal address" 0 0 0
+chk_add_nr 1 1
# accept and use add_addr
reset
@@ -340,6 +465,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address" 1 1 1
+chk_add_nr 1 1
# accept and use add_addr with an additional subflow
# note: signal address in server ns and local addresses in client ns must
@@ -352,6 +478,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and signal" 2 2 2
+chk_add_nr 1 1
# accept and use add_addr with additional subflows
reset
@@ -362,6 +489,59 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows and signal" 3 3 3
+chk_add_nr 1 1
+
+# single subflow, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 0 1
+chk_join_nr "remove single subflow" 1 1 1
+chk_rm_nr 1 1
+
+# multiple subflows, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 0 2
+chk_join_nr "remove multiple subflows" 2 2 2
+chk_rm_nr 2 2
+
+# single address, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+run_remove_tests $ns1 $ns2 10.0.1.1 1 0
+chk_join_nr "remove single address" 1 1 1
+chk_add_nr 1 1
+chk_rm_nr 0 0
+
+# subflow and signal, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 1 1
+chk_join_nr "remove subflow and signal" 2 2 2
+chk_add_nr 1 1
+chk_rm_nr 1 1
+
+# subflows and signal, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 1 2
+chk_join_nr "remove subflows and signal" 3 3 3
+chk_add_nr 1 1
+chk_rm_nr 2 2
# single subflow, syncookies
reset_with_cookies
@@ -396,6 +576,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address with syn cookies" 1 1 1
+chk_add_nr 1 1
# test cookie with subflow and signal
reset_with_cookies
@@ -405,6 +586,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and signal w cookies" 2 2 2
+chk_add_nr 1 1
# accept and use add_addr with additional subflows
reset_with_cookies
@@ -415,5 +597,6 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflows and signal w. cookies" 3 3 3
+chk_add_nr 1 1
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
new file mode 100755
index 000000000000..2f649b431456
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -0,0 +1,293 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+ns3="ns3-$rndh"
+capture=false
+ksft_skip=4
+timeout=30
+test_cnt=1
+ret=0
+bail=0
+
+usage() {
+ echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
+ echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+ echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+ echo -e "\t-d: debug this script"
+}
+
+cleanup()
+{
+ rm -f "$cin" "$cout"
+ rm -f "$sin" "$sout"
+ rm -f "$capout"
+
+ local netns
+ for netns in "$ns1" "$ns2" "$ns3";do
+ ip netns del $netns
+ done
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+# "$ns1" ns2 ns3
+# ns1eth1 ns2eth1 ns2eth3 ns3eth1
+# netem
+# ns1eth2 ns2eth2
+# netem
+
+setup()
+{
+ large=$(mktemp)
+ small=$(mktemp)
+ sout=$(mktemp)
+ cout=$(mktemp)
+ capout=$(mktemp)
+ size=$((2048 * 4096))
+ dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
+ dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
+
+ trap cleanup EXIT
+
+ for i in "$ns1" "$ns2" "$ns3";do
+ ip netns add $i || exit $ksft_skip
+ ip -net $i link set lo up
+ done
+
+ ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+ ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
+ ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3"
+
+ ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1
+ ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad
+ ip -net "$ns1" link set ns1eth1 up mtu 1500
+ ip -net "$ns1" route add default via 10.0.1.2
+ ip -net "$ns1" route add default via dead:beef:1::2
+
+ ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ ip -net "$ns1" link set ns1eth2 up mtu 1500
+ ip -net "$ns1" route add default via 10.0.2.2 metric 101
+ ip -net "$ns1" route add default via dead:beef:2::2 metric 101
+
+ ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
+ ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
+ ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
+
+ ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ ip -net "$ns2" link set ns2eth1 up mtu 1500
+
+ ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2
+ ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad
+ ip -net "$ns2" link set ns2eth2 up mtu 1500
+
+ ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3
+ ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad
+ ip -net "$ns2" link set ns2eth3 up mtu 1500
+ ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
+ ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1
+
+ ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1
+ ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad
+ ip -net "$ns3" link set ns3eth1 up mtu 1500
+ ip -net "$ns3" route add default via 10.0.3.2
+ ip -net "$ns3" route add default via dead:beef:3::2
+
+ ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+}
+
+# $1: ns, $2: port
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
+
+do_transfer()
+{
+ local cin=$1
+ local sin=$2
+ local max_time=$3
+ local port
+ port=$((10000+$test_cnt))
+ test_cnt=$((test_cnt+1))
+
+ :> "$cout"
+ :> "$sout"
+ :> "$capout"
+
+ local addr_port
+ addr_port=$(printf "%s:%d" ${connect_addr} ${port})
+
+ if $capture; then
+ local capuser
+ if [ -z $SUDO_USER ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ local capfile="${rndh}-${port}"
+ local capopt="-i any -s 65535 -B 32768 ${capuser}"
+
+ ip netns exec ${ns3} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ local cappid_listener=$!
+
+ ip netns exec ${ns1} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
+
+ sleep 1
+ fi
+
+ ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" &
+ local spid=$!
+
+ wait_local_port_listen "${ns3}" "${port}"
+
+ local start
+ start=$(date +%s%3N)
+ ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" &
+ local cpid=$!
+
+ wait $cpid
+ local retc=$?
+ wait $spid
+ local rets=$?
+
+ local stop
+ stop=$(date +%s%3N)
+
+ if $capture; then
+ sleep 1
+ kill ${cappid_listener}
+ kill ${cappid_connector}
+ fi
+
+ local duration
+ duration=$((stop-start))
+
+ cmp $sin $cout > /dev/null 2>&1
+ local cmps=$?
+ cmp $cin $sout > /dev/null 2>&1
+ local cmpc=$?
+
+ printf "%16s" "$duration max $max_time "
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
+ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \
+ [ $duration -lt $max_time ]; then
+ echo "[ OK ]"
+ cat "$capout"
+ return 0
+ fi
+
+ echo " [ fail ]"
+ echo "client exit code $retc, server $rets" 1>&2
+ echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
+ ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
+ echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
+ ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+ ls -l $sin $cout
+ ls -l $cin $sout
+
+ cat "$capout"
+ return 1
+}
+
+run_test()
+{
+ local rate1=$1
+ local rate2=$2
+ local delay1=$3
+ local delay2=$4
+ local lret
+ local dev
+ shift 4
+ local msg=$*
+
+ [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1=""
+ [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2=""
+
+ for dev in ns1eth1 ns1eth2; do
+ tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1
+ done
+ for dev in ns2eth1 ns2eth2; do
+ tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
+ done
+ tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+ # time is measure in ms
+ local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) ))
+
+ # mptcp_connect will do some sleeps to allow the mp_join handshake
+ # completion
+ time=$((time + 1350))
+
+ printf "%-50s" "$msg"
+ do_transfer $small $large $((time * 11 / 10))
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+
+ printf "%-50s" "$msg - reverse direction"
+ do_transfer $large $small $((time * 11 / 10))
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+}
+
+while getopts "bcdh" option;do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "b")
+ bail=1
+ ;;
+ "c")
+ capture=true
+ ;;
+ "d")
+ set -x
+ ;;
+ "?")
+ usage $0
+ exit 1
+ ;;
+ esac
+done
+
+setup
+run_test 10 10 0 0 "balanced bwidth"
+run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+
+# we still need some additional infrastructure to pass the following test-cases
+# run_test 30 10 0 0 "unbalanced bwidth"
+# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
+# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+exit $ret
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index 93208caacbe6..f75c53ce0a2d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -1667,6 +1667,8 @@ int main(int argc, char *argv[])
case 'R':
args.type = SOCK_RAW;
args.port = 0;
+ if (!args.protocol)
+ args.protocol = IPPROTO_RAW;
break;
case 'P':
pe = getprotobyname(optarg);
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 6331d91b86a6..170be65e0816 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -45,7 +45,7 @@ echo "raw vnet hdr"
echo "raw csum_off"
./in_netns.sh ./psock_snd -v -c
-echo "raw csum_off with bad offset (fails)"
+echo "raw csum_off with bad offset (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -C)
@@ -57,7 +57,7 @@ echo "raw min size"
echo "raw mtu size"
./in_netns.sh ./psock_snd -l "${mss}"
-echo "raw mtu size + 1 (fails)"
+echo "raw mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
@@ -65,19 +65,19 @@ echo "raw mtu size + 1 (fails)"
# echo "raw vlan mtu size"
# ./in_netns.sh ./psock_snd -V -l "${mss}"
-echo "raw vlan mtu size + 1 (fails)"
+echo "raw vlan mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
echo "dgram mtu size"
./in_netns.sh ./psock_snd -d -l "${mss}"
-echo "dgram mtu size + 1 (fails)"
+echo "dgram mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
-echo "raw truncate hlen (fails: does not arrive)"
+echo "raw truncate hlen (expected to fail: does not arrive)"
(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
-echo "raw truncate hlen - 1 (fails: EINVAL)"
+echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
@@ -86,13 +86,13 @@ echo "raw truncate hlen - 1 (fails: EINVAL)"
echo "raw gso min size"
./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
-echo "raw gso min size - 1 (fails)"
+echo "raw gso min size - 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
echo "raw gso max size"
./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
-echo "raw gso max size + 1 (fails)"
+echo "raw gso max size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 8a2fe6d64bf2..c9ce3dfa42ee 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -520,6 +520,11 @@ kci_test_encap_fou()
return $ksft_skip
fi
+ if ! /sbin/modprobe -q -n fou; then
+ echo "SKIP: module fou is not found"
+ return $ksft_skip
+ fi
+ /sbin/modprobe -q fou
ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null
if [ $? -ne 0 ];then
echo "FAIL: can't add fou port 7777, skipping test"
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index a61b7b3da549..00f837c9bc6c 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -123,6 +123,28 @@ void hash_zone(void *zone, unsigned int length)
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+
+static void *mmap_large_buffer(size_t need, size_t *allocated)
+{
+ void *buffer;
+ size_t sz;
+
+ /* Attempt to use huge pages if possible. */
+ sz = ALIGN_UP(need, map_align);
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+ if (buffer == (void *)-1) {
+ sz = need;
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buffer != (void *)-1)
+ fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
+ }
+ *allocated = sz;
+ return buffer;
+}
+
void *child_thread(void *arg)
{
unsigned long total_mmap = 0, total = 0;
@@ -135,6 +157,7 @@ void *child_thread(void *arg)
void *addr = NULL;
double throughput;
struct rusage ru;
+ size_t buffer_sz;
int lu, fd;
fd = (int)(unsigned long)arg;
@@ -142,9 +165,9 @@ void *child_thread(void *arg)
gettimeofday(&t0, NULL);
fcntl(fd, F_SETFL, O_NDELAY);
- buffer = malloc(chunk_size);
- if (!buffer) {
- perror("malloc");
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
+ if (buffer == (void *)-1) {
+ perror("mmap");
goto error;
}
if (zflg) {
@@ -179,6 +202,10 @@ void *child_thread(void *arg)
total_mmap += zc.length;
if (xflg)
hash_zone(addr, zc.length);
+ /* It is more efficient to unmap the pages right now,
+ * instead of doing this in next TCP_ZEROCOPY_RECEIVE.
+ */
+ madvise(addr, zc.length, MADV_DONTNEED);
total += zc.length;
}
if (zc.recv_skip_hint) {
@@ -230,7 +257,7 @@ end:
ru.ru_nvcsw);
}
error:
- free(buffer);
+ munmap(buffer, buffer_sz);
close(fd);
if (zflg)
munmap(raddr, chunk_size + map_align);
@@ -347,6 +374,7 @@ int main(int argc, char *argv[])
uint64_t total = 0;
char *host = NULL;
int fd, c, on = 1;
+ size_t buffer_sz;
char *buffer;
int sflg = 0;
int mss = 0;
@@ -437,8 +465,8 @@ int main(int argc, char *argv[])
}
do_accept(fdlisten);
}
- buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
if (buffer == (char *)-1) {
perror("mmap");
exit(1);
@@ -484,6 +512,6 @@ int main(int argc, char *argv[])
total += wr;
}
close(fd);
- munmap(buffer, chunk_size);
+ munmap(buffer, buffer_sz);
return 0;
}
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
new file mode 100755
index 000000000000..23cf924754a5
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -0,0 +1,626 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved.
+# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved.
+#
+# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS.
+#
+#
+# Symmetric routing topology
+#
+# blue red
+# +----+ .253 +----+ .253 +----+
+# | h1 |-------------------| r1 |-------------------| h2 |
+# +----+ .1 +----+ .2 +----+
+# 172.16.1/24 172.16.2/24
+# 2001:db8:16:1/64 2001:db8:16:2/64
+#
+#
+# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route
+# to the outgoing vrf red for the n2 network and red has a route back to n1.
+# The red VRF interface has a MTU of 1400.
+#
+# The first test sends a ping with a ttl of 1 from h1 to h2 and parses the
+# output of the command to check that a ttl expired error is received.
+#
+# The second test runs traceroute from h1 to h2 and parses the output to check
+# for a hop on r1.
+#
+# The third test sends a ping with a packet size of 1450 from h1 to h2 and
+# parses the output of the command to check that a fragmentation error is
+# received.
+#
+#
+# Asymmetric routing topology
+#
+# This topology represents a customer setup where the issue with icmp errors
+# and VRF route leaking was initialy reported. The MTU test isn't done here
+# because of the lack of a return route in the red VRF.
+#
+# blue red
+# .253 +----+ .253
+# +----| r1 |----+
+# | +----+ |
+# +----+ | | +----+
+# | h1 |--------------+ +--------------| h2 |
+# +----+ .1 | | .2 +----+
+# 172.16.1/24 | +----+ | 172.16.2/24
+# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64
+# .254 +----+ .254
+#
+#
+# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the
+# outgoing vrf red for the n2 network but red doesn't have a route back to n1.
+# Route from h2 to h1 goes through r2.
+#
+# The objective is to check that the incoming vrf routing table is selected
+# to send an ICMP error back to the source when the ttl of a packet reaches 1
+# while it is forwarded between different vrfs.
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+DEFAULT_TTYPE=sym
+
+H1_N1=172.16.1.0/24
+H1_N1_6=2001:db8:16:1::/64
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+
+H2_N2_IP=172.16.2.2
+R1_N2_IP=172.16.2.253
+R2_N2_IP=172.16.2.254
+
+H2_N2_IP6=2001:db8:16:2::2
+R1_N2_IP6=2001:db8:16:2::253
+R2_N2_IP6=2001:db8:16:2::254
+
+################################################################################
+# helpers
+
+log_section()
+{
+ echo
+ echo "###########################################################################"
+ echo "$*"
+ echo "###########################################################################"
+ echo
+}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read -r a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+run_cmd_grep()
+{
+ local grep_pattern="$1"
+ shift
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ echo "$out" | grep -q "$grep_pattern"
+ rc=$?
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+ local ns
+
+ for ns in h1 h2 r1 r2; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+setup_vrf()
+{
+ local ns=$1
+
+ ip -netns "${ns}" rule del pref 0
+ ip -netns "${ns}" rule add pref 32765 from all lookup local
+ ip -netns "${ns}" -6 rule del pref 0
+ ip -netns "${ns}" -6 rule add pref 32765 from all lookup local
+}
+
+create_vrf()
+{
+ local ns=$1
+ local vrf=$2
+ local table=$3
+
+ ip -netns "${ns}" link add "${vrf}" type vrf table "${table}"
+ ip -netns "${ns}" link set "${vrf}" up
+ ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192
+ ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192
+
+ ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}"
+ ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad
+}
+
+setup_sym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1; do
+ ip netns add $ns
+ ip -netns $ns link set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 link add eth0 type veth peer name r1h1
+ ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+ ip -netns h2 link add eth0 type veth peer name r1h2
+ ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 link set eth0 up
+
+ # h1 to h2 via r1
+ ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0
+ ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
+
+ #
+ # h2
+ #
+ ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
+ ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 link set eth0 up
+
+ # h2 to h1 via r1
+ ip -netns h2 route add default via ${R1_N2_IP} dev eth0
+ ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0
+
+ #
+ # r1
+ #
+ setup_vrf r1
+ create_vrf r1 blue 1101
+ create_vrf r1 red 1102
+ ip -netns r1 link set mtu 1400 dev eth1
+ ip -netns r1 link set eth0 vrf blue up
+ ip -netns r1 link set eth1 vrf red up
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns r1 route add vrf blue ${H2_N2} dev red
+ ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # Route leak from red to blue
+ ip -netns r1 route add vrf red ${H1_N1} dev blue
+ ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue
+
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+setup_asym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1 r2; do
+ ip netns add $ns
+ ip -netns $ns link set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 link add eth0 type veth peer name r1h1
+ ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+ ip -netns h1 link add eth1 type veth peer name r2h1
+ ip -netns h1 link set r2h1 netns r2 name eth0 up
+
+ ip -netns h2 link add eth0 type veth peer name r1h2
+ ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+ ip -netns h2 link add eth1 type veth peer name r2h2
+ ip -netns h2 link set r2h2 netns r2 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns h1 link add br0 type bridge
+ ip -netns h1 link set br0 up
+ ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 link set eth0 master br0 up
+ ip -netns h1 link set eth1 master br0 up
+
+ # h1 to h2 via r1
+ ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0
+ ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
+
+ #
+ # h2
+ #
+ ip -netns h2 link add br0 type bridge
+ ip -netns h2 link set br0 up
+ ip -netns h2 addr add dev br0 ${H2_N2_IP}/24
+ ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 link set eth0 master br0 up
+ ip -netns h2 link set eth1 master br0 up
+
+ # h2 to h1 via r2
+ ip -netns h2 route add default via ${R2_N2_IP} dev br0
+ ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0
+
+ #
+ # r1
+ #
+ setup_vrf r1
+ create_vrf r1 blue 1101
+ create_vrf r1 red 1102
+ ip -netns r1 link set mtu 1400 dev eth1
+ ip -netns r1 link set eth0 vrf blue up
+ ip -netns r1 link set eth1 vrf red up
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns r1 route add vrf blue ${H2_N2} dev red
+ ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # No route leak from red to blue
+
+ #
+ # r2
+ #
+ ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
+ ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+ ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24
+ ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+check_connectivity()
+{
+ ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv4 connectivity"
+ return $?
+}
+
+check_connectivity6()
+{
+ ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv6 connectivity"
+ return $?
+}
+
+check_traceroute()
+{
+ if [ ! -x "$(command -v traceroute)" ]; then
+ echo "SKIP: Could not run IPV4 test without traceroute"
+ return 1
+ fi
+}
+
+check_traceroute6()
+{
+ if [ ! -x "$(command -v traceroute6)" ]; then
+ echo "SKIP: Could not run IPV6 test without traceroute6"
+ return 1
+ fi
+}
+
+ipv4_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute || return
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP}
+ log_test $? 0 "Traceroute reports a hop on r1"
+}
+
+ipv4_traceroute_asym()
+{
+ ipv4_traceroute asym
+}
+
+ipv6_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute6 || return
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6}
+ log_test $? 0 "Traceroute6 reports a hop on r1"
+}
+
+ipv6_traceroute_asym()
+{
+ ipv6_traceroute asym
+}
+
+ipv4_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP ttl exceeded"
+}
+
+ipv4_ping_ttl_asym()
+{
+ ipv4_ping_ttl asym
+}
+
+ipv4_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP Frag needed"
+}
+
+ipv4_ping_frag_asym()
+{
+ ipv4_ping_frag asym
+}
+
+ipv6_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Hop limit"
+}
+
+ipv6_ping_ttl_asym()
+{
+ ipv6_ping_ttl asym
+}
+
+ipv6_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Packet too big"
+}
+
+ipv6_ping_frag_asym()
+{
+ ipv6_ping_frag asym
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -4 Run IPv4 tests only
+ -6 Run IPv6 tests only
+ -t TEST Run only TEST
+ -p Pause on fail
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
+
+TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
+TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym"
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :46t:pvh o
+do
+ case $o in
+ 4) TESTS=ipv4;;
+ 6) TESTS=ipv6;;
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+#
+# show user test config
+#
+if [ -z "$TESTS" ]; then
+ TESTS="$TESTS_IPV4 $TESTS_IPV6"
+elif [ "$TESTS" = "ipv4" ]; then
+ TESTS="$TESTS_IPV4"
+elif [ "$TESTS" = "ipv6" ]; then
+ TESTS="$TESTS_IPV6"
+fi
+
+for t in $TESTS
+do
+ case $t in
+ ipv4_ping_ttl|ping) ipv4_ping_ttl;;&
+ ipv4_ping_ttl_asym|ping) ipv4_ping_ttl_asym;;&
+ ipv4_traceroute|traceroute) ipv4_traceroute;;&
+ ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;&
+ ipv4_ping_frag|ping) ipv4_ping_frag;;&
+
+ ipv6_ping_ttl|ping) ipv6_ping_ttl;;&
+ ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;&
+ ipv6_traceroute|traceroute) ipv6_traceroute;;&
+ ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;&
+ ipv6_ping_frag|ping) ipv6_ping_frag;;&
+
+ # setup namespaces and config, but do not run any tests
+ setup_sym|setup) setup_sym; exit 0;;
+ setup_asym) setup_asym; exit 0;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+cleanup
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret