Merge branch 'add-support-for-per-route-seg6-tunsrc'

Justin Iurman says:

====================
Add support for per-route seg6 tunsrc

This series adds support for the new per-route seg6 "tunsrc" parameter.
Selftests are extended to make sure it works as expected.

Example with the iproute2-next companion patch:

ip -6 r a 2001:db8:1::/64 encap seg6 mode encap tunsrc 2001:db8:ab::
    segs 2001:db8:42::1,2001:db8:ffff::2 dev eth0
====================

Link: https://patch.msgid.link/20260324091434.359341-1-justin.iurman@6wind.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2026-03-26 18:45:32 -07:00
3 changed files with 180 additions and 44 deletions
+1
View File
@@ -20,6 +20,7 @@
/* Attribute identifiers for the seg6 iptunnel netlink configuration.
 * __SEG6_IPTUNNEL_MAX is a sentinel and must stay last: SEG6_IPTUNNEL_MAX
 * is derived from it.
 */
enum {
SEG6_IPTUNNEL_UNSPEC,
SEG6_IPTUNNEL_SRH,
SEG6_IPTUNNEL_SRC, /* struct in6_addr */
__SEG6_IPTUNNEL_MAX,
};
#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1)
+83 -31
View File
@@ -49,6 +49,7 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
/* State attached to a route that uses seg6 encapsulation. */
struct seg6_lwt {
/* dst cache; set up with dst_cache_init() when the state is built */
struct dst_cache cache;
/* Per-route tunnel source address. The unspecified address (::) is
 * treated as "not configured" by the users of this field.
 */
struct in6_addr tunsrc;
/* Encap info; flexible array member, so it must remain the last field */
struct seg6_iptunnel_encap tuninfo[];
};
@@ -65,6 +66,7 @@ seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
/* Netlink validation policy for the SEG6_IPTUNNEL_* attributes. */
static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
[SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY },
/* tunsrc payload must be exactly one struct in6_addr in size */
[SEG6_IPTUNNEL_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
};
static int nla_put_srh(struct sk_buff *skb, int attrtype,
@@ -87,23 +89,32 @@ static int nla_put_srh(struct sk_buff *skb, int attrtype,
}
static void set_tun_src(struct net *net, struct net_device *dev,
struct in6_addr *daddr, struct in6_addr *saddr)
struct in6_addr *daddr, struct in6_addr *saddr,
struct in6_addr *route_tunsrc)
{
struct seg6_pernet_data *sdata = seg6_pernet(net);
struct in6_addr *tun_src;
rcu_read_lock();
tun_src = rcu_dereference(sdata->tun_src);
if (!ipv6_addr_any(tun_src)) {
memcpy(saddr, tun_src, sizeof(struct in6_addr));
/* Priority order to select tunnel source address:
* 1. per route source address (if configured)
* 2. per network namespace source address (if configured)
* 3. dynamic resolution
*/
if (route_tunsrc && !ipv6_addr_any(route_tunsrc)) {
memcpy(saddr, route_tunsrc, sizeof(struct in6_addr));
} else {
ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
saddr);
}
rcu_read_lock();
tun_src = rcu_dereference(sdata->tun_src);
rcu_read_unlock();
if (!ipv6_addr_any(tun_src)) {
memcpy(saddr, tun_src, sizeof(struct in6_addr));
} else {
ipv6_dev_get_saddr(net, dev, daddr,
IPV6_PREFER_SRC_PUBLIC, saddr);
}
rcu_read_unlock();
}
}
/* Compute flowlabel for outer IPv6 header */
@@ -125,7 +136,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
}
static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
int proto, struct dst_entry *cache_dst)
int proto, struct dst_entry *cache_dst,
struct in6_addr *route_tunsrc)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst_dev(dst);
@@ -182,7 +194,7 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);
set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc);
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(isrh)) {
@@ -202,14 +214,15 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
return __seg6_do_srh_encap(skb, osrh, proto, NULL);
return __seg6_do_srh_encap(skb, osrh, proto, NULL, NULL);
}
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
static int seg6_do_srh_encap_red(struct sk_buff *skb,
struct ipv6_sr_hdr *osrh, int proto,
struct dst_entry *cache_dst)
struct dst_entry *cache_dst,
struct in6_addr *route_tunsrc)
{
__u8 first_seg = osrh->first_segment;
struct dst_entry *dst = skb_dst(skb);
@@ -272,7 +285,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
if (skip_srh) {
hdr->nexthdr = proto;
set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);
set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc);
goto out;
}
@@ -308,7 +321,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
srcaddr:
isrh->nexthdr = proto;
set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);
set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc);
#ifdef CONFIG_IPV6_SEG6_HMAC
if (unlikely(!skip_srh && sr_has_hmac(isrh))) {
@@ -383,9 +396,11 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
struct seg6_lwt *slwt;
int proto, err = 0;
tinfo = seg6_encap_lwtunnel(dst->lwtstate);
slwt = seg6_lwt_lwtunnel(dst->lwtstate);
tinfo = slwt->tuninfo;
switch (tinfo->mode) {
case SEG6_IPTUN_MODE_INLINE:
@@ -410,11 +425,11 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
return -EINVAL;
if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
err = __seg6_do_srh_encap(skb, tinfo->srh,
proto, cache_dst);
err = __seg6_do_srh_encap(skb, tinfo->srh, proto,
cache_dst, &slwt->tunsrc);
else
err = seg6_do_srh_encap_red(skb, tinfo->srh,
proto, cache_dst);
err = seg6_do_srh_encap_red(skb, tinfo->srh, proto,
cache_dst, &slwt->tunsrc);
if (err)
return err;
@@ -436,12 +451,12 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
err = __seg6_do_srh_encap(skb, tinfo->srh,
IPPROTO_ETHERNET,
cache_dst);
IPPROTO_ETHERNET, cache_dst,
&slwt->tunsrc);
else
err = seg6_do_srh_encap_red(skb, tinfo->srh,
IPPROTO_ETHERNET,
cache_dst);
IPPROTO_ETHERNET, cache_dst,
&slwt->tunsrc);
if (err)
return err;
@@ -678,6 +693,10 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
if (family != AF_INET6)
return -EINVAL;
if (tb[SEG6_IPTUNNEL_SRC]) {
NL_SET_ERR_MSG(extack, "incompatible mode for tunsrc");
return -EINVAL;
}
break;
case SEG6_IPTUN_MODE_ENCAP:
break;
@@ -702,13 +721,23 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
slwt = seg6_lwt_lwtunnel(newts);
err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
if (err) {
kfree(newts);
return err;
}
if (err)
goto free_lwt_state;
memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
if (tb[SEG6_IPTUNNEL_SRC]) {
slwt->tunsrc = nla_get_in6_addr(tb[SEG6_IPTUNNEL_SRC]);
if (ipv6_addr_any(&slwt->tunsrc) ||
ipv6_addr_is_multicast(&slwt->tunsrc) ||
ipv6_addr_loopback(&slwt->tunsrc)) {
NL_SET_ERR_MSG(extack, "invalid tunsrc address");
err = -EINVAL;
goto free_dst_cache;
}
}
newts->type = LWTUNNEL_ENCAP_SEG6;
newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
@@ -720,6 +749,12 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
*ts = newts;
return 0;
free_dst_cache:
dst_cache_destroy(&slwt->cache);
free_lwt_state:
kfree(newts);
return err;
}
static void seg6_destroy_state(struct lwtunnel_state *lwt)
@@ -731,29 +766,46 @@ static int seg6_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
{
struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate);
if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
return -EMSGSIZE;
if (!ipv6_addr_any(&slwt->tunsrc) &&
nla_put_in6_addr(skb, SEG6_IPTUNNEL_SRC, &slwt->tunsrc))
return -EMSGSIZE;
return 0;
}
static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate);
int nlsize;
return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
nlsize = nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
if (!ipv6_addr_any(&slwt->tunsrc))
nlsize += nla_total_size(sizeof(slwt->tunsrc));
return nlsize;
}
/* Compare the seg6 encap configuration of two lwtunnel states.
 *
 * Returns 0 when both states carry the same encap info and the same
 * per-route tunnel source address, and a non-zero value otherwise.
 */
static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct seg6_iptunnel_encap *encap_a = seg6_encap_lwtunnel(a);
	struct seg6_iptunnel_encap *encap_b = seg6_encap_lwtunnel(b);
	struct seg6_lwt *lwt_a = seg6_lwt_lwtunnel(a);
	struct seg6_lwt *lwt_b = seg6_lwt_lwtunnel(b);
	int size_a = SEG6_IPTUN_ENCAP_SIZE(encap_a);

	/* Differing encap sizes or tunnel sources can never match; the size
	 * check comes first so the memcmp() below is only reached when both
	 * encap blobs have the same length.
	 */
	if (size_a != SEG6_IPTUN_ENCAP_SIZE(encap_b) ||
	    !ipv6_addr_equal(&lwt_a->tunsrc, &lwt_b->tunsrc))
		return 1;

	return memcmp(encap_a, encap_b, size_a);
}
@@ -193,6 +193,8 @@ ret=${ksft_skip}
nsuccess=0
nfail=0
HAS_TUNSRC=false
log_test()
{
local rc="$1"
@@ -345,6 +347,17 @@ setup_rt_networking()
ip -netns "${nsname}" addr \
add "${net_prefix}::${rt}/64" dev "${devname}" nodad
# A dedicated ::dead:<rt> address (with preferred_lft 0, i.e.,
# deprecated) is added when there is support for tunsrc. Because
# it is deprecated, the kernel should never auto-select it as
# source with current config. Only an explicit tunsrc can place
# it in the outer header.
if $HAS_TUNSRC; then
ip -netns "${nsname}" addr \
add "${net_prefix}::dead:${rt}/64" \
dev "${devname}" nodad preferred_lft 0
fi
ip -netns "${nsname}" link set "${devname}" up
done
@@ -420,6 +433,7 @@ setup_rt_local_sids()
# to the destination host)
# $5 - encap mode (full or red)
# $6 - traffic type (IPv6 or IPv4)
# $7 - force tunsrc (true or false)
__setup_rt_policy()
{
local dst="$1"
@@ -428,10 +442,46 @@ __setup_rt_policy()
local dec_rt="$4"
local mode="$5"
local traffic="$6"
local with_tunsrc="$7"
local nsname
local policy=''
local tunsrc=''
local n
# Verify the per-route tunnel source address ("tunsrc") feature.
# If it is not supported, fall back to the encap config without tunsrc.
if $with_tunsrc && $HAS_TUNSRC; then
local net_prefix
local drule
local nxt
eval nsname=\${$(get_rtname "${dec_rt}")}
# Next SRv6 hop: first End router if any, or the decap router
[ -z "${end_rts}" ] && nxt="${dec_rt}" || nxt="${end_rts%% *}"
# Use the right prefix for tunsrc depending on the next SRv6 hop
net_prefix="$(get_network_prefix "${encap_rt}" "${nxt}")"
tunsrc="tunsrc ${net_prefix}::dead:${encap_rt}"
# To verify that the outer source address matches the one
# configured with tunsrc, the decap router discards packets
# with any other source address.
ip netns exec "${nsname}" ip6tables -t raw -I PREROUTING 1 \
-s "${net_prefix}::dead:${encap_rt}" \
-d "${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}" \
-j ACCEPT
drule="PREROUTING \
-d ${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC} \
-j DROP"
if ! ip netns exec "${nsname}" \
ip6tables -t raw -C ${drule} &>/dev/null; then
ip netns exec "${nsname}" ip6tables -t raw -A ${drule}
fi
fi
eval nsname=\${$(get_rtname "${encap_rt}")}
for n in ${end_rts}; do
@@ -444,7 +494,7 @@ __setup_rt_policy()
if [ "${traffic}" -eq 6 ]; then
ip -netns "${nsname}" -6 route \
add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
encap seg6 mode "${mode}" segs "${policy}" \
encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \
dev "${VRF_DEVNAME}"
ip -netns "${nsname}" -6 neigh \
@@ -455,7 +505,7 @@ __setup_rt_policy()
# received, otherwise the proxy arp does not work.
ip -netns "${nsname}" -4 route \
add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
encap seg6 mode "${mode}" segs "${policy}" \
encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \
dev "${VRF_DEVNAME}"
fi
}
@@ -463,13 +513,13 @@ __setup_rt_policy()
# see __setup_rt_policy
setup_rt_policy_ipv6()
{
__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6
__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 "$6"
}
#see __setup_rt_policy
setup_rt_policy_ipv4()
{
__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4
__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 "$6"
}
setup_hs()
@@ -567,41 +617,41 @@ setup()
# the network path between hs-1 and hs-2 traverses several routers
# depending on the direction of traffic.
#
# Direction hs-1 -> hs-2 (H.Encaps.Red)
# Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc)
# - rt-3,rt-4 (SRv6 End behaviors)
# - rt-2 (SRv6 End.DT46 behavior)
#
# Direction hs-2 -> hs-1 (H.Encaps.Red)
# - rt-1 (SRv6 End.DT46 behavior)
setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red
setup_rt_policy_ipv6 1 2 "" 1 encap.red
setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red true
setup_rt_policy_ipv6 1 2 "" 1 encap.red false
# create an IPv4 VPN between hosts hs-1 and hs-2
# the network path between hs-1 and hs-2 traverses several routers
# depending on the direction of traffic.
#
# Direction hs-1 -> hs-2 (H.Encaps.Red)
# Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc)
# - rt-2 (SRv6 End.DT46 behavior)
#
# Direction hs-2 -> hs-1 (H.Encaps.Red)
# - rt-4,rt-3 (SRv6 End behaviors)
# - rt-1 (SRv6 End.DT46 behavior)
setup_rt_policy_ipv4 2 1 "" 2 encap.red
setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red
setup_rt_policy_ipv4 2 1 "" 2 encap.red true
setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red false
# create an IPv6 VPN between hosts hs-3 and hs-4
# the network path between hs-3 and hs-4 traverses several routers
# depending on the direction of traffic.
#
# Direction hs-3 -> hs-4 (H.Encaps.Red)
# Direction hs-3 -> hs-4 (H.Encaps.Red + tunsrc)
# - rt-2 (SRv6 End Behavior)
# - rt-4 (SRv6 End.DT46 behavior)
#
# Direction hs-4 -> hs-3 (H.Encaps.Red)
# - rt-1 (SRv6 End behavior)
# - rt-3 (SRv6 End.DT46 behavior)
setup_rt_policy_ipv6 4 3 "2" 4 encap.red
setup_rt_policy_ipv6 3 4 "1" 3 encap.red
setup_rt_policy_ipv6 4 3 "2" 4 encap.red true
setup_rt_policy_ipv6 3 4 "1" 3 encap.red false
# testing environment was set up successfully
SETUP_ERR=0
@@ -809,6 +859,38 @@ test_vrf_or_ksft_skip()
fi
}
# Before enabling tunsrc tests, make sure tunsrc and ip6tables are supported.
#
# The probe runs in a scratch network namespace that is removed again on
# every path. HAS_TUNSRC is set to true only when all three checks below
# succeed; otherwise it is left untouched. On a failed check the bare
# "return" yields the exit status of the preceding cleanup_ns call.
check_tunsrc_support()
{
# setup_ns is defined outside this view; it is expected to create the
# namespace and store its name in $tunsrc_ns, which is used below.
setup_ns tunsrc_ns
# Both ends of the veth pair live in the scratch namespace; veth0 only
# serves as the output device for the probe route.
ip -netns "${tunsrc_ns}" link add veth0 type veth \
peer name veth1 netns "${tunsrc_ns}"
ip -netns "${tunsrc_ns}" link set veth0 up
# Check 1: a seg6 route carrying the "tunsrc" keyword can be added.
if ! ip -netns "${tunsrc_ns}" -6 route add fc00::dead:beef/128 \
encap seg6 mode encap.red tunsrc fc00::1 segs fc00::2 \
dev veth0 &>/dev/null; then
cleanup_ns "${tunsrc_ns}"
return
fi
# Check 2: the route dump shows tunsrc. Presumably this catches a setup
# where the option was accepted but not actually stored -- TODO confirm.
if ! ip -netns "${tunsrc_ns}" -6 route show | grep -q "tunsrc"; then
cleanup_ns "${tunsrc_ns}"
return
fi
# Check 3: ip6tables can install a DROP rule in the raw PREROUTING chain.
if ! ip netns exec "${tunsrc_ns}" ip6tables -t raw -A PREROUTING \
-d fc00::dead:beef -j DROP &>/dev/null; then
cleanup_ns "${tunsrc_ns}"
return
fi
cleanup_ns "${tunsrc_ns}"
HAS_TUNSRC=true
}
if [ "$(id -u)" -ne 0 ]; then
echo "SKIP: Need root privileges"
exit "${ksft_skip}"
@@ -826,6 +908,7 @@ test_vrf_or_ksft_skip
set -e
trap cleanup EXIT
check_tunsrc_support
setup
set +e