aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/vrf.c78
-rw-r--r--include/uapi/linux/seg6_local.h1
-rw-r--r--net/ipv6/seg6_local.c590
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh494
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh502
5 files changed, 1646 insertions, 19 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index f8d711a84763..259d5cbacf2c 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1310,6 +1310,61 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
skb_dst_set(skb, &rt6->dst);
}
+static int vrf_prepare_mac_header(struct sk_buff *skb,
+ struct net_device *vrf_dev, u16 proto)
+{
+ struct ethhdr *eth;
+ int err;
+
+ /* in general, we do not know if there is enough space in the head of
+ * the packet for hosting the mac header.
+ */
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(vrf_dev));
+ if (unlikely(err))
+ /* no space in the skb head */
+ return -ENOBUFS;
+
+ __skb_push(skb, ETH_HLEN);
+ eth = (struct ethhdr *)skb->data;
+
+ skb_reset_mac_header(skb);
+
+ /* we set the ethernet destination and the source addresses to the
+ * address of the VRF device.
+ */
+ ether_addr_copy(eth->h_dest, vrf_dev->dev_addr);
+ ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
+ eth->h_proto = htons(proto);
+
+ /* the destination address of the Ethernet frame corresponds to the
+ * address set on the VRF interface; therefore, the packet is intended
+ * to be processed locally.
+ */
+ skb->protocol = eth->h_proto;
+ skb->pkt_type = PACKET_HOST;
+
+ skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+
+ skb_pull_inline(skb, ETH_HLEN);
+
+ return 0;
+}
+
+/* prepare and add the mac header to the packet if it was not set previously.
+ * In this way, packet sniffers such as tcpdump can parse the packet correctly.
+ * If the mac header was already set, the original mac header is left
+ * untouched and the function returns immediately.
+ */
+static int vrf_add_mac_header_if_unset(struct sk_buff *skb,
+ struct net_device *vrf_dev,
+ u16 proto)
+{
+ if (skb_mac_header_was_set(skb))
+ return 0;
+
+ return vrf_prepare_mac_header(skb, vrf_dev, proto);
+}
+
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
struct sk_buff *skb)
{
@@ -1336,9 +1391,15 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
skb->skb_iif = vrf_dev->ifindex;
if (!list_empty(&vrf_dev->ptype_all)) {
- skb_push(skb, skb->mac_len);
- dev_queue_xmit_nit(skb, vrf_dev);
- skb_pull(skb, skb->mac_len);
+ int err;
+
+ err = vrf_add_mac_header_if_unset(skb, vrf_dev,
+ ETH_P_IPV6);
+ if (likely(!err)) {
+ skb_push(skb, skb->mac_len);
+ dev_queue_xmit_nit(skb, vrf_dev);
+ skb_pull(skb, skb->mac_len);
+ }
}
IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
@@ -1381,9 +1442,14 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
vrf_rx_stats(vrf_dev, skb->len);
if (!list_empty(&vrf_dev->ptype_all)) {
- skb_push(skb, skb->mac_len);
- dev_queue_xmit_nit(skb, vrf_dev);
- skb_pull(skb, skb->mac_len);
+ int err;
+
+ err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP);
+ if (likely(!err)) {
+ skb_push(skb, skb->mac_len);
+ dev_queue_xmit_nit(skb, vrf_dev);
+ skb_pull(skb, skb->mac_len);
+ }
}
skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev);
diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
index edc138bdc56d..3b39ef1dbb46 100644
--- a/include/uapi/linux/seg6_local.h
+++ b/include/uapi/linux/seg6_local.h
@@ -26,6 +26,7 @@ enum {
SEG6_LOCAL_IIF,
SEG6_LOCAL_OIF,
SEG6_LOCAL_BPF,
+ SEG6_LOCAL_VRFTABLE,
__SEG6_LOCAL_MAX,
};
#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index eba23279912d..b07f7c1c82a4 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -33,11 +33,35 @@
struct seg6_local_lwt;
+/* callbacks used for customizing the creation and destruction of a behavior */
+struct seg6_local_lwtunnel_ops {
+ int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack);
+ void (*destroy_state)(struct seg6_local_lwt *slwt);
+};
+
struct seg6_action_desc {
int action;
unsigned long attrs;
+
+ /* The optattrs field is used for specifying all the optional
+ * attributes supported by a specific behavior.
+ * It means that if one of these attributes is not provided in the
+ * netlink message during the behavior creation, no errors will be
+ * returned to the userspace.
+ *
+ * Each attribute can be only of two types (mutually exclusive):
+ * 1) required or 2) optional.
+ * Every user MUST obey to this rule! If you set an attribute as
+ * required the same attribute CANNOT be set as optional and vice
+ * versa.
+ */
+ unsigned long optattrs;
+
int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
int static_headroom;
+
+ struct seg6_local_lwtunnel_ops slwt_ops;
};
struct bpf_lwt_prog {
@@ -45,6 +69,28 @@ struct bpf_lwt_prog {
char *name;
};
+enum seg6_end_dt_mode {
+ DT_INVALID_MODE = -EINVAL,
+ DT_LEGACY_MODE = 0,
+ DT_VRF_MODE = 1,
+};
+
+struct seg6_end_dt_info {
+ enum seg6_end_dt_mode mode;
+
+ struct net *net;
+ /* VRF device associated to the routing table used by the SRv6
+ * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
+ */
+ int vrf_ifindex;
+ int vrf_table;
+
+ /* tunneled packet proto and family (IPv4 or IPv6) */
+ __be16 proto;
+ u16 family;
+ int hdrlen;
+};
+
struct seg6_local_lwt {
int action;
struct ipv6_sr_hdr *srh;
@@ -54,9 +100,16 @@ struct seg6_local_lwt {
int iif;
int oif;
struct bpf_lwt_prog bpf;
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ struct seg6_end_dt_info dt_info;
+#endif
int headroom;
struct seg6_action_desc *desc;
+ /* unlike the required attrs, we have to track the optional attributes
+ * that have been effectively parsed.
+ */
+ unsigned long parsed_optattrs;
};
static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
@@ -401,6 +454,248 @@ drop:
return -EINVAL;
}
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
+{
+ const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
+
+ return nli->nl_net;
+}
+
+static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
+ u16 family, struct netlink_ext_ack *extack)
+{
+ struct seg6_end_dt_info *info = &slwt->dt_info;
+ int vrf_ifindex;
+ struct net *net;
+
+ net = fib6_config_get_net(cfg);
+
+ /* note that vrf_table was already set by parse_nla_vrftable() */
+ vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
+ info->vrf_table);
+ if (vrf_ifindex < 0) {
+ if (vrf_ifindex == -EPERM) {
+ NL_SET_ERR_MSG(extack,
+ "Strict mode for VRF is disabled");
+ } else if (vrf_ifindex == -ENODEV) {
+ NL_SET_ERR_MSG(extack,
+ "Table has no associated VRF device");
+ } else {
+ pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
+ vrf_ifindex);
+ }
+
+ return vrf_ifindex;
+ }
+
+ info->net = net;
+ info->vrf_ifindex = vrf_ifindex;
+
+ switch (family) {
+ case AF_INET:
+ info->proto = htons(ETH_P_IP);
+ info->hdrlen = sizeof(struct iphdr);
+ break;
+ case AF_INET6:
+ info->proto = htons(ETH_P_IPV6);
+ info->hdrlen = sizeof(struct ipv6hdr);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ info->family = family;
+ info->mode = DT_VRF_MODE;
+
+ return 0;
+}
+
+/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
+ * routes the IPv4/IPv6 packet by looking at the configured routing table.
+ *
+ * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
+ * Routing Header packets) from several interfaces and the outer IPv6
+ * destination address (DA) is used for retrieving the specific instance of the
+ * End.DT4/DT6 behavior that should process the packets.
+ *
+ * However, the inner IPv4/IPv6 packet is not really bound to any receiving
+ * interface and thus the End.DT4/DT6 sets the VRF (associated with the
+ * corresponding routing table) as the *receiving* interface.
+ * In other words, the End.DT4/DT6 processes a packet as if it has been received
+ * directly by the VRF (and not by one of its slave devices, if any).
+ * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in
+ * according to the routing table configured by the End.DT4/DT6 instance.
+ *
+ * This design allows you to get some interesting features like:
+ * 1) the statistics on rx packets;
+ * 2) the possibility to install a packet sniffer on the receiving interface
+ * (the VRF one) for looking at the incoming packets;
+ * 3) the possibility to leverage the netfilter prerouting hook for the inner
+ * IPv4 packet.
+ *
+ * This function returns:
+ * - the sk_buff* when the VRF rcv handler has processed the packet correctly;
+ * - NULL when the skb is consumed by the VRF rcv handler;
+ * - a pointer which encodes a negative error number in case of error.
+ * Note that in this case, the function takes care of freeing the skb.
+ */
+static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
+ struct net_device *dev)
+{
+ /* based on l3mdev_ip_rcv; we are only interested in the master */
+ if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
+ goto drop;
+
+ if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
+ goto drop;
+
+ /* the decap packet IPv4/IPv6 does not come with any mac header info.
+ * We must unset the mac header to allow the VRF device to rebuild it,
+ * just in case there is a sniffer attached on the device.
+ */
+ skb_unset_mac_header(skb);
+
+ skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
+ if (!skb)
+ /* the skb buffer was consumed by the handler */
+ return NULL;
+
+ /* when a packet is received by a VRF or by one of its slaves, the
+ * master device reference is set into the skb.
+ */
+ if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
+ goto drop;
+
+ return skb;
+
+drop:
+ kfree_skb(skb);
+ return ERR_PTR(-EINVAL);
+}
+
+static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
+ struct seg6_end_dt_info *info)
+{
+ int vrf_ifindex = info->vrf_ifindex;
+ struct net *net = info->net;
+
+ if (unlikely(vrf_ifindex < 0))
+ goto error;
+
+ if (unlikely(!net_eq(dev_net(skb->dev), net)))
+ goto error;
+
+ return dev_get_by_index_rcu(net, vrf_ifindex);
+
+error:
+ return NULL;
+}
+
+static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct seg6_end_dt_info *info = &slwt->dt_info;
+ struct net_device *vrf;
+
+ vrf = end_dt_get_vrf_rcu(skb, info);
+ if (unlikely(!vrf))
+ goto drop;
+
+ skb->protocol = info->proto;
+
+ skb_dst_drop(skb);
+
+ skb_set_transport_header(skb, info->hdrlen);
+
+ return end_dt_vrf_rcv(skb, info->family, vrf);
+
+drop:
+ kfree_skb(skb);
+ return ERR_PTR(-EINVAL);
+}
+
+static int input_action_end_dt4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct iphdr *iph;
+ int err;
+
+ if (!decap_and_validate(skb, IPPROTO_IPIP))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ skb = end_dt_vrf_core(skb, slwt);
+ if (!skb)
+ /* packet has been processed and consumed by the VRF */
+ return 0;
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ iph = ip_hdr(skb);
+
+ err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
+ if (unlikely(err))
+ goto drop;
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
+}
+
+static enum
+seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
+{
+ unsigned long parsed_optattrs = slwt->parsed_optattrs;
+ bool legacy, vrfmode;
+
+ legacy = !!(parsed_optattrs & (1 << SEG6_LOCAL_TABLE));
+ vrfmode = !!(parsed_optattrs & (1 << SEG6_LOCAL_VRFTABLE));
+
+ if (!(legacy ^ vrfmode))
+ /* both are absent or present: invalid DT6 mode */
+ return DT_INVALID_MODE;
+
+ return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
+}
+
+static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
+{
+ struct seg6_end_dt_info *info = &slwt->dt_info;
+
+ return info->mode;
+}
+
+static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
+ struct seg6_end_dt_info *info = &slwt->dt_info;
+
+ switch (mode) {
+ case DT_LEGACY_MODE:
+ info->mode = DT_LEGACY_MODE;
+ return 0;
+ case DT_VRF_MODE:
+ return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
+ default:
+ NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
+ return -EINVAL;
+ }
+}
+#endif
+
static int input_action_end_dt6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
@@ -410,6 +705,28 @@ static int input_action_end_dt6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
+ goto legacy_mode;
+
+ /* DT6_VRF_MODE */
+ skb = end_dt_vrf_core(skb, slwt);
+ if (!skb)
+ /* packet has been processed and consumed by the VRF */
+ return 0;
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ /* note: this time we do not need to specify the table because the VRF
+ * takes care of selecting the correct table.
+ */
+ seg6_lookup_any_nexthop(skb, NULL, 0, true);
+
+ return dst_input(skb);
+
+legacy_mode:
+#endif
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);
@@ -590,8 +907,27 @@ static struct seg6_action_desc seg6_action_table[] = {
.input = input_action_end_dx4,
},
{
+ .action = SEG6_LOCAL_ACTION_END_DT4,
+ .attrs = (1 << SEG6_LOCAL_VRFTABLE),
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ .input = input_action_end_dt4,
+ .slwt_ops = {
+ .build_state = seg6_end_dt4_build,
+ },
+#endif
+ },
+ {
.action = SEG6_LOCAL_ACTION_END_DT6,
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ .attrs = 0,
+ .optattrs = (1 << SEG6_LOCAL_TABLE) |
+ (1 << SEG6_LOCAL_VRFTABLE),
+ .slwt_ops = {
+ .build_state = seg6_end_dt6_build,
+ },
+#else
.attrs = (1 << SEG6_LOCAL_TABLE),
+#endif
.input = input_action_end_dt6,
},
{
@@ -649,6 +985,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
[SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
+ [SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 },
[SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
.len = sizeof(struct in_addr) },
[SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
@@ -710,6 +1047,11 @@ static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return memcmp(a->srh, b->srh, len);
}
+static void destroy_attr_srh(struct seg6_local_lwt *slwt)
+{
+ kfree(slwt->srh);
+}
+
static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
@@ -733,6 +1075,53 @@ static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return 0;
}
+static struct
+seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
+{
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ return &slwt->dt_info;
+#else
+ return ERR_PTR(-EOPNOTSUPP);
+#endif
+}
+
+static int parse_nla_vrftable(struct nlattr **attrs,
+ struct seg6_local_lwt *slwt)
+{
+ struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
+
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
+ info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);
+
+ return 0;
+}
+
+static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
+
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
+ if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
+ struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);
+
+ if (info_a->vrf_table != info_b->vrf_table)
+ return 1;
+
+ return 0;
+}
+
static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
@@ -901,16 +1290,30 @@ static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return strcmp(a->bpf.name, b->bpf.name);
}
+static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
+{
+ kfree(slwt->bpf.name);
+ if (slwt->bpf.prog)
+ bpf_prog_put(slwt->bpf.prog);
+}
+
struct seg6_action_param {
int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
+
+ /* optional destroy() callback useful for releasing resources which
+ * have been previously acquired in the corresponding parse()
+ * function.
+ */
+ void (*destroy)(struct seg6_local_lwt *slwt);
};
static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
.put = put_nla_srh,
- .cmp = cmp_nla_srh },
+ .cmp = cmp_nla_srh,
+ .destroy = destroy_attr_srh },
[SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
.put = put_nla_table,
@@ -934,14 +1337,130 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf,
.put = put_nla_bpf,
- .cmp = cmp_nla_bpf },
+ .cmp = cmp_nla_bpf,
+ .destroy = destroy_attr_bpf },
+
+ [SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable,
+ .put = put_nla_vrftable,
+ .cmp = cmp_nla_vrftable },
};
+/* call the destroy() callback (if available) for each set attribute in
+ * @parsed_attrs, starting from the first attribute up to the @max_parsed
+ * (excluded) attribute.
+ */
+static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
+ struct seg6_local_lwt *slwt)
+{
+ struct seg6_action_param *param;
+ int i;
+
+ /* Every required seg6local attribute is identified by an ID which is
+ * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
+ *
+ * We scan the 'parsed_attrs' bitmask, starting from the first attribute
+ * up to the @max_parsed (excluded) attribute.
+ * For each set attribute, we retrieve the corresponding destroy()
+ * callback. If the callback is not available, then we skip to the next
+ * attribute; otherwise, we call the destroy() callback.
+ */
+ for (i = 0; i < max_parsed; ++i) {
+ if (!(parsed_attrs & (1 << i)))
+ continue;
+
+ param = &seg6_action_params[i];
+
+ if (param->destroy)
+ param->destroy(slwt);
+ }
+}
+
+/* release all the resources that may have been acquired during parsing
+ * operations.
+ */
+static void destroy_attrs(struct seg6_local_lwt *slwt)
+{
+ unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;
+
+ __destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
+}
+
+static int parse_nla_optional_attrs(struct nlattr **attrs,
+ struct seg6_local_lwt *slwt)
+{
+ struct seg6_action_desc *desc = slwt->desc;
+ unsigned long parsed_optattrs = 0;
+ struct seg6_action_param *param;
+ int err, i;
+
+ for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) {
+ if (!(desc->optattrs & (1 << i)) || !attrs[i])
+ continue;
+
+ /* once here, the i-th attribute is provided by the
+ * userspace AND it is identified optional as well.
+ */
+ param = &seg6_action_params[i];
+
+ err = param->parse(attrs, slwt);
+ if (err < 0)
+ goto parse_optattrs_err;
+
+ /* current attribute has been correctly parsed */
+ parsed_optattrs |= (1 << i);
+ }
+
+ /* store in the tunnel state all the optional attributed successfully
+ * parsed.
+ */
+ slwt->parsed_optattrs = parsed_optattrs;
+
+ return 0;
+
+parse_optattrs_err:
+ __destroy_attrs(parsed_optattrs, i, slwt);
+
+ return err;
+}
+
+/* call the custom constructor of the behavior during its initialization phase
+ * and after that all its attributes have been parsed successfully.
+ */
+static int
+seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct seg6_action_desc *desc = slwt->desc;
+ struct seg6_local_lwtunnel_ops *ops;
+
+ ops = &desc->slwt_ops;
+ if (!ops->build_state)
+ return 0;
+
+ return ops->build_state(slwt, cfg, extack);
+}
+
+/* call the custom destructor of the behavior which is invoked before the
+ * tunnel is going to be destroyed.
+ */
+static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
+{
+ struct seg6_action_desc *desc = slwt->desc;
+ struct seg6_local_lwtunnel_ops *ops;
+
+ ops = &desc->slwt_ops;
+ if (!ops->destroy_state)
+ return;
+
+ ops->destroy_state(slwt);
+}
+
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
struct seg6_action_param *param;
struct seg6_action_desc *desc;
+ unsigned long invalid_attrs;
int i, err;
desc = __get_action_desc(slwt->action);
@@ -954,6 +1473,26 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
slwt->desc = desc;
slwt->headroom += desc->static_headroom;
+ /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
+ * disjoined, this allow us to release acquired resources by optional
+ * attributes and by required attributes independently from each other
+ * without any interfarence.
+ * In other terms, we are sure that we do not release some the acquired
+ * resources twice.
+ *
+ * Note that if an attribute is configured both as required and as
+ * optional, it means that the user has messed something up in the
+ * seg6_action_table. Therefore, this check is required for SRv6
+ * behaviors to work properly.
+ */
+ invalid_attrs = desc->attrs & desc->optattrs;
+ if (invalid_attrs) {
+ WARN_ONCE(1,
+ "An attribute cannot be both required AND optional");
+ return -EINVAL;
+ }
+
+ /* parse the required attributes */
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
if (desc->attrs & (1 << i)) {
if (!attrs[i])
@@ -963,11 +1502,24 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
err = param->parse(attrs, slwt);
if (err < 0)
- return err;
+ goto parse_attrs_err;
}
}
+ /* parse the optional attributes, if any */
+ err = parse_nla_optional_attrs(attrs, slwt);
+ if (err < 0)
+ goto parse_attrs_err;
+
return 0;
+
+parse_attrs_err:
+ /* release any resource that may have been acquired during the i-1
+ * parse() operations.
+ */
+ __destroy_attrs(desc->attrs, i, slwt);
+
+ return err;
}
static int seg6_local_build_state(struct net *net, struct nlattr *nla,
@@ -1003,6 +1555,10 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla,
if (err < 0)
goto out_free;
+ err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
+ if (err < 0)
+ goto out_destroy_attrs;
+
newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
newts->headroom = slwt->headroom;
@@ -1011,8 +1567,9 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla,
return 0;
+out_destroy_attrs:
+ destroy_attrs(slwt);
out_free:
- kfree(slwt->srh);
kfree(newts);
return err;
}
@@ -1021,12 +1578,9 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
{
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
- kfree(slwt->srh);
+ seg6_local_lwtunnel_destroy_state(slwt);
- if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) {
- kfree(slwt->bpf.name);
- bpf_prog_put(slwt->bpf.prog);
- }
+ destroy_attrs(slwt);
return;
}
@@ -1036,13 +1590,16 @@ static int seg6_local_fill_encap(struct sk_buff *skb,
{
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
struct seg6_action_param *param;
+ unsigned long attrs;
int i, err;
if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
return -EMSGSIZE;
+ attrs = slwt->desc->attrs | slwt->parsed_optattrs;
+
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
- if (slwt->desc->attrs & (1 << i)) {
+ if (attrs & (1 << i)) {
param = &seg6_action_params[i];
err = param->put(skb, slwt);
if (err < 0)
@@ -1061,7 +1618,7 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
nlsize = nla_total_size(4); /* action */
- attrs = slwt->desc->attrs;
+ attrs = slwt->desc->attrs | slwt->parsed_optattrs;
if (attrs & (1 << SEG6_LOCAL_SRH))
nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
@@ -1086,6 +1643,9 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
nla_total_size(MAX_PROG_NAME) +
nla_total_size(4);
+ if (attrs & (1 << SEG6_LOCAL_VRFTABLE))
+ nlsize += nla_total_size(4);
+
return nlsize;
}
@@ -1094,6 +1654,7 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a,
{
struct seg6_local_lwt *slwt_a, *slwt_b;
struct seg6_action_param *param;
+ unsigned long attrs_a, attrs_b;
int i;
slwt_a = seg6_local_lwtunnel(a);
@@ -1102,11 +1663,14 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a,
if (slwt_a->action != slwt_b->action)
return 1;
- if (slwt_a->desc->attrs != slwt_b->desc->attrs)
+ attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
+ attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;
+
+ if (attrs_a != attrs_b)
return 1;
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
- if (slwt_a->desc->attrs & (1 << i)) {
+ if (attrs_a & (1 << i)) {
param = &seg6_action_params[i];
if (param->cmp(slwt_a, slwt_b))
return 1;
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
new file mode 100755
index 000000000000..ad7a9fc59934
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -0,0 +1,494 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <[email protected]>
+
+# This test is designed for evaluating the new SRv6 End.DT4 behavior used for
+# implementing IPv4 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv4
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv4 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT4 behavior and c) VRF.
+#
+# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-t100-1 pings
+# the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-t100-1 sends an IPv4 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv4 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT4 behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv4 packet. Afterwards, the packet is sent to the host
+# hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4 L3 VPN for tenant 200 works exactly as the IPv4 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-t100-1 netns | | hs-t100-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | |
+# | +-------+-------+ | localsid | | | | localsid | +-------+-------- |
+# | | | table | | | | table | | |
+# | +----+----+ +----------+ | | +----------+ +----+----+ |
+# | | vrf-100 | | | | vrf-100 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | veth0 | | | | veth0 | |
+# | | fd00::1/64 |.|...|.| fd00::2/64 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | vrf-200 | | | | vrf-200 | |
+# | +----+----+ | | +----+----+ |
+# | | | | | |
+# | +-------+-------+ | | +-------+-------- |
+# | | veth-t200 | | | | veth-t200 | |
+# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | |
+# | +---------------+ rt-1 netns | | rt-2 netns +---------------- |
+# | . | | . |
+# +-----------------------------------+ +-----------------------------------+
+# . .
+# . .
+# . .
+# . .
+# +-------------------+ +-------------------+
+# | . | | . |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | |
+# | +-------------+ | | +-------------+ |
+# | | | |
+# | hs-t200-3 netns | | hs-t200-4 netns |
+# | | | |
+# +-------------------+ +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID |Action |
+# +-------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DT4 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:21:200::6004|apply SRv6 End.DT4 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t200 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID |Action |
+# +-------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DT4 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:12:200::6004|apply SRv6 End.DT4 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t200 |
+# +---------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-t${tid}-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ # configure the VRF for the tenant X on the router which is directly
+ # connected to the source host.
+ ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+ ip -netns ${rtname} link set vrf-${tid} up
+
+ # enslave the veth-tX interface to the vrf-X in the access router
+ ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+ via fd00::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+ encap seg6local action End.DT4 vrftable ${tid} dev vrf-${tid}
+
+ # all sids for VPNs start with a common locator which is fc00::/16.
+ # Routes for handling the SRv6 End.DT4 behavior instances are grouped
+ # together in the 'localsid' table.
+ #
+ # NOTE: added only once
+ if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+ grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+ ip -netns ${rtdst_name} -6 rule add \
+ to ${VPN_LOCATOR_SERVICE}::/16 \
+ lookup ${LOCALSID_TABLE_ID} prio 999
+ fi
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100 #args: host router tenant
+ setup_hs 2 2 100
+
+ # setup two hosts for the tenant 200
+ # - host hs-3 is directly connected to the router rt-1;
+ # - host hs-4 is directly connected to the router rt-2.
+ setup_hs 3 1 200
+ setup_hs 4 2 200
+
+ # setup the IPv4 L3 VPN which connects the host hs-t100-1 and host
+ # hs-t100-2 within the same tenant 100.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+
+ # setup the IPv4 L3 VPN which connects the host hs-t200-3 and host
+ # hs-t200-4 within the same tenant 200.
+ setup_vpn_config 3 1 4 2 200
+ setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ check_rt_connectivity ${rtsrc} ${rtdst}
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc=$1
+ local tidsrc=$2
+ local hsdst=$3
+ local tiddst=$4
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc=$1
+ local tid=$2
+
+ check_hs_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+ log_section "IPv6 routers connectivity test"
+
+ check_and_log_rt_connectivity 1 2
+ check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+ log_section "IPv4 connectivity test among hosts and gateway"
+
+ check_and_log_hs2gw_connectivity 1 100
+ check_and_log_hs2gw_connectivity 2 100
+
+ check_and_log_hs2gw_connectivity 3 200
+ check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+
+ check_and_log_hs_connectivity 3 4 200
+ check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+ local i
+ local j
+ local k
+ local tmp
+ local l1="1 2"
+ local l2="3 4"
+ local t1=100
+ local t2=200
+
+ log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ tmp=${t1}; t1=${t2}; t2=${tmp}
+ done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit 0
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
new file mode 100755
index 000000000000..68708f5e26a0
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -0,0 +1,502 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <[email protected]>
+# author: Paolo Lungaroni <[email protected]>
+
+# This test is designed for evaluating the new SRv6 End.DT6 behavior used for
+# implementing IPv6 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv6 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv6
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT6 behavior and c) VRF.
+#
+# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-t100-1 pings
+# the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT6 behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 packet. Afterwards, the packet is sent to the host
+# hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv6 L3 VPN for tenant 200 works exactly as the IPv6 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-t100-1 netns | | hs-t100-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::254/64 | +----------+ | | +----------+ | cafe::254/64 | |
+# | +-------+-------+ | localsid | | | | localsid | +-------+-------- |
+# | | | table | | | | table | | |
+# | +----+----+ +----------+ | | +----------+ +----+----+ |
+# | | vrf-100 | | | | vrf-100 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | veth0 | | | | veth0 | |
+# | | fd00::1/64 |.|...|.| fd00::2/64 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | vrf-200 | | | | vrf-200 | |
+# | +----+----+ | | +----+----+ |
+# | | | | | |
+# | +-------+-------+ | | +-------+-------- |
+# | | veth-t200 | | | | veth-t200 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | +---------------+ rt-1 netns | | rt-2 netns +---------------- |
+# | . | | . |
+# +-----------------------------------+ +-----------------------------------+
+# . .
+# . .
+# . .
+# . .
+# +-------------------+ +-------------------+
+# | . | | . |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::3/64 | | | | cafe::4/64 | |
+# | +-------------+ | | +-------------+ |
+# | | | |
+# | hs-t200-3 netns | | hs-t200-4 netns |
+# | | | |
+# +-------------------+ +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID |Action |
+# +-------------------------------------------------+
+# |fc00:21:100::6006|apply SRv6 End.DT6 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:21:200::6006|apply SRv6 End.DT6 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6006|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::4 |apply seg6 encap segs fc00:12:200::6006|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t200 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID |Action |
+# +-------------------------------------------------+
+# |fc00:12:100::6006|apply SRv6 End.DT6 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:12:200::6006|apply SRv6 End.DT6 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6006|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::3 |apply seg6 encap segs fc00:21:200::6006|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t200 |
+# +---------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-t${tid}-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ # configure the VRF for the tenant X on the router which is directly
+ # connected to the source host.
+ ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+ ip -netns ${rtname} link set vrf-${tid} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ # enslave the veth-tX interface to the vrf-X in the access router
+ ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+ via fd00::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+ encap seg6local action End.DT6 vrftable ${tid} dev vrf-${tid}
+
+ # all sids for VPNs start with a common locator which is fc00::/16.
+ # Routes for handling the SRv6 End.DT6 behavior instances are grouped
+ # together in the 'localsid' table.
+ #
+ # NOTE: added only once
+ if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+ grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+ ip -netns ${rtdst_name} -6 rule add \
+ to ${VPN_LOCATOR_SERVICE}::/16 \
+ lookup ${LOCALSID_TABLE_ID} prio 999
+ fi
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100 #args: host router tenant
+ setup_hs 2 2 100
+
+ # setup two hosts for the tenant 200
+ # - host hs-3 is directly connected to the router rt-1;
+ # - host hs-4 is directly connected to the router rt-2.
+ setup_hs 3 1 200
+ setup_hs 4 2 200
+
+ # setup the IPv6 L3 VPN which connects the host hs-t100-1 and host
+ # hs-t100-2 within the same tenant 100.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+
+ # setup the IPv6 L3 VPN which connects the host hs-t200-3 and host
+ # hs-t200-4 within the same tenant 200.
+ setup_vpn_config 3 1 4 2 200
+ setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ check_rt_connectivity ${rtsrc} ${rtdst}
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc=$1
+ local tidsrc=$2
+ local hsdst=$3
+ local tiddst=$4
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc=$1
+ local tid=$2
+
+ check_hs_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+ log_section "IPv6 routers connectivity test"
+
+ check_and_log_rt_connectivity 1 2
+ check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+ log_section "IPv6 connectivity test among hosts and gateway"
+
+ check_and_log_hs2gw_connectivity 1 100
+ check_and_log_hs2gw_connectivity 2 100
+
+ check_and_log_hs2gw_connectivity 3 200
+ check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+
+ check_and_log_hs_connectivity 3 4 200
+ check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+ local i
+ local j
+ local k
+ local tmp
+ local l1="1 2"
+ local l2="3 4"
+ local t1=100
+ local t2=200
+
+ log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ tmp=${t1}; t1=${t2}; t2=${tmp}
+ done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit 0
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}