diff options
Diffstat (limited to 'net/mpls')
-rw-r--r-- | net/mpls/af_mpls.c | 393 | ||||
-rw-r--r-- | net/mpls/internal.h | 58 | ||||
-rw-r--r-- | net/mpls/mpls_iptunnel.c | 13 |
3 files changed, 433 insertions, 31 deletions
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 5b77377e5a15..3818686182b2 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -7,7 +7,9 @@ #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/mpls.h> +#include <linux/netconf.h> #include <linux/vmalloc.h> +#include <linux/percpu.h> #include <net/ip.h> #include <net/dst.h> #include <net/sock.h> @@ -17,8 +19,8 @@ #include <net/netns/generic.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> -#include <net/addrconf.h> #endif +#include <net/addrconf.h> #include <net/nexthop.h> #include "internal.h" @@ -48,11 +50,6 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) return rt; } -static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) -{ - return rcu_dereference_rtnl(dev->mpls_ptr); -} - bool mpls_output_possible(const struct net_device *dev) { return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); @@ -98,6 +95,31 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); +void mpls_stats_inc_outucastpkts(struct net_device *dev, + const struct sk_buff *skb) +{ + struct mpls_dev *mdev; + + if (skb->protocol == htons(ETH_P_MPLS_UC)) { + mdev = mpls_dev_get(dev); + if (mdev) + MPLS_INC_STATS_LEN(mdev, skb->len, + tx_packets, + tx_bytes); + } else if (skb->protocol == htons(ETH_P_IP)) { + IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct inet6_dev *in6dev = __in6_dev_get(dev); + + if (in6dev) + IP6_UPD_PO_STATS(dev_net(dev), in6dev, + IPSTATS_MIB_OUT, skb->len); +#endif + } +} +EXPORT_SYMBOL_GPL(mpls_stats_inc_outucastpkts); + static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) { struct mpls_entry_decoded dec; @@ -255,6 +277,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, struct mpls_nh *nh; struct mpls_entry_decoded dec; struct net_device *out_dev; + struct mpls_dev *out_mdev; struct mpls_dev *mdev; unsigned int hh_len; unsigned int new_header_size; @@ -264,34 +287,39 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Careful this entire function runs inside of an rcu critical section */ mdev = mpls_dev_get(dev); - if (!mdev || !mdev->input_enabled) + if (!mdev) goto drop; - if (skb->pkt_type != PACKET_HOST) + MPLS_INC_STATS_LEN(mdev, skb->len, rx_packets, + rx_bytes); + + if (!mdev->input_enabled) { + MPLS_INC_STATS(mdev, rx_dropped); goto drop; + } + + if (skb->pkt_type != PACKET_HOST) + goto err; if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) - goto drop; + goto err; if (!pskb_may_pull(skb, sizeof(*hdr))) - goto drop; + goto err; /* Read and decode the label */ hdr = mpls_hdr(skb); dec = mpls_entry_decode(hdr); rt = mpls_route_input_rcu(net, dec.label); - if (!rt) + if (!rt) { + MPLS_INC_STATS(mdev, rx_noroute); goto drop; + } nh = mpls_select_multipath(rt, skb); if (!nh) - goto drop; - - /* Find the output device */ - out_dev = rcu_dereference(nh->nh_dev); - if (!mpls_output_possible(out_dev)) - goto drop; + goto err; /* Pop the label */ skb_pull(skb, sizeof(*hdr)); @@ -300,20 +328,25 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, skb_orphan(skb); if (skb_warn_if_lro(skb)) - goto drop; + goto err; skb_forward_csum(skb); /* Verify ttl is valid */ if (dec.ttl <= 1) - goto drop; + goto err; dec.ttl -= 1; + /* Find the output device */ + out_dev = rcu_dereference(nh->nh_dev); + if (!mpls_output_possible(out_dev)) + goto tx_err; + /* Verify the destination can hold the packet */ new_header_size = mpls_nh_header_size(nh); mtu = mpls_dev_mtu(out_dev); if (mpls_pkt_too_big(skb, mtu - new_header_size)) - goto drop; + goto tx_err; hh_len = LL_RESERVED_SPACE(out_dev); if (!out_dev->header_ops) @@ -321,7 +354,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Ensure there is enough space for the headers in the skb */ if (skb_cow(skb, hh_len + new_header_size)) - goto drop; + goto tx_err; skb->dev = out_dev; skb->protocol = htons(ETH_P_MPLS_UC); @@ -329,7 +362,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (unlikely(!new_header_size && dec.bos)) { /* Penultimate hop popping */ if (!mpls_egress(rt, skb, dec)) - goto drop; + goto err; } else { bool bos; int i; @@ -345,6 +378,8 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, } } + mpls_stats_inc_outucastpkts(out_dev, skb); + /* If via wasn't specified then send out using device address */ if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC) err = neigh_xmit(NEIGH_LINK_TABLE, out_dev, @@ -357,6 +392,13 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, __func__, err); return 0; +tx_err: + out_mdev = out_dev ? mpls_dev_get(out_dev) : NULL; + if (out_mdev) + MPLS_INC_STATS(out_mdev, tx_errors); + goto drop; +err: + MPLS_INC_STATS(mdev, rx_errors); drop: kfree_skb(skb); return NET_RX_DROP; @@ -855,15 +897,279 @@ errout: return err; } +static void mpls_get_stats(struct mpls_dev *mdev, + struct mpls_link_stats *stats) +{ + struct mpls_pcpu_stats *p; + int i; + + memset(stats, 0, sizeof(*stats)); + + for_each_possible_cpu(i) { + struct mpls_link_stats local; + unsigned int start; + + p = per_cpu_ptr(mdev->stats, i); + do { + start = u64_stats_fetch_begin(&p->syncp); + local = p->stats; + } while (u64_stats_fetch_retry(&p->syncp, start)); + + stats->rx_packets += local.rx_packets; + stats->rx_bytes += local.rx_bytes; + stats->tx_packets += local.tx_packets; + stats->tx_bytes += local.tx_bytes; + stats->rx_errors += local.rx_errors; + stats->tx_errors += local.tx_errors; + stats->rx_dropped += local.rx_dropped; + stats->tx_dropped += local.tx_dropped; + stats->rx_noroute += local.rx_noroute; + } +} + +static int mpls_fill_stats_af(struct sk_buff *skb, + const struct net_device *dev) +{ + struct mpls_link_stats *stats; + struct mpls_dev *mdev; + struct nlattr *nla; + + mdev = mpls_dev_get(dev); + if (!mdev) + return -ENODATA; + + nla = nla_reserve_64bit(skb, MPLS_STATS_LINK, + sizeof(struct mpls_link_stats), + MPLS_STATS_UNSPEC); + if (!nla) + return -EMSGSIZE; + + stats = nla_data(nla); + mpls_get_stats(mdev, stats); + + return 0; +} + +static size_t mpls_get_stats_af_size(const struct net_device *dev) +{ + struct mpls_dev *mdev; + + mdev = mpls_dev_get(dev); + if (!mdev) + return 0; + + return nla_total_size_64bit(sizeof(struct mpls_link_stats)); +} + +static int mpls_netconf_fill_devconf(struct sk_buff *skb, struct mpls_dev *mdev, + u32 portid, u32 seq, int event, + unsigned int flags, int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + bool all = false; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (!nlh) + return -EMSGSIZE; + + if (type == NETCONFA_ALL) + all = true; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_MPLS; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, mdev->dev->ifindex) < 0) + goto nla_put_failure; + + if ((all || type == NETCONFA_INPUT) && + nla_put_s32(skb, NETCONFA_INPUT, + mdev->input_enabled) < 0) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int mpls_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + bool all = false; + + if (type == NETCONFA_ALL) + all = true; + + if (all || type == NETCONFA_INPUT) + size += nla_total_size(4); + + return size; +} + +static void mpls_netconf_notify_devconf(struct net *net, int type, + struct mpls_dev *mdev) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(mpls_netconf_msgsize_devconf(type), GFP_KERNEL); + if (!skb) + goto errout; + + err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, RTM_NEWNETCONF, + 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_MPLS_NETCONF, NULL, GFP_KERNEL); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err); +} + +static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = { + [NETCONFA_IFINDEX] = { .len = sizeof(int) }, +}; + +static int mpls_netconf_get_devconf(struct sk_buff *in_skb, + struct nlmsghdr *nlh) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[NETCONFA_MAX + 1]; + struct netconfmsg *ncm; + struct net_device *dev; + struct mpls_dev *mdev; + struct sk_buff *skb; + int ifindex; + int err; + + err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, + devconf_mpls_policy); + if (err < 0) + goto errout; + + err = -EINVAL; + if (!tb[NETCONFA_IFINDEX]) + goto errout; + + ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); + dev = __dev_get_by_index(net, ifindex); + if (!dev) + goto errout; + + mdev = mpls_dev_get(dev); + if (!mdev) + goto errout; + + err = -ENOBUFS; + skb = nlmsg_new(mpls_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); + if (!skb) + goto errout; + + err = mpls_netconf_fill_devconf(skb, mdev, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, RTM_NEWNETCONF, 0, + NETCONFA_ALL); + if (err < 0) { + /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); +errout: + return err; +} + +static int mpls_netconf_dump_devconf(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct hlist_head *head; + struct net_device *dev; + struct mpls_dev *mdev; + int idx, s_idx; + int h, s_h; + + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + rcu_read_lock(); + cb->seq = net->dev_base_seq; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + mdev = mpls_dev_get(dev); + if (!mdev) + goto cont; + if (mpls_netconf_fill_devconf(skb, mdev, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + NETCONFA_ALL) < 0) { + rcu_read_unlock(); + goto done; + } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + rcu_read_unlock(); + } +done: + cb->args[0] = h; + cb->args[1] = idx; + + return skb->len; +} + #define MPLS_PERDEV_SYSCTL_OFFSET(field) \ (&((struct mpls_dev *)0)->field) +static int mpls_conf_proc(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int oval = *(int *)ctl->data; + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + + if (write) { + struct mpls_dev *mdev = ctl->extra1; + int i = (int *)ctl->data - (int *)mdev; + struct net *net = ctl->extra2; + int val = *(int *)ctl->data; + + if (i == offsetof(struct mpls_dev, input_enabled) && + val != oval) { + mpls_netconf_notify_devconf(net, + NETCONFA_INPUT, + mdev); + } + } + + return ret; +} + static const struct ctl_table mpls_dev_table[] = { { .procname = "input", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = mpls_conf_proc, .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), }, { } @@ -873,6 +1179,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, struct mpls_dev *mdev) { char path[sizeof("net/mpls/conf/") + IFNAMSIZ]; + struct net *net = dev_net(dev); struct ctl_table *table; int i; @@ -883,8 +1190,11 @@ static int mpls_dev_sysctl_register(struct net_device *dev, /* Table data contains only offsets relative to the base of * the mdev at this point, so make them absolute. */ - for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) + for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) { table[i].data = (char *)mdev + (uintptr_t)table[i].data; + table[i].extra1 = mdev; + table[i].extra2 = net; + } snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); @@ -913,6 +1223,7 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev) { struct mpls_dev *mdev; int err = -ENOMEM; + int i; ASSERT_RTNL(); @@ -920,19 +1231,40 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev) if (!mdev) return ERR_PTR(err); + mdev->stats = alloc_percpu(struct mpls_pcpu_stats); + if (!mdev->stats) + goto free; + + for_each_possible_cpu(i) { + struct mpls_pcpu_stats *mpls_stats; + + mpls_stats = per_cpu_ptr(mdev->stats, i); + u64_stats_init(&mpls_stats->syncp); + } + err = mpls_dev_sysctl_register(dev, mdev); if (err) goto free; + mdev->dev = dev; rcu_assign_pointer(dev->mpls_ptr, mdev); return mdev; free: + free_percpu(mdev->stats); kfree(mdev); return ERR_PTR(err); } +static void mpls_dev_destroy_rcu(struct rcu_head *head) +{ + struct mpls_dev *mdev = container_of(head, struct mpls_dev, rcu); + + free_percpu(mdev->stats); + kfree(mdev); +} + static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; @@ -1047,7 +1379,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (mdev) { mpls_dev_sysctl_unregister(mdev); RCU_INIT_POINTER(dev->mpls_ptr, NULL); - kfree_rcu(mdev, rcu); + call_rcu(&mdev->rcu, mpls_dev_destroy_rcu); } break; case NETDEV_CHANGENAME: @@ -1708,6 +2040,12 @@ static struct pernet_operations mpls_net_ops = { .exit = mpls_net_exit, }; +static struct rtnl_af_ops mpls_af_ops __read_mostly = { + .family = AF_MPLS, + .fill_stats_af = mpls_fill_stats_af, + .get_stats_af_size = mpls_get_stats_af_size, +}; + static int __init mpls_init(void) { int err; @@ -1724,9 +2062,13 @@ static int __init mpls_init(void) dev_add_pack(&mpls_packet_type); + rtnl_af_register(&mpls_af_ops); + rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL); rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL); rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL); + rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, + mpls_netconf_dump_devconf, NULL); err = 0; out: return err; @@ -1740,6 +2082,7 @@ module_init(mpls_init); static void __exit mpls_exit(void) { rtnl_unregister_all(PF_MPLS); + rtnl_af_unregister(&mpls_af_ops); dev_remove_pack(&mpls_packet_type); unregister_netdevice_notifier(&mpls_dev_notifier); unregister_pernet_subsys(&mpls_net_ops); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index bdfef6c3271a..76360d8b9579 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -9,13 +9,58 @@ struct mpls_entry_decoded { u8 bos; }; +struct mpls_pcpu_stats { + struct mpls_link_stats stats; + struct u64_stats_sync syncp; +}; + struct mpls_dev { - int input_enabled; + int input_enabled; + struct net_device *dev; + struct mpls_pcpu_stats __percpu *stats; - struct ctl_table_header *sysctl; - struct rcu_head rcu; + struct ctl_table_header *sysctl; + struct rcu_head rcu; }; +#if BITS_PER_LONG == 32 + +#define MPLS_INC_STATS_LEN(mdev, len, pkts_field, bytes_field) \ + do { \ + __typeof__(*(mdev)->stats) *ptr = \ + raw_cpu_ptr((mdev)->stats); \ + local_bh_disable(); \ + u64_stats_update_begin(&ptr->syncp); \ + ptr->stats.pkts_field++; \ + ptr->stats.bytes_field += (len); \ + u64_stats_update_end(&ptr->syncp); \ + local_bh_enable(); \ + } while (0) + +#define MPLS_INC_STATS(mdev, field) \ + do { \ + __typeof__(*(mdev)->stats) *ptr = \ + raw_cpu_ptr((mdev)->stats); \ + local_bh_disable(); \ + u64_stats_update_begin(&ptr->syncp); \ + ptr->stats.field++; \ + u64_stats_update_end(&ptr->syncp); \ + local_bh_enable(); \ + } while (0) + +#else + +#define MPLS_INC_STATS_LEN(mdev, len, pkts_field, bytes_field) \ + do { \ + this_cpu_inc((mdev)->stats->stats.pkts_field); \ + this_cpu_add((mdev)->stats->stats.bytes_field, (len)); \ + } while (0) + +#define MPLS_INC_STATS(mdev, field) \ + this_cpu_inc((mdev)->stats->stats.field) + +#endif + struct sk_buff; #define LABEL_NOT_SPECIFIED (1 << 20) @@ -114,6 +159,11 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr * return result; } +static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->mpls_ptr); +} + int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels, @@ -123,5 +173,7 @@ int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table, bool mpls_output_possible(const struct net_device *dev); unsigned int mpls_dev_mtu(const struct net_device *dev); bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu); +void mpls_stats_inc_outucastpkts(struct net_device *dev, + const struct sk_buff *skb); #endif /* MPLS_INTERNAL_H */ diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 1d281c1ff7c1..e4e4424f9eb1 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -48,11 +48,15 @@ static int mpls_xmit(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct rtable *rt = NULL; struct rt6_info *rt6 = NULL; + struct mpls_dev *out_mdev; int err = 0; bool bos; int i; unsigned int ttl; + /* Find the output device */ + out_dev = dst->dev; + /* Obtain the ttl */ if (dst->ops->family == AF_INET) { ttl = ip_hdr(skb)->ttl; @@ -66,8 +70,6 @@ static int mpls_xmit(struct sk_buff *skb) skb_orphan(skb); - /* Find the output device */ - out_dev = dst->dev; if (!mpls_output_possible(out_dev) || !dst->lwtstate || skb_warn_if_lro(skb)) goto drop; @@ -109,6 +111,8 @@ static int mpls_xmit(struct sk_buff *skb) bos = false; } + mpls_stats_inc_outucastpkts(out_dev, skb); + if (rt) err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway, skb); @@ -122,11 +126,14 @@ static int mpls_xmit(struct sk_buff *skb) return LWTUNNEL_XMIT_DONE; drop: + out_mdev = out_dev ? mpls_dev_get(out_dev) : NULL; + if (out_mdev) + MPLS_INC_STATS(out_mdev, tx_errors); kfree_skb(skb); return -EINVAL; } -static int mpls_build_state(struct net_device *dev, struct nlattr *nla, +static int mpls_build_state(struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts) { |