Diffstat (limited to 'net')
128 files changed, 6470 insertions, 2261 deletions
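Most of the batman-adv churn in this series comes from two related refactors: the routing-algorithm bookkeeping moves out of main.c into the new bat_algo.c, and the flat bat_* callbacks in struct batadv_algo_ops are regrouped into iface, neigh and orig sub-structures, with the ogm_schedule/ogm_emit hooks dropped from the mandatory set. As orientation, here is a minimal sketch (not part of the patch) of what registering an algorithm looks like after the rework; every batadv_example_* name is a hypothetical placeholder, while the ops layout, the mandatory-member check and the error codes are taken from the hunks below:

#include "main.h"

#include <linux/init.h>
#include <linux/types.h>

#include "bat_algo.h"

static int batadv_example_iface_enable(struct batadv_hard_iface *hard_iface)
{
	/* set up per-interface protocol state, start timers, ... */
	return 0;
}

static void batadv_example_iface_disable(struct batadv_hard_iface *hard_iface)
{
}

static void batadv_example_iface_update_mac(struct batadv_hard_iface *hard_iface)
{
}

static void batadv_example_primary_set(struct batadv_hard_iface *hard_iface)
{
}

/* signatures assumed to follow the BATMAN_IV/V neigh ops wired up below */
static int batadv_example_neigh_cmp(struct batadv_neigh_node *neigh1,
				    struct batadv_hard_iface *if_outgoing1,
				    struct batadv_neigh_node *neigh2,
				    struct batadv_hard_iface *if_outgoing2)
{
	return 0;	/* >0 if neigh1's metric is better than neigh2's */
}

static bool batadv_example_neigh_is_sob(struct batadv_neigh_node *neigh1,
					struct batadv_hard_iface *if_outgoing1,
					struct batadv_neigh_node *neigh2,
					struct batadv_hard_iface *if_outgoing2)
{
	return true;
}

static struct batadv_algo_ops batadv_example_ops __read_mostly = {
	.name = "BATMAN_EXAMPLE",	/* matched by batadv_algo_get() */
	.iface = {
		/* .activate is optional: hard-interface.c only calls it
		 * when set (see the algo_ops->iface.activate hunk)
		 */
		.enable = batadv_example_iface_enable,
		.disable = batadv_example_iface_disable,
		.update_mac = batadv_example_iface_update_mac,
		.primary_set = batadv_example_primary_set,
	},
	.neigh = {
		.cmp = batadv_example_neigh_cmp,
		.is_similar_or_better = batadv_example_neigh_is_sob,
	},
	/* .orig.{print,free,add_if,del_if} likewise stay optional */
};

static int __init batadv_example_init(void)
{
	/* batadv_algo_register() returns -EEXIST for a duplicate .name and
	 * -EINVAL when any of the six mandatory ops above is missing
	 */
	return batadv_algo_register(&batadv_example_ops);
}

Note that bat_ogm_schedule/bat_ogm_emit no longer exist as generic hooks at all: BATMAN_IV now drives its OGM scheduling internally via batadv_iv_send_outstanding_bat_ogm_packet() and the new iface.activate callback, as the bat_iv_ogm.c hunks below show.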
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 86ae75b77390..c8f422c90856 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -790,6 +790,8 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, diff --git a/net/atm/clip.c b/net/atm/clip.c index e07f551a863c..53b4ac09e7b7 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -286,7 +286,7 @@ static const struct neigh_ops clip_neigh_ops = { .connected_output = neigh_direct_output, }; -static int clip_constructor(struct neighbour *neigh) +static int clip_constructor(struct net_device *dev, struct neighbour *neigh) { struct atmarp_entry *entry = neighbour_priv(neigh); diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index f66930ee3c0b..833bb145ba3c 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -66,7 +66,7 @@ config BATMAN_ADV_NC config BATMAN_ADV_MCAST bool "Multicast optimisation" - depends on BATMAN_ADV + depends on BATMAN_ADV && INET && !(BRIDGE=m && BATMAN_ADV=y) default n help This option enables the multicast optimisation which aims to diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 797cf2fc88c1..a83fc6c58d19 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -17,6 +17,7 @@ # obj-$(CONFIG_BATMAN_ADV) += batman-adv.o +batman-adv-y += bat_algo.o batman-adv-y += bat_iv_ogm.o batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v.o batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o @@ -31,12 +32,16 @@ batman-adv-y += gateway_common.o batman-adv-y += hard-interface.o batman-adv-y += hash.o batman-adv-y += icmp_socket.o +batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o +batman-adv-y += netlink.o batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o batman-adv-y += routing.o batman-adv-y += send.o batman-adv-y += soft-interface.o batman-adv-y += sysfs.o +batman-adv-y += tp_meter.o batman-adv-y += translation-table.o +batman-adv-y += tvlv.o diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c new file mode 100644 index 000000000000..81dbbf569bd4 --- /dev/null +++ b/net/batman-adv/bat_algo.c @@ -0,0 +1,140 @@ +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "main.h" + +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/moduleparam.h> +#include <linux/printk.h> +#include <linux/seq_file.h> +#include <linux/stddef.h> +#include <linux/string.h> + +#include "bat_algo.h" + +char batadv_routing_algo[20] = "BATMAN_IV"; +static struct hlist_head batadv_algo_list; + +/** + * batadv_algo_init - Initialize batman-adv algorithm management data structures + */ +void batadv_algo_init(void) +{ + INIT_HLIST_HEAD(&batadv_algo_list); +} + +static struct batadv_algo_ops *batadv_algo_get(char *name) +{ + struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp; + + hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) { + if (strcmp(bat_algo_ops_tmp->name, name) != 0) + continue; + + bat_algo_ops = bat_algo_ops_tmp; + break; + } + + return bat_algo_ops; +} + +int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) +{ + struct batadv_algo_ops *bat_algo_ops_tmp; + + bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name); + if (bat_algo_ops_tmp) { + pr_info("Trying to register already registered routing algorithm: %s\n", + bat_algo_ops->name); + return -EEXIST; + } + + /* all algorithms must implement all ops (for now) */ + if (!bat_algo_ops->iface.enable || + !bat_algo_ops->iface.disable || + !bat_algo_ops->iface.update_mac || + !bat_algo_ops->iface.primary_set || + !bat_algo_ops->neigh.cmp || + !bat_algo_ops->neigh.is_similar_or_better) { + pr_info("Routing algo '%s' does not implement required ops\n", + bat_algo_ops->name); + return -EINVAL; + } + + INIT_HLIST_NODE(&bat_algo_ops->list); + hlist_add_head(&bat_algo_ops->list, &batadv_algo_list); + + return 0; +} + +int batadv_algo_select(struct batadv_priv *bat_priv, char *name) +{ + struct batadv_algo_ops *bat_algo_ops; + + bat_algo_ops = batadv_algo_get(name); + if (!bat_algo_ops) + return -EINVAL; + + bat_priv->algo_ops = bat_algo_ops; + + return 0; +} + +int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) +{ + struct batadv_algo_ops *bat_algo_ops; + + seq_puts(seq, "Available routing algorithms:\n"); + + hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { + seq_printf(seq, " * %s\n", bat_algo_ops->name); + } + + return 0; +} + +static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) +{ + struct batadv_algo_ops *bat_algo_ops; + char *algo_name = (char *)val; + size_t name_len = strlen(algo_name); + + if (name_len > 0 && algo_name[name_len - 1] == '\n') + algo_name[name_len - 1] = '\0'; + + bat_algo_ops = batadv_algo_get(algo_name); + if (!bat_algo_ops) { + pr_err("Routing algorithm '%s' is not supported\n", algo_name); + return -EINVAL; + } + + return param_set_copystring(algo_name, kp); +} + +static const struct kernel_param_ops batadv_param_ops_ra = { + .set = batadv_param_set_ra, + .get = param_get_string, +}; + +static struct kparam_string batadv_param_string_ra = { + .maxlen = sizeof(batadv_routing_algo), + .string = batadv_routing_algo, +}; + +module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra, + 0644); diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 03dafd33d23b..860d773dd8fa 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -18,32 +18,18 @@ #ifndef _NET_BATMAN_ADV_BAT_ALGO_H_ #define _NET_BATMAN_ADV_BAT_ALGO_H_ -struct batadv_priv; +#include "main.h" -int batadv_iv_init(void); +#include <linux/types.h> -#ifdef CONFIG_BATMAN_ADV_BATMAN_V +struct seq_file; -int batadv_v_init(void); -int batadv_v_mesh_init(struct batadv_priv 
*bat_priv); -void batadv_v_mesh_free(struct batadv_priv *bat_priv); +extern char batadv_routing_algo[]; +extern struct list_head batadv_hardif_list; -#else - -static inline int batadv_v_init(void) -{ - return 0; -} - -static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv) -{ - return 0; -} - -static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv) -{ -} - -#endif /* CONFIG_BATMAN_ADV_BATMAN_V */ +void batadv_algo_init(void); +int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); +int batadv_algo_select(struct batadv_priv *bat_priv, char *name); +int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index ce2f203048d3..19b0abd6c640 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -15,7 +15,7 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include "bat_algo.h" +#include "bat_iv_ogm.h" #include "main.h" #include <linux/atomic.h> @@ -30,8 +30,9 @@ #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> -#include <linux/list.h> +#include <linux/kernel.h> #include <linux/kref.h> +#include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/pkt_sched.h> @@ -48,15 +49,20 @@ #include <linux/types.h> #include <linux/workqueue.h> +#include "bat_algo.h" #include "bitarray.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "network-coding.h" #include "originator.h" #include "packet.h" #include "routing.h" #include "send.h" #include "translation-table.h" +#include "tvlv.h" + +static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work); /** * enum batadv_dup_status - duplicate status @@ -336,7 +342,8 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, { struct batadv_neigh_node *neigh_node; - neigh_node = batadv_neigh_node_new(orig_node, hard_iface, neigh_addr); + neigh_node = batadv_neigh_node_get_or_create(orig_node, + hard_iface, neigh_addr); if (!neigh_node) goto out; @@ -730,7 +737,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, /* start timer for this packet */ INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, - batadv_send_outstanding_bat_ogm_packet); + batadv_iv_send_outstanding_bat_ogm_packet); queue_delayed_work(batadv_event_workqueue, &forw_packet_aggr->delayed_work, send_time - jiffies); @@ -937,6 +944,19 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; + if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || + (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) + return; + + /* the interface gets activated here to avoid race conditions between + * the moment of activating the interface in + * hardif_activate_interface() where the originator mac is set and + * outdated packets (especially uninitialized mac addresses) in the + * packet queue + */ + if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED) + hard_iface->if_status = BATADV_IF_ACTIVE; + primary_if = batadv_primary_if_get_selected(bat_priv); if (hard_iface == primary_if) { @@ -1778,6 +1798,45 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, batadv_orig_node_put(orig_node); } +static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_forw_packet *forw_packet; + struct batadv_priv *bat_priv; + + 
delayed_work = to_delayed_work(work); + forw_packet = container_of(delayed_work, struct batadv_forw_packet, + delayed_work); + bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); + spin_lock_bh(&bat_priv->forw_bat_list_lock); + hlist_del(&forw_packet->list); + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) + goto out; + + batadv_iv_ogm_emit(forw_packet); + + /* we have to have at least one packet in the queue to determine the + * queues wake up time unless we are shutting down. + * + * only re-schedule if this is the "original" copy, e.g. the OGM of the + * primary interface should only be rescheduled once per period, but + * this function will be called for the forw_packet instances of the + * other secondary interfaces as well. + */ + if (forw_packet->own && + forw_packet->if_incoming == forw_packet->if_outgoing) + batadv_iv_ogm_schedule(forw_packet->if_incoming); + +out: + /* don't count own packet */ + if (!forw_packet->own) + atomic_inc(&bat_priv->batman_queue_left); + + batadv_forw_packet_free(forw_packet); +} + static int batadv_iv_ogm_receive(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { @@ -1794,7 +1853,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, /* did we receive a B.A.T.M.A.N. IV OGM packet on an interface * that does not have B.A.T.M.A.N. IV enabled ? */ - if (bat_priv->bat_algo_ops->bat_ogm_emit != batadv_iv_ogm_emit) + if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable) return NET_RX_DROP; batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX); @@ -2052,21 +2111,32 @@ out: return ret; } +static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) +{ + /* begin scheduling originator messages on that interface */ + batadv_iv_ogm_schedule(hard_iface); +} + static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", - .bat_iface_enable = batadv_iv_ogm_iface_enable, - .bat_iface_disable = batadv_iv_ogm_iface_disable, - .bat_iface_update_mac = batadv_iv_ogm_iface_update_mac, - .bat_primary_iface_set = batadv_iv_ogm_primary_iface_set, - .bat_ogm_schedule = batadv_iv_ogm_schedule, - .bat_ogm_emit = batadv_iv_ogm_emit, - .bat_neigh_cmp = batadv_iv_ogm_neigh_cmp, - .bat_neigh_is_similar_or_better = batadv_iv_ogm_neigh_is_sob, - .bat_neigh_print = batadv_iv_neigh_print, - .bat_orig_print = batadv_iv_ogm_orig_print, - .bat_orig_free = batadv_iv_ogm_orig_free, - .bat_orig_add_if = batadv_iv_ogm_orig_add_if, - .bat_orig_del_if = batadv_iv_ogm_orig_del_if, + .iface = { + .activate = batadv_iv_iface_activate, + .enable = batadv_iv_ogm_iface_enable, + .disable = batadv_iv_ogm_iface_disable, + .update_mac = batadv_iv_ogm_iface_update_mac, + .primary_set = batadv_iv_ogm_primary_iface_set, + }, + .neigh = { + .cmp = batadv_iv_ogm_neigh_cmp, + .is_similar_or_better = batadv_iv_ogm_neigh_is_sob, + .print = batadv_iv_neigh_print, + }, + .orig = { + .print = batadv_iv_ogm_orig_print, + .free = batadv_iv_ogm_orig_free, + .add_if = batadv_iv_ogm_orig_add_if, + .del_if = batadv_iv_ogm_orig_del_if, + }, }; int __init batadv_iv_init(void) diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h new file mode 100644 index 000000000000..b9f3550faaf7 --- /dev/null +++ b/net/batman-adv/bat_iv_ogm.h @@ -0,0 +1,25 @@ +/* Copyright (C) 2007-2016 B.A.T.M.A.N. 
contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _BATMAN_ADV_BATADV_IV_OGM_H_ +#define _BATMAN_ADV_BATADV_IV_OGM_H_ + +#include "main.h" + +int batadv_iv_init(void); + +#endif /* _BATMAN_ADV_BATADV_IV_OGM_H_ */ diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 0a12e5cdd65d..0366cbf5e444 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -15,7 +15,7 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include "bat_algo.h" +#include "bat_v.h" #include "main.h" #include <linux/atomic.h> @@ -31,6 +31,7 @@ #include <linux/types.h> #include <linux/workqueue.h> +#include "bat_algo.h" #include "bat_v_elp.h" #include "bat_v_ogm.h" #include "hard-interface.h" @@ -70,11 +71,6 @@ static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface) if (ret < 0) batadv_v_elp_iface_disable(hard_iface); - /* enable link throughput auto-detection by setting the throughput - * override to zero - */ - atomic_set(&hard_iface->bat_v.throughput_override, 0); - return ret; } @@ -119,14 +115,6 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) batadv_v_elp_throughput_metric_update); } -static void batadv_v_ogm_schedule(struct batadv_hard_iface *hard_iface) -{ -} - -static void batadv_v_ogm_emit(struct batadv_forw_packet *forw_packet) -{ -} - /** * batadv_v_orig_print_neigh - print neighbors for the originator table * @orig_node: the orig_node for which the neighbors are printed @@ -334,21 +322,39 @@ err_ifinfo1: static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", - .bat_iface_activate = batadv_v_iface_activate, - .bat_iface_enable = batadv_v_iface_enable, - .bat_iface_disable = batadv_v_iface_disable, - .bat_iface_update_mac = batadv_v_iface_update_mac, - .bat_primary_iface_set = batadv_v_primary_iface_set, - .bat_hardif_neigh_init = batadv_v_hardif_neigh_init, - .bat_ogm_emit = batadv_v_ogm_emit, - .bat_ogm_schedule = batadv_v_ogm_schedule, - .bat_orig_print = batadv_v_orig_print, - .bat_neigh_cmp = batadv_v_neigh_cmp, - .bat_neigh_is_similar_or_better = batadv_v_neigh_is_sob, - .bat_neigh_print = batadv_v_neigh_print, + .iface = { + .activate = batadv_v_iface_activate, + .enable = batadv_v_iface_enable, + .disable = batadv_v_iface_disable, + .update_mac = batadv_v_iface_update_mac, + .primary_set = batadv_v_primary_iface_set, + }, + .neigh = { + .hardif_init = batadv_v_hardif_neigh_init, + .cmp = batadv_v_neigh_cmp, + .is_similar_or_better = batadv_v_neigh_is_sob, + .print = batadv_v_neigh_print, + }, + .orig = { + .print = batadv_v_orig_print, + }, }; /** + * batadv_v_hardif_init - initialize the algorithm specific fields in the + * hard-interface object + * @hard_iface: the hard-interface to initialize + */ +void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) +{ + /* enable link throughput auto-detection by setting the throughput + * override to zero + */ + 
atomic_set(&hard_iface->bat_v.throughput_override, 0); + atomic_set(&hard_iface->bat_v.elp_interval, 500); +} + +/** * batadv_v_mesh_init - initialize the B.A.T.M.A.N. V private resources for a * mesh * @bat_priv: the object representing the mesh interface to initialise diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h new file mode 100644 index 000000000000..83b77639729e --- /dev/null +++ b/net/batman-adv/bat_v.h @@ -0,0 +1,52 @@ +/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Linus Lüssing + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_BAT_V_H_ +#define _NET_BATMAN_ADV_BAT_V_H_ + +#include "main.h" + +#ifdef CONFIG_BATMAN_ADV_BATMAN_V + +int batadv_v_init(void); +void batadv_v_hardif_init(struct batadv_hard_iface *hardif); +int batadv_v_mesh_init(struct batadv_priv *bat_priv); +void batadv_v_mesh_free(struct batadv_priv *bat_priv); + +#else + +static inline int batadv_v_init(void) +{ + return 0; +} + +static inline void batadv_v_hardif_init(struct batadv_hard_iface *hardif) +{ +} + +static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv) +{ +} + +#endif /* CONFIG_BATMAN_ADV_BATMAN_V */ + +#endif /* _NET_BATMAN_ADV_BAT_V_H_ */ diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index df42eb1365a0..7d170010beb9 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -43,6 +43,7 @@ #include "bat_algo.h" #include "bat_v_ogm.h" #include "hard-interface.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "routing.h" @@ -344,7 +345,6 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface) /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&hard_iface->bat_v.elp_seqno, random_seqno); - atomic_set(&hard_iface->bat_v.elp_interval, 500); /* assume full-duplex by default */ hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX; @@ -443,7 +443,8 @@ static void batadv_v_elp_neigh_update(struct batadv_priv *bat_priv, if (!orig_neigh) return; - neigh = batadv_neigh_node_new(orig_neigh, if_incoming, neigh_addr); + neigh = batadv_neigh_node_get_or_create(orig_neigh, + if_incoming, neigh_addr); if (!neigh) goto orig_free; @@ -503,7 +504,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb, /* did we receive a B.A.T.M.A.N. V ELP packet on an interface * that does not have B.A.T.M.A.N. V ELP enabled ? */ - if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0) + if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) return NET_RX_DROP; elp_packet = (struct batadv_elp_packet *)skb->data; diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index cc130b2d05e5..be17c0b1369e 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -15,11 +15,11 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ -#include "main.h" - #ifndef _NET_BATMAN_ADV_BAT_V_ELP_H_ #define _NET_BATMAN_ADV_BAT_V_ELP_H_ +#include "main.h" + struct sk_buff; struct work_struct; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 473ebb9a0e73..6fbba4eb0617 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -39,13 +39,16 @@ #include <linux/types.h> #include <linux/workqueue.h> +#include "bat_algo.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "routing.h" #include "send.h" #include "translation-table.h" +#include "tvlv.h" /** * batadv_v_ogm_orig_get - retrieve and possibly create an originator node @@ -683,8 +686,8 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, if (!orig_node) return; - neigh_node = batadv_neigh_node_new(orig_node, if_incoming, - ethhdr->h_source); + neigh_node = batadv_neigh_node_get_or_create(orig_node, if_incoming, + ethhdr->h_source); if (!neigh_node) goto out; @@ -751,7 +754,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, /* did we receive a OGM2 packet on an interface that does not have * B.A.T.M.A.N. V enabled ? */ - if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0) + if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) return NET_RX_DROP; if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN)) diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h index d849c75ada0e..4c4d45caa422 100644 --- a/net/batman-adv/bat_v_ogm.h +++ b/net/batman-adv/bat_v_ogm.h @@ -18,10 +18,10 @@ #ifndef _BATMAN_ADV_BATADV_V_OGM_H_ #define _BATMAN_ADV_BATADV_V_OGM_H_ +#include "main.h" + #include <linux/types.h> -struct batadv_hard_iface; -struct batadv_priv; struct sk_buff; int batadv_v_ogm_init(struct batadv_priv *bat_priv); diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index a0c7913837a5..032271421a20 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -20,6 +20,8 @@ #include <linux/bitmap.h> +#include "log.h" + /* shift the packet array by n places. 
*/ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n) { diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 748a9ead7ce5..e4f7494fb974 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -48,6 +48,7 @@ #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "sysfs.h" diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 952900466d88..1d68b6e63b96 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -18,245 +18,33 @@ #include "debugfs.h" #include "main.h" -#include <linux/compiler.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/errno.h> #include <linux/export.h> -#include <linux/fcntl.h> #include <linux/fs.h> -#include <linux/jiffies.h> -#include <linux/kernel.h> -#include <linux/module.h> #include <linux/netdevice.h> -#include <linux/poll.h> #include <linux/printk.h> #include <linux/sched.h> /* for linux/wait.h */ #include <linux/seq_file.h> -#include <linux/slab.h> -#include <linux/spinlock.h> #include <linux/stat.h> #include <linux/stddef.h> #include <linux/stringify.h> #include <linux/sysfs.h> -#include <linux/types.h> -#include <linux/uaccess.h> -#include <linux/wait.h> -#include <stdarg.h> +#include "bat_algo.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "icmp_socket.h" +#include "log.h" +#include "multicast.h" #include "network-coding.h" #include "originator.h" #include "translation-table.h" static struct dentry *batadv_debugfs; -#ifdef CONFIG_BATMAN_ADV_DEBUG -#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1) - -static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN; - -static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log, - size_t idx) -{ - return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK]; -} - -static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log, - char c) -{ - char *char_addr; - - char_addr = batadv_log_char_addr(debug_log, debug_log->log_end); - *char_addr = c; - debug_log->log_end++; - - if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len) - debug_log->log_start = debug_log->log_end - batadv_log_buff_len; -} - -__printf(2, 3) -static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log, - const char *fmt, ...) -{ - va_list args; - static char debug_log_buf[256]; - char *p; - - if (!debug_log) - return 0; - - spin_lock_bh(&debug_log->lock); - va_start(args, fmt); - vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args); - va_end(args); - - for (p = debug_log_buf; *p != 0; p++) - batadv_emit_log_char(debug_log, *p); - - spin_unlock_bh(&debug_log->lock); - - wake_up(&debug_log->queue_wait); - - return 0; -} - -int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) 
-{ - va_list args; - char tmp_log_buf[256]; - - va_start(args, fmt); - vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args); - batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s", - jiffies_to_msecs(jiffies), tmp_log_buf); - va_end(args); - - return 0; -} - -static int batadv_log_open(struct inode *inode, struct file *file) -{ - if (!try_module_get(THIS_MODULE)) - return -EBUSY; - - nonseekable_open(inode, file); - file->private_data = inode->i_private; - return 0; -} - -static int batadv_log_release(struct inode *inode, struct file *file) -{ - module_put(THIS_MODULE); - return 0; -} - -static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log) -{ - return !(debug_log->log_start - debug_log->log_end); -} - -static ssize_t batadv_log_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct batadv_priv *bat_priv = file->private_data; - struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; - int error, i = 0; - char *char_addr; - char c; - - if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log)) - return -EAGAIN; - - if (!buf) - return -EINVAL; - - if (count == 0) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - - error = wait_event_interruptible(debug_log->queue_wait, - (!batadv_log_empty(debug_log))); - - if (error) - return error; - - spin_lock_bh(&debug_log->lock); - - while ((!error) && (i < count) && - (debug_log->log_start != debug_log->log_end)) { - char_addr = batadv_log_char_addr(debug_log, - debug_log->log_start); - c = *char_addr; - - debug_log->log_start++; - - spin_unlock_bh(&debug_log->lock); - - error = __put_user(c, buf); - - spin_lock_bh(&debug_log->lock); - - buf++; - i++; - } - - spin_unlock_bh(&debug_log->lock); - - if (!error) - return i; - - return error; -} - -static unsigned int batadv_log_poll(struct file *file, poll_table *wait) -{ - struct batadv_priv *bat_priv = file->private_data; - struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; - - poll_wait(file, &debug_log->queue_wait, wait); - - if (!batadv_log_empty(debug_log)) - return POLLIN | POLLRDNORM; - - return 0; -} - -static const struct file_operations batadv_log_fops = { - .open = batadv_log_open, - .release = batadv_log_release, - .read = batadv_log_read, - .poll = batadv_log_poll, - .llseek = no_llseek, -}; - -static int batadv_debug_log_setup(struct batadv_priv *bat_priv) -{ - struct dentry *d; - - if (!bat_priv->debug_dir) - goto err; - - bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC); - if (!bat_priv->debug_log) - goto err; - - spin_lock_init(&bat_priv->debug_log->lock); - init_waitqueue_head(&bat_priv->debug_log->queue_wait); - - d = debugfs_create_file("log", S_IFREG | S_IRUSR, - bat_priv->debug_dir, bat_priv, - &batadv_log_fops); - if (!d) - goto err; - - return 0; - -err: - return -ENOMEM; -} - -static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) -{ - kfree(bat_priv->debug_log); - bat_priv->debug_log = NULL; -} -#else /* CONFIG_BATMAN_ADV_DEBUG */ -static int batadv_debug_log_setup(struct batadv_priv *bat_priv) -{ - return 0; -} - -static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) -{ -} -#endif - static int batadv_algorithms_open(struct inode *inode, struct file *file) { return single_open(file, batadv_algo_seq_print_text, NULL); @@ -363,6 +151,22 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file) } #endif +#ifdef CONFIG_BATMAN_ADV_MCAST +/** + * batadv_mcast_flags_open - prepare file handler for reads from mcast_flags + * 
@inode: inode which was opened + * @file: file handle to be initialized + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_mcast_flags_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + + return single_open(file, batadv_mcast_flags_seq_print_text, net_dev); +} +#endif + #define BATADV_DEBUGINFO(_name, _mode, _open) \ struct batadv_debuginfo batadv_debuginfo_##_name = { \ .attr = { \ @@ -407,6 +211,9 @@ static BATADV_DEBUGINFO(transtable_local, S_IRUGO, #ifdef CONFIG_BATMAN_ADV_NC static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open); #endif +#ifdef CONFIG_BATMAN_ADV_MCAST +static BATADV_DEBUGINFO(mcast_flags, S_IRUGO, batadv_mcast_flags_open); +#endif static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_neighbors, @@ -424,6 +231,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { #ifdef CONFIG_BATMAN_ADV_NC &batadv_debuginfo_nc_nodes, #endif +#ifdef CONFIG_BATMAN_ADV_MCAST + &batadv_debuginfo_mcast_flags, +#endif NULL, }; diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 278800a99c69..fa7646532a13 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -45,9 +45,11 @@ #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "originator.h" #include "send.h" #include "translation-table.h" +#include "tvlv.h" static void batadv_dat_purge(struct work_struct *work); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 65536db1bff7..0934730fb7ff 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -27,7 +27,6 @@ #include <linux/kernel.h> #include <linux/lockdep.h> #include <linux/netdevice.h> -#include <linux/pkt_sched.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -414,7 +413,7 @@ static struct sk_buff *batadv_frag_create(struct sk_buff *skb, if (!skb_fragment) goto err; - skb->priority = TC_PRIO_CONTROL; + skb_fragment->priority = skb->priority; /* Eat the last mtu-bytes of the skb */ skb_reserve(skb_fragment, header_size + ETH_HLEN); @@ -434,11 +433,12 @@ err: * @orig_node: final destination of the created fragments * @neigh_node: next-hop of the created fragments * - * Return: true on success, false otherwise. + * Return: the netdev tx status or -1 in case of error. + * When -1 is returned the skb is not consumed. 
*/ -bool batadv_frag_send_packet(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node) +int batadv_frag_send_packet(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node) { struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; @@ -447,7 +447,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb, unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; unsigned int header_size = sizeof(frag_header); unsigned int max_fragment_size, max_packet_size; - bool ret = false; + int ret = -1; /* To avoid merge and refragmentation at next-hops we never send * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE @@ -458,12 +458,12 @@ bool batadv_frag_send_packet(struct sk_buff *skb, /* Don't even try to fragment, if we need more than 16 fragments */ if (skb->len > max_packet_size) - goto out_err; + goto out; bat_priv = orig_node->bat_priv; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) - goto out_err; + goto out; /* Create one header to be copied to all fragments */ frag_header.packet_type = BATADV_UNICAST_FRAG; @@ -473,6 +473,15 @@ bool batadv_frag_send_packet(struct sk_buff *skb, frag_header.reserved = 0; frag_header.no = 0; frag_header.total_size = htons(skb->len); + + /* skb->priority values from 256->263 are magic values to + * directly indicate a specific 802.1d priority. This is used + * to allow 802.1d priority to be passed directly in from VLAN + * tags, etc. + */ + if (skb->priority >= 256 && skb->priority <= 263) + frag_header.priority = skb->priority - 256; + ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr); ether_addr_copy(frag_header.dest, orig_node->orig); @@ -480,23 +489,33 @@ bool batadv_frag_send_packet(struct sk_buff *skb, while (skb->len > max_fragment_size) { skb_fragment = batadv_frag_create(skb, &frag_header, mtu); if (!skb_fragment) - goto out_err; + goto out; batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, skb_fragment->len + ETH_HLEN); - batadv_send_unicast_skb(skb_fragment, neigh_node); + ret = batadv_send_unicast_skb(skb_fragment, neigh_node); + if (ret != NET_XMIT_SUCCESS) { + /* return -1 so that the caller can free the original + * skb + */ + ret = -1; + goto out; + } + frag_header.no++; /* The initial check in this function should cover this case */ - if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) - goto out_err; + if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) { + ret = -1; + goto out; + } } /* Make room for the fragment header. 
*/ if (batadv_skb_head_push(skb, header_size) < 0 || pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) - goto out_err; + goto out; memcpy(skb->data, &frag_header, header_size); @@ -504,11 +523,9 @@ bool batadv_frag_send_packet(struct sk_buff *skb, batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, skb->len + ETH_HLEN); - batadv_send_unicast_skb(skb, neigh_node); - - ret = true; + ret = batadv_send_unicast_skb(skb, neigh_node); -out_err: +out: if (primary_if) batadv_hardif_put(primary_if); diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 9ff77c7ef7c7..3202fe329e63 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -34,9 +34,9 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_orig_node *orig_node_src); bool batadv_frag_skb_buffer(struct sk_buff **skb, struct batadv_orig_node *orig_node); -bool batadv_frag_send_packet(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node); +int batadv_frag_send_packet(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node); /** * batadv_frag_check_entry - check if a list of fragments has timed out diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 5839c569f769..63a805d3f96e 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -42,6 +42,7 @@ #include "gateway_common.h" #include "hard-interface.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "routing.h" @@ -192,7 +193,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) tq_avg = router_ifinfo->bat_iv.tq_avg; - switch (atomic_read(&bat_priv->gw_sel_class)) { + switch (atomic_read(&bat_priv->gw.sel_class)) { case 1: /* fast connection */ tmp_gw_factor = tq_avg * tq_avg; tmp_gw_factor *= gw_node->bandwidth_down; @@ -255,7 +256,7 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv) { struct batadv_gw_node *curr_gw; - if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) + if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT) return; curr_gw = batadv_gw_get_selected_gw_node(bat_priv); @@ -283,7 +284,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv) struct batadv_neigh_ifinfo *router_ifinfo = NULL; char gw_addr[18] = { '\0' }; - if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) + if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT) goto out; curr_gw = batadv_gw_get_selected_gw_node(bat_priv); @@ -402,8 +403,8 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv, /* if the routing class is greater than 3 the value tells us how much * greater the TQ value of the new gateway must be */ - if ((atomic_read(&bat_priv->gw_sel_class) > 3) && - (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class))) + if ((atomic_read(&bat_priv->gw.sel_class) > 3) && + (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) goto out; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -638,8 +639,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) goto out; seq_printf(seq, - " %-12s (%s/%i) %17s [%10s]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", - "Gateway", "#", BATADV_TQ_MAX_VALUE, "Nexthop", "outgoingIF", + " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. 
adv %s, MainIF/MAC: %s/%pM (%s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, primary_if->net_dev->dev_addr, net_dev->name); @@ -821,7 +821,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!gw_node) goto out; - switch (atomic_read(&bat_priv->gw_mode)) { + switch (atomic_read(&bat_priv->gw.mode)) { case BATADV_GW_MODE_SERVER: /* If we are a GW then we are our best GW. We can artificially * set the tq towards ourself as the maximum value diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 4423047889e1..d7bc6a87bcc9 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -19,8 +19,8 @@ #include "main.h" #include <linux/atomic.h> -#include <linux/errno.h> #include <linux/byteorder/generic.h> +#include <linux/errno.h> #include <linux/kernel.h> #include <linux/math64.h> #include <linux/netdevice.h> @@ -28,7 +28,9 @@ #include <linux/string.h> #include "gateway_client.h" +#include "log.h" #include "packet.h" +#include "tvlv.h" /** * batadv_parse_throughput - parse supplied string buffer to extract throughput @@ -144,7 +146,7 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) u32 down, up; char gw_mode; - gw_mode = atomic_read(&bat_priv->gw_mode); + gw_mode = atomic_read(&bat_priv->gw.mode); switch (gw_mode) { case BATADV_GW_MODE_OFF: @@ -241,8 +243,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /* restart gateway selection if fast or late switching was enabled */ if ((gateway.bandwidth_down != 0) && - (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_CLIENT) && - (atomic_read(&bat_priv->gw_sel_class) > 2)) + (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) && + (atomic_read(&bat_priv->gw.sel_class) > 2)) batadv_gw_check_election(bat_priv, orig); } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 8c2f39962fa5..1f9080840566 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -23,9 +23,9 @@ #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/if.h> #include <linux/if_arp.h> #include <linux/if_ether.h> -#include <linux/if.h> #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> @@ -37,10 +37,12 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> +#include "bat_v.h" #include "bridge_loop_avoidance.h" #include "debugfs.h" #include "distributed-arp-table.h" #include "gateway_client.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "send.h" @@ -245,7 +247,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv, if (!new_hard_iface) goto out; - bat_priv->bat_algo_ops->bat_primary_iface_set(new_hard_iface); + bat_priv->algo_ops->iface.primary_set(new_hard_iface); batadv_primary_if_update_addr(bat_priv, curr_hard_iface); out: @@ -392,7 +394,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) bat_priv = netdev_priv(hard_iface->soft_iface); - bat_priv->bat_algo_ops->bat_iface_update_mac(hard_iface); + bat_priv->algo_ops->iface.update_mac(hard_iface); hard_iface->if_status = BATADV_IF_TO_BE_ACTIVATED; /* the first active interface becomes our primary interface or @@ -407,8 +409,8 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) batadv_update_min_mtu(hard_iface->soft_iface); - if (bat_priv->bat_algo_ops->bat_iface_activate) - bat_priv->bat_algo_ops->bat_iface_activate(hard_iface); + if (bat_priv->algo_ops->iface.activate) + 
bat_priv->algo_ops->iface.activate(hard_iface); out: if (primary_if) @@ -506,7 +508,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, if (ret) goto err_dev; - ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface); + ret = bat_priv->algo_ops->iface.enable(hard_iface); if (ret < 0) goto err_upper; @@ -515,7 +517,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->if_status = BATADV_IF_INACTIVE; ret = batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces); if (ret < 0) { - bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); + bat_priv->algo_ops->iface.disable(hard_iface); bat_priv->num_ifaces--; hard_iface->if_status = BATADV_IF_NOT_IN_USE; goto err_upper; @@ -553,9 +555,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(soft_iface); - /* begin scheduling originator messages on that interface */ - batadv_schedule_bat_ogm(hard_iface); - out: return 0; @@ -599,7 +598,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_put(new_if); } - bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); + bat_priv->algo_ops->iface.disable(hard_iface); hard_iface->if_status = BATADV_IF_NOT_IN_USE; /* delete all references to this hard_iface */ @@ -686,6 +685,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) if (batadv_is_wifi_netdev(net_dev)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; + batadv_v_hardif_init(hard_iface); + /* extra reference for return */ kref_init(&hard_iface->refcount); kref_get(&hard_iface->refcount); @@ -782,7 +783,7 @@ static int batadv_hard_if_event(struct notifier_block *this, batadv_check_known_mac_addr(hard_iface->net_dev); bat_priv = netdev_priv(hard_iface->soft_iface); - bat_priv->bat_algo_ops->bat_iface_update_mac(hard_iface); + bat_priv->algo_ops->iface.update_mac(hard_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 777aea10cd8f..378cc1119d66 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -45,6 +45,7 @@ #include <linux/wait.h> #include "hard-interface.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "send.h" diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c new file mode 100644 index 000000000000..56dc532f7a2c --- /dev/null +++ b/net/batman-adv/log.c @@ -0,0 +1,231 @@ +/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "log.h" +#include "main.h" + +#include <linux/compiler.h> +#include <linux/debugfs.h> +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/fcntl.h> +#include <linux/fs.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/poll.h> +#include <linux/sched.h> /* for linux/wait.h */ +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/stat.h> +#include <linux/stddef.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/wait.h> +#include <stdarg.h> + +#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1) + +static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN; + +static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log, + size_t idx) +{ + return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK]; +} + +static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log, + char c) +{ + char *char_addr; + + char_addr = batadv_log_char_addr(debug_log, debug_log->log_end); + *char_addr = c; + debug_log->log_end++; + + if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len) + debug_log->log_start = debug_log->log_end - batadv_log_buff_len; +} + +__printf(2, 3) +static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log, + const char *fmt, ...) +{ + va_list args; + static char debug_log_buf[256]; + char *p; + + if (!debug_log) + return 0; + + spin_lock_bh(&debug_log->lock); + va_start(args, fmt); + vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args); + va_end(args); + + for (p = debug_log_buf; *p != 0; p++) + batadv_emit_log_char(debug_log, *p); + + spin_unlock_bh(&debug_log->lock); + + wake_up(&debug_log->queue_wait); + + return 0; +} + +int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) 
+{ + va_list args; + char tmp_log_buf[256]; + + va_start(args, fmt); + vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args); + batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s", + jiffies_to_msecs(jiffies), tmp_log_buf); + va_end(args); + + return 0; +} + +static int batadv_log_open(struct inode *inode, struct file *file) +{ + if (!try_module_get(THIS_MODULE)) + return -EBUSY; + + nonseekable_open(inode, file); + file->private_data = inode->i_private; + return 0; +} + +static int batadv_log_release(struct inode *inode, struct file *file) +{ + module_put(THIS_MODULE); + return 0; +} + +static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log) +{ + return !(debug_log->log_start - debug_log->log_end); +} + +static ssize_t batadv_log_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct batadv_priv *bat_priv = file->private_data; + struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; + int error, i = 0; + char *char_addr; + char c; + + if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log)) + return -EAGAIN; + + if (!buf) + return -EINVAL; + + if (count == 0) + return 0; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + error = wait_event_interruptible(debug_log->queue_wait, + (!batadv_log_empty(debug_log))); + + if (error) + return error; + + spin_lock_bh(&debug_log->lock); + + while ((!error) && (i < count) && + (debug_log->log_start != debug_log->log_end)) { + char_addr = batadv_log_char_addr(debug_log, + debug_log->log_start); + c = *char_addr; + + debug_log->log_start++; + + spin_unlock_bh(&debug_log->lock); + + error = __put_user(c, buf); + + spin_lock_bh(&debug_log->lock); + + buf++; + i++; + } + + spin_unlock_bh(&debug_log->lock); + + if (!error) + return i; + + return error; +} + +static unsigned int batadv_log_poll(struct file *file, poll_table *wait) +{ + struct batadv_priv *bat_priv = file->private_data; + struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; + + poll_wait(file, &debug_log->queue_wait, wait); + + if (!batadv_log_empty(debug_log)) + return POLLIN | POLLRDNORM; + + return 0; +} + +static const struct file_operations batadv_log_fops = { + .open = batadv_log_open, + .release = batadv_log_release, + .read = batadv_log_read, + .poll = batadv_log_poll, + .llseek = no_llseek, +}; + +int batadv_debug_log_setup(struct batadv_priv *bat_priv) +{ + struct dentry *d; + + if (!bat_priv->debug_dir) + goto err; + + bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC); + if (!bat_priv->debug_log) + goto err; + + spin_lock_init(&bat_priv->debug_log->lock); + init_waitqueue_head(&bat_priv->debug_log->queue_wait); + + d = debugfs_create_file("log", S_IFREG | S_IRUSR, + bat_priv->debug_dir, bat_priv, + &batadv_log_fops); + if (!d) + goto err; + + return 0; + +err: + return -ENOMEM; +} + +void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) +{ + kfree(bat_priv->debug_log); + bat_priv->debug_log = NULL; +} diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h new file mode 100644 index 000000000000..e0e1a88c3e58 --- /dev/null +++ b/net/batman-adv/log.h @@ -0,0 +1,111 @@ +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_LOG_H_ +#define _NET_BATMAN_ADV_LOG_H_ + +#include "main.h" + +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/printk.h> + +#ifdef CONFIG_BATMAN_ADV_DEBUG + +int batadv_debug_log_setup(struct batadv_priv *bat_priv); +void batadv_debug_log_cleanup(struct batadv_priv *bat_priv); + +#else + +static inline int batadv_debug_log_setup(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) +{ +} + +#endif + +/** + * enum batadv_dbg_level - available log levels + * @BATADV_DBG_BATMAN: OGM and TQ computations related messages + * @BATADV_DBG_ROUTES: route added / changed / deleted + * @BATADV_DBG_TT: translation table messages + * @BATADV_DBG_BLA: bridge loop avoidance messages + * @BATADV_DBG_DAT: ARP snooping and DAT related messages + * @BATADV_DBG_NC: network coding related messages + * @BATADV_DBG_MCAST: multicast related messages + * @BATADV_DBG_TP_METER: throughput meter messages + * @BATADV_DBG_ALL: the union of all the above log levels + */ +enum batadv_dbg_level { + BATADV_DBG_BATMAN = BIT(0), + BATADV_DBG_ROUTES = BIT(1), + BATADV_DBG_TT = BIT(2), + BATADV_DBG_BLA = BIT(3), + BATADV_DBG_DAT = BIT(4), + BATADV_DBG_NC = BIT(5), + BATADV_DBG_MCAST = BIT(6), + BATADV_DBG_TP_METER = BIT(7), + BATADV_DBG_ALL = 127, +}; + +#ifdef CONFIG_BATMAN_ADV_DEBUG +int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) +__printf(2, 3); + +/* possibly ratelimited debug output */ +#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ + do { \ + if (atomic_read(&bat_priv->log_level) & type && \ + (!ratelimited || net_ratelimit())) \ + batadv_debug_log(bat_priv, fmt, ## arg);\ + } \ + while (0) +#else /* !CONFIG_BATMAN_ADV_DEBUG */ +__printf(4, 5) +static inline void _batadv_dbg(int type __always_unused, + struct batadv_priv *bat_priv __always_unused, + int ratelimited __always_unused, + const char *fmt __always_unused, ...) +{ +} +#endif + +#define batadv_dbg(type, bat_priv, arg...) \ + _batadv_dbg(type, bat_priv, 0, ## arg) +#define batadv_dbg_ratelimited(type, bat_priv, arg...) \ + _batadv_dbg(type, bat_priv, 1, ## arg) + +#define batadv_info(net_dev, fmt, arg...) \ + do { \ + struct net_device *_netdev = (net_dev); \ + struct batadv_priv *_batpriv = netdev_priv(_netdev); \ + batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \ + pr_info("%s: " fmt, _netdev->name, ## arg); \ + } while (0) +#define batadv_err(net_dev, fmt, arg...) 
\ + do { \ + struct net_device *_netdev = (net_dev); \ + struct batadv_priv *_batpriv = netdev_priv(_netdev); \ + batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \ + pr_err("%s: " fmt, _netdev->name, ## arg); \ + } while (0) + +#endif /* _NET_BATMAN_ADV_LOG_H_ */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5f2974bd1227..fe4c5e29f96b 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -31,16 +31,13 @@ #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> -#include <linux/lockdep.h> #include <linux/module.h> -#include <linux/moduleparam.h> #include <linux/netdevice.h> -#include <linux/pkt_sched.h> +#include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> #include <linux/skbuff.h> -#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> @@ -49,6 +46,8 @@ #include <net/rtnetlink.h> #include "bat_algo.h" +#include "bat_iv_ogm.h" +#include "bat_v.h" #include "bridge_loop_avoidance.h" #include "debugfs.h" #include "distributed-arp-table.h" @@ -56,13 +55,16 @@ #include "gateway_common.h" #include "hard-interface.h" #include "icmp_socket.h" +#include "log.h" #include "multicast.h" +#include "netlink.h" #include "network-coding.h" #include "originator.h" #include "packet.h" #include "routing.h" #include "send.h" #include "soft-interface.h" +#include "tp_meter.h" #include "translation-table.h" /* List manipulations on hardif_list have to be rtnl_lock()'ed, @@ -71,8 +73,6 @@ struct list_head batadv_hardif_list; static int (*batadv_rx_handler[256])(struct sk_buff *, struct batadv_hard_iface *); -char batadv_routing_algo[20] = "BATMAN_IV"; -static struct hlist_head batadv_algo_list; unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -83,13 +83,14 @@ static void batadv_recv_handler_init(void); static int __init batadv_init(void) { INIT_LIST_HEAD(&batadv_hardif_list); - INIT_HLIST_HEAD(&batadv_algo_list); + batadv_algo_init(); batadv_recv_handler_init(); batadv_v_init(); batadv_iv_init(); batadv_nc_init(); + batadv_tp_meter_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); @@ -101,6 +102,7 @@ static int __init batadv_init(void) register_netdevice_notifier(&batadv_hard_if_notifier); rtnl_link_register(&batadv_link_ops); + batadv_netlink_register(); pr_info("B.A.T.M.A.N. 
advanced %s (compatibility version %i) loaded\n", BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); @@ -111,6 +113,7 @@ static int __init batadv_init(void) static void __exit batadv_exit(void) { batadv_debugfs_destroy(); + batadv_netlink_unregister(); rtnl_link_unregister(&batadv_link_ops); unregister_netdevice_notifier(&batadv_hard_if_notifier); batadv_hardif_remove_interfaces(); @@ -141,6 +144,7 @@ int batadv_mesh_init(struct net_device *soft_iface) spin_lock_init(&bat_priv->tvlv.container_list_lock); spin_lock_init(&bat_priv->tvlv.handler_list_lock); spin_lock_init(&bat_priv->softif_vlan_list_lock); + spin_lock_init(&bat_priv->tp_list_lock); INIT_HLIST_HEAD(&bat_priv->forw_bat_list); INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); @@ -159,6 +163,7 @@ int batadv_mesh_init(struct net_device *soft_iface) INIT_HLIST_HEAD(&bat_priv->tvlv.container_list); INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list); INIT_HLIST_HEAD(&bat_priv->softif_vlan_list); + INIT_HLIST_HEAD(&bat_priv->tp_list); ret = batadv_v_mesh_init(bat_priv); if (ret < 0) @@ -538,78 +543,6 @@ void batadv_recv_handler_unregister(u8 packet_type) batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet; } -static struct batadv_algo_ops *batadv_algo_get(char *name) -{ - struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp; - - hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) { - if (strcmp(bat_algo_ops_tmp->name, name) != 0) - continue; - - bat_algo_ops = bat_algo_ops_tmp; - break; - } - - return bat_algo_ops; -} - -int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) -{ - struct batadv_algo_ops *bat_algo_ops_tmp; - - bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name); - if (bat_algo_ops_tmp) { - pr_info("Trying to register already registered routing algorithm: %s\n", - bat_algo_ops->name); - return -EEXIST; - } - - /* all algorithms must implement all ops (for now) */ - if (!bat_algo_ops->bat_iface_enable || - !bat_algo_ops->bat_iface_disable || - !bat_algo_ops->bat_iface_update_mac || - !bat_algo_ops->bat_primary_iface_set || - !bat_algo_ops->bat_ogm_schedule || - !bat_algo_ops->bat_ogm_emit || - !bat_algo_ops->bat_neigh_cmp || - !bat_algo_ops->bat_neigh_is_similar_or_better) { - pr_info("Routing algo '%s' does not implement required ops\n", - bat_algo_ops->name); - return -EINVAL; - } - - INIT_HLIST_NODE(&bat_algo_ops->list); - hlist_add_head(&bat_algo_ops->list, &batadv_algo_list); - - return 0; -} - -int batadv_algo_select(struct batadv_priv *bat_priv, char *name) -{ - struct batadv_algo_ops *bat_algo_ops; - - bat_algo_ops = batadv_algo_get(name); - if (!bat_algo_ops) - return -EINVAL; - - bat_priv->bat_algo_ops = bat_algo_ops; - - return 0; -} - -int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) -{ - struct batadv_algo_ops *bat_algo_ops; - - seq_puts(seq, "Available routing algorithms:\n"); - - hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { - seq_printf(seq, " * %s\n", bat_algo_ops->name); - } - - return 0; -} - /** * batadv_skb_crc32 - calculate CRC32 of the whole packet and skip bytes in * the header @@ -644,594 +577,6 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) } /** - * batadv_tvlv_handler_release - release tvlv handler from lists and queue for - * free after rcu grace period - * @ref: kref pointer of the tvlv - */ -static void batadv_tvlv_handler_release(struct kref *ref) -{ - struct batadv_tvlv_handler *tvlv_handler; - - tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount); - kfree_rcu(tvlv_handler, rcu); -} - -/** - 
* batadv_tvlv_handler_put - decrement the tvlv container refcounter and - * possibly release it - * @tvlv_handler: the tvlv handler to free - */ -static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler) -{ - kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release); -} - -/** - * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list - * based on the provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information - * @type: tvlv handler type to look for - * @version: tvlv handler version to look for - * - * Return: tvlv handler if found or NULL otherwise. - */ -static struct batadv_tvlv_handler * -batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) -{ - struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tvlv_handler_tmp, - &bat_priv->tvlv.handler_list, list) { - if (tvlv_handler_tmp->type != type) - continue; - - if (tvlv_handler_tmp->version != version) - continue; - - if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount)) - continue; - - tvlv_handler = tvlv_handler_tmp; - break; - } - rcu_read_unlock(); - - return tvlv_handler; -} - -/** - * batadv_tvlv_container_release - release tvlv from lists and free - * @ref: kref pointer of the tvlv - */ -static void batadv_tvlv_container_release(struct kref *ref) -{ - struct batadv_tvlv_container *tvlv; - - tvlv = container_of(ref, struct batadv_tvlv_container, refcount); - kfree(tvlv); -} - -/** - * batadv_tvlv_container_put - decrement the tvlv container refcounter and - * possibly release it - * @tvlv: the tvlv container to free - */ -static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv) -{ - kref_put(&tvlv->refcount, batadv_tvlv_container_release); -} - -/** - * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container - * list based on the provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information - * @type: tvlv container type to look for - * @version: tvlv container version to look for - * - * Has to be called with the appropriate locks being acquired - * (tvlv.container_list_lock). - * - * Return: tvlv container if found or NULL otherwise. - */ -static struct batadv_tvlv_container * -batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) -{ - struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL; - - lockdep_assert_held(&bat_priv->tvlv.container_list_lock); - - hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) { - if (tvlv_tmp->tvlv_hdr.type != type) - continue; - - if (tvlv_tmp->tvlv_hdr.version != version) - continue; - - kref_get(&tvlv_tmp->refcount); - tvlv = tvlv_tmp; - break; - } - - return tvlv; -} - -/** - * batadv_tvlv_container_list_size - calculate the size of the tvlv container - * list entries - * @bat_priv: the bat priv with all the soft interface information - * - * Has to be called with the appropriate locks being acquired - * (tvlv.container_list_lock). - * - * Return: size of all currently registered tvlv containers in bytes. 
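[The handler and container lookups above (relocated by this series into the new tvlv.c) follow the usual kernel refcounting discipline: readers under RCU may only take a reference via kref_get_unless_zero(), and the final kref_put() invokes the release callback. Below is a minimal userspace sketch of that "get unless zero" pattern, using C11 atomics in place of kref; all names are illustrative, none of this is code from the patch.]

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct object {
	atomic_int refcount;	/* 1 == alive, reference owned by the list */
};

/* take a reference only if at least one is still held */
static bool get_unless_zero(struct object *obj)
{
	int old = atomic_load(&obj->refcount);

	while (old != 0)
		if (atomic_compare_exchange_weak(&obj->refcount, &old, old + 1))
			return true;
	return false;
}

/* drop a reference; the last put frees (kernel: release callback + RCU) */
static void put(struct object *obj)
{
	if (atomic_fetch_sub(&obj->refcount, 1) == 1)
		free(obj);
}

int main(void)
{
	struct object *obj = malloc(sizeof(*obj));

	atomic_init(&obj->refcount, 1);	/* list reference */
	if (get_unless_zero(obj))	/* lookup succeeds while alive */
		put(obj);		/* lookup reference dropped */
	put(obj);			/* list reference dropped, object freed */
	return 0;
}

[The compare-and-swap loop is the crucial part: once the count has reached zero the object may already be queued for freeing, so a lookup must fail rather than resurrect it.]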
- */ -static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) -{ - struct batadv_tvlv_container *tvlv; - u16 tvlv_len = 0; - - lockdep_assert_held(&bat_priv->tvlv.container_list_lock); - - hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { - tvlv_len += sizeof(struct batadv_tvlv_hdr); - tvlv_len += ntohs(tvlv->tvlv_hdr.len); - } - - return tvlv_len; -} - -/** - * batadv_tvlv_container_remove - remove tvlv container from the tvlv container - * list - * @bat_priv: the bat priv with all the soft interface information - * @tvlv: the to be removed tvlv container - * - * Has to be called with the appropriate locks being acquired - * (tvlv.container_list_lock). - */ -static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv, - struct batadv_tvlv_container *tvlv) -{ - lockdep_assert_held(&bat_priv->tvlv.container_list_lock); - - if (!tvlv) - return; - - hlist_del(&tvlv->list); - - /* first call to decrement the counter, second call to free */ - batadv_tvlv_container_put(tvlv); - batadv_tvlv_container_put(tvlv); -} - -/** - * batadv_tvlv_container_unregister - unregister tvlv container based on the - * provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information - * @type: tvlv container type to unregister - * @version: tvlv container type to unregister - */ -void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, - u8 type, u8 version) -{ - struct batadv_tvlv_container *tvlv; - - spin_lock_bh(&bat_priv->tvlv.container_list_lock); - tvlv = batadv_tvlv_container_get(bat_priv, type, version); - batadv_tvlv_container_remove(bat_priv, tvlv); - spin_unlock_bh(&bat_priv->tvlv.container_list_lock); -} - -/** - * batadv_tvlv_container_register - register tvlv type, version and content - * to be propagated with each (primary interface) OGM - * @bat_priv: the bat priv with all the soft interface information - * @type: tvlv container type - * @version: tvlv container version - * @tvlv_value: tvlv container content - * @tvlv_value_len: tvlv container content length - * - * If a container of the same type and version was already registered the new - * content is going to replace the old one. - */ -void batadv_tvlv_container_register(struct batadv_priv *bat_priv, - u8 type, u8 version, - void *tvlv_value, u16 tvlv_value_len) -{ - struct batadv_tvlv_container *tvlv_old, *tvlv_new; - - if (!tvlv_value) - tvlv_value_len = 0; - - tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC); - if (!tvlv_new) - return; - - tvlv_new->tvlv_hdr.version = version; - tvlv_new->tvlv_hdr.type = type; - tvlv_new->tvlv_hdr.len = htons(tvlv_value_len); - - memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len)); - INIT_HLIST_NODE(&tvlv_new->list); - kref_init(&tvlv_new->refcount); - - spin_lock_bh(&bat_priv->tvlv.container_list_lock); - tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); - batadv_tvlv_container_remove(bat_priv, tvlv_old); - hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list); - spin_unlock_bh(&bat_priv->tvlv.container_list_lock); -} - -/** - * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate - * requested packet size - * @packet_buff: packet buffer - * @packet_buff_len: packet buffer size - * @min_packet_len: requested packet minimum size - * @additional_packet_len: requested additional packet size on top of minimum - * size - * - * Return: true of the packet buffer could be changed to the requested size, - * false otherwise. 
- */ -static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, - int *packet_buff_len, - int min_packet_len, - int additional_packet_len) -{ - unsigned char *new_buff; - - new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC); - - /* keep old buffer if kmalloc should fail */ - if (!new_buff) - return false; - - memcpy(new_buff, *packet_buff, min_packet_len); - kfree(*packet_buff); - *packet_buff = new_buff; - *packet_buff_len = min_packet_len + additional_packet_len; - - return true; -} - -/** - * batadv_tvlv_container_ogm_append - append tvlv container content to given - * OGM packet buffer - * @bat_priv: the bat priv with all the soft interface information - * @packet_buff: ogm packet buffer - * @packet_buff_len: ogm packet buffer size including ogm header and tvlv - * content - * @packet_min_len: ogm header size to be preserved for the OGM itself - * - * The ogm packet might be enlarged or shrunk depending on the current size - * and the size of the to-be-appended tvlv containers. - * - * Return: size of all appended tvlv containers in bytes. - */ -u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, - unsigned char **packet_buff, - int *packet_buff_len, int packet_min_len) -{ - struct batadv_tvlv_container *tvlv; - struct batadv_tvlv_hdr *tvlv_hdr; - u16 tvlv_value_len; - void *tvlv_value; - bool ret; - - spin_lock_bh(&bat_priv->tvlv.container_list_lock); - tvlv_value_len = batadv_tvlv_container_list_size(bat_priv); - - ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len, - packet_min_len, tvlv_value_len); - - if (!ret) - goto end; - - if (!tvlv_value_len) - goto end; - - tvlv_value = (*packet_buff) + packet_min_len; - - hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { - tvlv_hdr = tvlv_value; - tvlv_hdr->type = tvlv->tvlv_hdr.type; - tvlv_hdr->version = tvlv->tvlv_hdr.version; - tvlv_hdr->len = tvlv->tvlv_hdr.len; - tvlv_value = tvlv_hdr + 1; - memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len)); - tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len); - } - -end: - spin_unlock_bh(&bat_priv->tvlv.container_list_lock); - return tvlv_value_len; -} - -/** - * batadv_tvlv_call_handler - parse the given tvlv buffer to call the - * appropriate handlers - * @bat_priv: the bat priv with all the soft interface information - * @tvlv_handler: tvlv callback function handling the tvlv content - * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet - * @orig_node: orig node emitting the ogm packet - * @src: source mac address of the unicast packet - * @dst: destination mac address of the unicast packet - * @tvlv_value: tvlv content - * @tvlv_value_len: tvlv content length - * - * Return: success if handler was not found or the return value of the handler - * callback. 
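[batadv_tvlv_container_ogm_append(), also moved to the new tvlv.c, grows the OGM buffer and then lays every container out as a batadv_tvlv_hdr (type, version, 16-bit big-endian length) immediately followed by its value. A standalone userspace sketch of that on-wire layout; the OGM header size and the TVLV type/version numbers here are made up for illustration:]

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* mirrors struct batadv_tvlv_hdr: type, version, big-endian length */
struct tvlv_hdr {
	uint8_t type;
	uint8_t version;
	uint16_t len;
} __attribute__((packed));

/* append one header + value pair, return the number of bytes written */
static size_t tvlv_append(uint8_t *buf, uint8_t type, uint8_t version,
			  const void *value, uint16_t value_len)
{
	struct tvlv_hdr hdr = { type, version, htons(value_len) };

	memcpy(buf, &hdr, sizeof(hdr));
	memcpy(buf + sizeof(hdr), value, value_len);
	return sizeof(hdr) + value_len;
}

int main(void)
{
	uint8_t ogm[128];
	size_t off = 20;	/* hypothetical OGM header size (packet_min_len) */
	uint8_t mcast_flags = 0;

	/* append a hypothetical multicast TVLV (type 5 is illustrative) */
	off += tvlv_append(ogm + off, 5, 2, &mcast_flags, sizeof(mcast_flags));
	printf("total packet length: %zu\n", off);
	return 0;
}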
- */ -static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, - struct batadv_tvlv_handler *tvlv_handler, - bool ogm_source, - struct batadv_orig_node *orig_node, - u8 *src, u8 *dst, - void *tvlv_value, u16 tvlv_value_len) -{ - if (!tvlv_handler) - return NET_RX_SUCCESS; - - if (ogm_source) { - if (!tvlv_handler->ogm_handler) - return NET_RX_SUCCESS; - - if (!orig_node) - return NET_RX_SUCCESS; - - tvlv_handler->ogm_handler(bat_priv, orig_node, - BATADV_NO_FLAGS, - tvlv_value, tvlv_value_len); - tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED; - } else { - if (!src) - return NET_RX_SUCCESS; - - if (!dst) - return NET_RX_SUCCESS; - - if (!tvlv_handler->unicast_handler) - return NET_RX_SUCCESS; - - return tvlv_handler->unicast_handler(bat_priv, src, - dst, tvlv_value, - tvlv_value_len); - } - - return NET_RX_SUCCESS; -} - -/** - * batadv_tvlv_containers_process - parse the given tvlv buffer to call the - * appropriate handlers - * @bat_priv: the bat priv with all the soft interface information - * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet - * @orig_node: orig node emitting the ogm packet - * @src: source mac address of the unicast packet - * @dst: destination mac address of the unicast packet - * @tvlv_value: tvlv content - * @tvlv_value_len: tvlv content length - * - * Return: success when processing an OGM or the return value of all called - * handler callbacks. - */ -int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, - bool ogm_source, - struct batadv_orig_node *orig_node, - u8 *src, u8 *dst, - void *tvlv_value, u16 tvlv_value_len) -{ - struct batadv_tvlv_handler *tvlv_handler; - struct batadv_tvlv_hdr *tvlv_hdr; - u16 tvlv_value_cont_len; - u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND; - int ret = NET_RX_SUCCESS; - - while (tvlv_value_len >= sizeof(*tvlv_hdr)) { - tvlv_hdr = tvlv_value; - tvlv_value_cont_len = ntohs(tvlv_hdr->len); - tvlv_value = tvlv_hdr + 1; - tvlv_value_len -= sizeof(*tvlv_hdr); - - if (tvlv_value_cont_len > tvlv_value_len) - break; - - tvlv_handler = batadv_tvlv_handler_get(bat_priv, - tvlv_hdr->type, - tvlv_hdr->version); - - ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler, - ogm_source, orig_node, - src, dst, tvlv_value, - tvlv_value_cont_len); - if (tvlv_handler) - batadv_tvlv_handler_put(tvlv_handler); - tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len; - tvlv_value_len -= tvlv_value_cont_len; - } - - if (!ogm_source) - return ret; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tvlv_handler, - &bat_priv->tvlv.handler_list, list) { - if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) && - !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED)) - tvlv_handler->ogm_handler(bat_priv, orig_node, - cifnotfound, NULL, 0); - - tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED; - } - rcu_read_unlock(); - - return NET_RX_SUCCESS; -} - -/** - * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate - * handlers - * @bat_priv: the bat priv with all the soft interface information - * @batadv_ogm_packet: ogm packet containing the tvlv containers - * @orig_node: orig node emitting the ogm packet - */ -void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, - struct batadv_ogm_packet *batadv_ogm_packet, - struct batadv_orig_node *orig_node) -{ - void *tvlv_value; - u16 tvlv_value_len; - - if (!batadv_ogm_packet) - return; - - tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len); - if (!tvlv_value_len) - return; - - tvlv_value = batadv_ogm_packet + 1; - - 
batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL, - tvlv_value, tvlv_value_len); -} - -/** - * batadv_tvlv_handler_register - register tvlv handler based on the provided - * type and version (both need to match) for ogm tvlv payload and/or unicast - * payload - * @bat_priv: the bat priv with all the soft interface information - * @optr: ogm tvlv handler callback function. This function receives the orig - * node, flags and the tvlv content as argument to process. - * @uptr: unicast tvlv handler callback function. This function receives the - * source & destination of the unicast packet as well as the tvlv content - * to process. - * @type: tvlv handler type to be registered - * @version: tvlv handler version to be registered - * @flags: flags to enable or disable TVLV API behavior - */ -void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, - void (*optr)(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig, - u8 flags, - void *tvlv_value, - u16 tvlv_value_len), - int (*uptr)(struct batadv_priv *bat_priv, - u8 *src, u8 *dst, - void *tvlv_value, - u16 tvlv_value_len), - u8 type, u8 version, u8 flags) -{ - struct batadv_tvlv_handler *tvlv_handler; - - tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); - if (tvlv_handler) { - batadv_tvlv_handler_put(tvlv_handler); - return; - } - - tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC); - if (!tvlv_handler) - return; - - tvlv_handler->ogm_handler = optr; - tvlv_handler->unicast_handler = uptr; - tvlv_handler->type = type; - tvlv_handler->version = version; - tvlv_handler->flags = flags; - kref_init(&tvlv_handler->refcount); - INIT_HLIST_NODE(&tvlv_handler->list); - - spin_lock_bh(&bat_priv->tvlv.handler_list_lock); - hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); - spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); -} - -/** - * batadv_tvlv_handler_unregister - unregister tvlv handler based on the - * provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information - * @type: tvlv handler type to be unregistered - * @version: tvlv handler version to be unregistered - */ -void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, - u8 type, u8 version) -{ - struct batadv_tvlv_handler *tvlv_handler; - - tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); - if (!tvlv_handler) - return; - - batadv_tvlv_handler_put(tvlv_handler); - spin_lock_bh(&bat_priv->tvlv.handler_list_lock); - hlist_del_rcu(&tvlv_handler->list); - spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); - batadv_tvlv_handler_put(tvlv_handler); -} - -/** - * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the - * specified host - * @bat_priv: the bat priv with all the soft interface information - * @src: source mac address of the unicast packet - * @dst: destination mac address of the unicast packet - * @type: tvlv type - * @version: tvlv version - * @tvlv_value: tvlv content - * @tvlv_value_len: tvlv content length - */ -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, - u8 *dst, u8 type, u8 version, - void *tvlv_value, u16 tvlv_value_len) -{ - struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; - struct batadv_tvlv_hdr *tvlv_hdr; - struct batadv_orig_node *orig_node; - struct sk_buff *skb; - unsigned char *tvlv_buff; - unsigned int tvlv_len; - ssize_t hdr_len = sizeof(*unicast_tvlv_packet); - - orig_node = batadv_orig_hash_find(bat_priv, dst); - if (!orig_node) - return; - - tvlv_len = 
sizeof(*tvlv_hdr) + tvlv_value_len; - - skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len); - if (!skb) - goto out; - - skb->priority = TC_PRIO_CONTROL; - skb_reserve(skb, ETH_HLEN); - tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len); - unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff; - unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV; - unicast_tvlv_packet->version = BATADV_COMPAT_VERSION; - unicast_tvlv_packet->ttl = BATADV_TTL; - unicast_tvlv_packet->reserved = 0; - unicast_tvlv_packet->tvlv_len = htons(tvlv_len); - unicast_tvlv_packet->align = 0; - ether_addr_copy(unicast_tvlv_packet->src, src); - ether_addr_copy(unicast_tvlv_packet->dst, dst); - - tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1); - tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff; - tvlv_hdr->version = version; - tvlv_hdr->type = type; - tvlv_hdr->len = htons(tvlv_value_len); - tvlv_buff += sizeof(*tvlv_hdr); - memcpy(tvlv_buff, tvlv_value, tvlv_value_len); - - if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) - kfree_skb(skb); -out: - batadv_orig_node_put(orig_node); -} - -/** * batadv_get_vid - extract the VLAN identifier from skb if any * @skb: the buffer containing the packet * @header_len: length of the batman header preceding the ethernet header @@ -1284,36 +629,6 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) return ap_isolation_enabled; } -static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) -{ - struct batadv_algo_ops *bat_algo_ops; - char *algo_name = (char *)val; - size_t name_len = strlen(algo_name); - - if (name_len > 0 && algo_name[name_len - 1] == '\n') - algo_name[name_len - 1] = '\0'; - - bat_algo_ops = batadv_algo_get(algo_name); - if (!bat_algo_ops) { - pr_err("Routing algorithm '%s' is not supported\n", algo_name); - return -EINVAL; - } - - return param_set_copystring(algo_name, kp); -} - -static const struct kernel_param_ops batadv_param_ops_ra = { - .set = batadv_param_set_ra, - .get = param_get_string, -}; - -static struct kparam_string batadv_param_string_ra = { - .maxlen = sizeof(batadv_routing_algo), - .string = batadv_routing_algo, -}; - -module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra, - 0644); module_init(batadv_init); module_exit(batadv_exit); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 76925266deed..06a860845434 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2016.2" +#define BATADV_SOURCE_VERSION "2016.3" #endif /* B.A.T.M.A.N. 
parameters */ @@ -100,6 +100,9 @@ #define BATADV_NUM_BCASTS_WIRELESS 3 #define BATADV_NUM_BCASTS_MAX 3 +/* length of the single packet used by the TP meter */ +#define BATADV_TP_PACKET_LEN ETH_DATA_LEN + /* msecs after which an ARP_REQUEST is sent in broadcast as fallback */ #define ARP_REQ_DELAY 250 /* numbers of originator to contact for any PUT/GET DHT operation */ @@ -131,6 +134,11 @@ #define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */ +/** + * BATADV_TP_MAX_NUM - maximum number of simultaneously active tp sessions + */ +#define BATADV_TP_MAX_NUM 5 + enum batadv_mesh_state { BATADV_MESH_INACTIVE, BATADV_MESH_ACTIVE, @@ -175,29 +183,26 @@ enum batadv_uev_type { /* Kernel headers */ -#include <linux/atomic.h> #include <linux/bitops.h> /* for packet.h */ #include <linux/compiler.h> #include <linux/cpumask.h> #include <linux/etherdevice.h> #include <linux/if_ether.h> /* for packet.h */ -#include <linux/netdevice.h> -#include <linux/printk.h> -#include <linux/types.h> -#include <linux/percpu.h> -#include <linux/jiffies.h> #include <linux/if_vlan.h> +#include <linux/jiffies.h> +#include <linux/percpu.h> +#include <linux/types.h> #include "types.h" -struct batadv_ogm_packet; +struct net_device; +struct packet_type; struct seq_file; struct sk_buff; #define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \ (int)(vid & VLAN_VID_MASK) : -1) -extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; extern unsigned char batadv_broadcast_addr[]; @@ -218,74 +223,9 @@ batadv_recv_handler_register(u8 packet_type, int (*recv_handler)(struct sk_buff *, struct batadv_hard_iface *)); void batadv_recv_handler_unregister(u8 packet_type); -int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); -int batadv_algo_select(struct batadv_priv *bat_priv, char *name); -int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr); /** - * enum batadv_dbg_level - available log levels - * @BATADV_DBG_BATMAN: OGM and TQ computations related messages - * @BATADV_DBG_ROUTES: route added / changed / deleted - * @BATADV_DBG_TT: translation table messages - * @BATADV_DBG_BLA: bridge loop avoidance messages - * @BATADV_DBG_DAT: ARP snooping and DAT related messages - * @BATADV_DBG_NC: network coding related messages - * @BATADV_DBG_ALL: the union of all the above log levels - */ -enum batadv_dbg_level { - BATADV_DBG_BATMAN = BIT(0), - BATADV_DBG_ROUTES = BIT(1), - BATADV_DBG_TT = BIT(2), - BATADV_DBG_BLA = BIT(3), - BATADV_DBG_DAT = BIT(4), - BATADV_DBG_NC = BIT(5), - BATADV_DBG_ALL = 63, -}; - -#ifdef CONFIG_BATMAN_ADV_DEBUG -int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) -__printf(2, 3); - -/* possibly ratelimited debug output */ -#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ - do { \ - if (atomic_read(&bat_priv->log_level) & type && \ - (!ratelimited || net_ratelimit())) \ - batadv_debug_log(bat_priv, fmt, ## arg);\ - } \ - while (0) -#else /* !CONFIG_BATMAN_ADV_DEBUG */ -__printf(4, 5) -static inline void _batadv_dbg(int type __always_unused, - struct batadv_priv *bat_priv __always_unused, - int ratelimited __always_unused, - const char *fmt __always_unused, ...) -{ -} -#endif - -#define batadv_dbg(type, bat_priv, arg...) \ - _batadv_dbg(type, bat_priv, 0, ## arg) -#define batadv_dbg_ratelimited(type, bat_priv, arg...) \ - _batadv_dbg(type, bat_priv, 1, ## arg) - -#define batadv_info(net_dev, fmt, arg...) 
\ - do { \ - struct net_device *_netdev = (net_dev); \ - struct batadv_priv *_batpriv = netdev_priv(_netdev); \ - batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \ - pr_info("%s: " fmt, _netdev->name, ## arg); \ - } while (0) -#define batadv_err(net_dev, fmt, arg...) \ - do { \ - struct net_device *_netdev = (net_dev); \ - struct batadv_priv *_batpriv = netdev_priv(_netdev); \ - batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \ - pr_err("%s: " fmt, _netdev->name, ## arg); \ - } while (0) - -/** * batadv_compare_eth - Compare two not u16 aligned Ethernet addresses * @data1: Pointer to a six-byte array containing the Ethernet address * @data2: Pointer other six-byte array containing the Ethernet address @@ -370,39 +310,6 @@ static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) */ #define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0])) -void batadv_tvlv_container_register(struct batadv_priv *bat_priv, - u8 type, u8 version, - void *tvlv_value, u16 tvlv_value_len); -u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, - unsigned char **packet_buff, - int *packet_buff_len, int packet_min_len); -void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, - struct batadv_ogm_packet *batadv_ogm_packet, - struct batadv_orig_node *orig_node); -void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, - u8 type, u8 version); - -void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, - void (*optr)(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig, - u8 flags, - void *tvlv_value, - u16 tvlv_value_len), - int (*uptr)(struct batadv_priv *bat_priv, - u8 *src, u8 *dst, - void *tvlv_value, - u16 tvlv_value_len), - u8 type, u8 version, u8 flags); -void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, - u8 type, u8 version); -int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, - bool ogm_source, - struct batadv_orig_node *orig_node, - u8 *src, u8 *dst, - void *tvlv_buff, u16 tvlv_buff_len); -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, - u8 *dst, u8 type, u8 version, - void *tvlv_value, u16 tvlv_value_len); unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len); bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid); diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index c32f24fafe67..cc915073a753 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -25,17 +25,23 @@ #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> +#include <linux/icmpv6.h> +#include <linux/if_bridge.h> #include <linux/if_ether.h> -#include <linux/in6.h> +#include <linux/igmp.h> #include <linux/in.h> +#include <linux/in6.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> +#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -43,18 +49,57 @@ #include <linux/string.h> #include <linux/types.h> #include <net/addrconf.h> +#include <net/if_inet6.h> +#include <net/ip.h> #include <net/ipv6.h> +#include "hard-interface.h" +#include "hash.h" +#include "log.h" #include "packet.h" #include "translation-table.h" +#include "tvlv.h" + +/** + * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists + * @soft_iface: netdev struct 
of the mesh interface + * + * If the given soft interface has a bridge on top then the refcount + * of the corresponding net device is increased. + * + * Return: NULL if no such bridge exists. Otherwise the net device of the + * bridge. + */ +static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) +{ + struct net_device *upper = soft_iface; + + rcu_read_lock(); + do { + upper = netdev_master_upper_dev_get_rcu(upper); + } while (upper && !(upper->priv_flags & IFF_EBRIDGE)); + + if (upper) + dev_hold(upper); + rcu_read_unlock(); + + return upper; +} /** * batadv_mcast_mla_softif_get - get softif multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * - * Collect multicast addresses of the local multicast listeners - * on the given soft interface, dev, in the given mcast_list. + * Collects multicast addresses of multicast listeners residing + * on this kernel on the given soft interface, dev, in + * the given mcast_list. In general, multicast listeners provided by + * your multicast receiving applications run directly on this node. + * + * If there is a bridge interface on top of dev, collects from that one + * instead. Just like with IP addresses and routes, multicast listeners + * will (or should) register to the bridge interface instead of an + * enslaved bat0. * * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. @@ -62,12 +107,13 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev, struct hlist_head *mcast_list) { + struct net_device *bridge = batadv_mcast_get_bridge(dev); struct netdev_hw_addr *mc_list_entry; struct batadv_hw_addr *new; int ret = 0; - netif_addr_lock_bh(dev); - netdev_for_each_mc_addr(mc_list_entry, dev) { + netif_addr_lock_bh(bridge ? bridge : dev); + netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) { new = kmalloc(sizeof(*new), GFP_ATOMIC); if (!new) { ret = -ENOMEM; break; @@ -78,7 +124,10 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev, hlist_add_head(&new->list, mcast_list); ret++; } - netif_addr_unlock_bh(dev); + netif_addr_unlock_bh(bridge ? bridge : dev); + + if (bridge) + dev_put(bridge); return ret; } @@ -104,6 +153,83 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, } /** + * batadv_mcast_mla_br_addr_cpy - copy a bridge multicast address + * @dst: destination to write to - a multicast MAC address + * @src: source to read from - a multicast IP address + * + * Converts a given multicast IPv4/IPv6 address from a bridge + * to its matching multicast MAC address and copies it into the given + * destination buffer. + * + * Caller needs to make sure the destination buffer can hold + * at least ETH_ALEN bytes. + */ +static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src) +{ + if (src->proto == htons(ETH_P_IP)) + ip_eth_mc_map(src->u.ip4, dst); +#if IS_ENABLED(CONFIG_IPV6) + else if (src->proto == htons(ETH_P_IPV6)) + ipv6_eth_mc_map(&src->u.ip6, dst); +#endif + else + eth_zero_addr(dst); +} + +/** + * batadv_mcast_mla_bridge_get - get bridged-in multicast listeners + * @dev: a bridge slave whose bridge to collect multicast addresses from + * @mcast_list: a list to put found addresses into + * + * Collects multicast addresses of multicast listeners residing + * on foreign, non-mesh devices which we gave access to our mesh via + * a bridge on top of the given soft interface, dev, in the given + * mcast_list.
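[batadv_mcast_mla_br_addr_cpy() above delegates to the kernel helpers ip_eth_mc_map() and ipv6_eth_mc_map(). For reference, a userspace sketch of the two standard mappings those helpers implement: 01:00:5e plus the low 23 bits of the group address for IPv4 (RFC 1112), and 33:33 plus the low 32 bits for IPv6 (RFC 2464). Not code from the patch.]

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>

/* IPv4: 01:00:5e + the lower 23 bits of the group address (RFC 1112) */
static void ip4_mc_map(uint32_t group_be, uint8_t mac[6])
{
	uint32_t group = ntohl(group_be);

	mac[0] = 0x01; mac[1] = 0x00; mac[2] = 0x5e;
	mac[3] = (group >> 16) & 0x7f;
	mac[4] = (group >> 8) & 0xff;
	mac[5] = group & 0xff;
}

/* IPv6: 33:33 + the last four bytes of the group address (RFC 2464) */
static void ip6_mc_map(const struct in6_addr *group, uint8_t mac[6])
{
	mac[0] = 0x33; mac[1] = 0x33;
	mac[2] = group->s6_addr[12];
	mac[3] = group->s6_addr[13];
	mac[4] = group->s6_addr[14];
	mac[5] = group->s6_addr[15];
}

int main(void)
{
	uint8_t mac[6];
	struct in_addr ip4;

	inet_pton(AF_INET, "239.1.2.3", &ip4);
	ip4_mc_map(ip4.s_addr, mac);	/* prints 01:00:5e:01:02:03 */
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return 0;
}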
+ * + * Return: -ENOMEM on memory allocation error or the number of + * items added to the mcast_list otherwise. + */ +static int batadv_mcast_mla_bridge_get(struct net_device *dev, + struct hlist_head *mcast_list) +{ + struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list); + struct br_ip_list *br_ip_entry, *tmp; + struct batadv_hw_addr *new; + u8 mcast_addr[ETH_ALEN]; + int ret; + + /* we don't need to detect these devices/listeners, the IGMP/MLD + * snooping code of the Linux bridge already does that for us + */ + ret = br_multicast_list_adjacent(dev, &bridge_mcast_list); + if (ret < 0) + goto out; + + list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) { + batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr); + if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list)) + continue; + + new = kmalloc(sizeof(*new), GFP_ATOMIC); + if (!new) { + ret = -ENOMEM; + break; + } + + ether_addr_copy(new->addr, mcast_addr); + hlist_add_head(&new->list, mcast_list); + } + +out: + list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) { + list_del(&br_ip_entry->list); + kfree(br_ip_entry); + } + + return ret; +} + +/** * batadv_mcast_mla_list_free - free a list of multicast addresses * @bat_priv: the bat priv with all the soft interface information * @mcast_list: the list to free @@ -214,44 +340,195 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv) } /** + * batadv_mcast_querier_log - debug output regarding the querier status on link + * @bat_priv: the bat priv with all the soft interface information + * @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD") + * @old_state: the previous querier state on our link + * @new_state: the new querier state on our link + * + * Outputs debug messages to the logging facility with log level 'mcast' + * regarding changes to the querier status on the link which are relevant + * to our multicast optimizations. + * + * Usually this is about whether a querier appeared or vanished in + * our mesh or whether the querier is in the suboptimal position of being + * behind our local bridge segment: Snooping switches will directly + * forward listener reports to the querier, therefore batman-adv and + * the bridge will potentially not see these listeners - the querier is + * potentially shadowing listeners from us then. + * + * This is only interesting for nodes with a bridge on top of their + * soft interface. 
+ */ +static void +batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, + struct batadv_mcast_querier_state *old_state, + struct batadv_mcast_querier_state *new_state) +{ + if (!old_state->exists && new_state->exists) + batadv_info(bat_priv->soft_iface, "%s Querier appeared\n", + str_proto); + else if (old_state->exists && !new_state->exists) + batadv_info(bat_priv->soft_iface, + "%s Querier disappeared - multicast optimizations disabled\n", + str_proto); + else if (!bat_priv->mcast.bridged && !new_state->exists) + batadv_info(bat_priv->soft_iface, + "No %s Querier present - multicast optimizations disabled\n", + str_proto); + + if (new_state->exists) { + if ((!old_state->shadowing && new_state->shadowing) || + (!old_state->exists && new_state->shadowing)) + batadv_dbg(BATADV_DBG_MCAST, bat_priv, + "%s Querier is behind our bridged segment: Might shadow listeners\n", + str_proto); + else if (old_state->shadowing && !new_state->shadowing) + batadv_dbg(BATADV_DBG_MCAST, bat_priv, + "%s Querier is not behind our bridged segment\n", + str_proto); + } +} + +/** + * batadv_mcast_bridge_log - debug output for topology changes in bridged setups + * @bat_priv: the bat priv with all the soft interface information + * @bridged: a flag about whether the soft interface is currently bridged or not + * @querier_ipv4: (maybe) new status of a potential, selected IGMP querier + * @querier_ipv6: (maybe) new status of a potential, selected MLD querier + * + * If no bridges are ever used on this node, then this function does nothing. + * + * Otherwise this function outputs debug information to the 'mcast' log level + * which might be relevant to our multicast optimizations. + * + * More precisely, it outputs information when a bridge interface is added or + * removed from a soft interface. And when a bridge is present, it further + * outputs information about the querier state which is relevant for the + * multicast flags this node is going to set. + */ +static void +batadv_mcast_bridge_log(struct batadv_priv *bat_priv, bool bridged, + struct batadv_mcast_querier_state *querier_ipv4, + struct batadv_mcast_querier_state *querier_ipv6) +{ + if (!bat_priv->mcast.bridged && bridged) + batadv_dbg(BATADV_DBG_MCAST, bat_priv, + "Bridge added: Setting Unsnoopables(U)-flag\n"); + else if (bat_priv->mcast.bridged && !bridged) + batadv_dbg(BATADV_DBG_MCAST, bat_priv, + "Bridge removed: Unsetting Unsnoopables(U)-flag\n"); + + if (bridged) { + batadv_mcast_querier_log(bat_priv, "IGMP", + &bat_priv->mcast.querier_ipv4, + querier_ipv4); + batadv_mcast_querier_log(bat_priv, "MLD", + &bat_priv->mcast.querier_ipv6, + querier_ipv6); + } +} + +/** + * batadv_mcast_flags_log - output debug information about mcast flag changes + * @bat_priv: the bat priv with all the soft interface information + * @flags: flags indicating the new multicast state + * + * Whenever the multicast flags this node announces change (@flags vs. + * bat_priv->mcast.flags), this notifies userspace via the 'mcast' log level. + */ +static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) +{ + u8 old_flags = bat_priv->mcast.flags; + char str_old_flags[] = "[...]"; + + sprintf(str_old_flags, "[%c%c%c]", + (old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', + (old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', + (old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.'); + + batadv_dbg(BATADV_DBG_MCAST, bat_priv, + "Changing multicast flags from '%s' to '[%c%c%c]'\n", + bat_priv->mcast.enabled ? str_old_flags : "<undefined>", + (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.'); +} + +/** * batadv_mcast_mla_tvlv_update - update multicast tvlv * @bat_priv: the bat priv with all the soft interface information * * Updates the own multicast tvlv with our current multicast related settings, * capabilities and inabilities. * - * Return: true if the tvlv container is registered afterwards. Otherwise - * returns false. + * Return: false if we want all IPv4 && IPv6 multicast traffic and true + * otherwise. */ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv) { struct batadv_tvlv_mcast_data mcast_data; + struct batadv_mcast_querier_state querier4 = {false, false}; + struct batadv_mcast_querier_state querier6 = {false, false}; + struct net_device *dev = bat_priv->soft_iface; + bool bridged; mcast_data.flags = BATADV_NO_FLAGS; memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved)); - /* Avoid attaching MLAs, if there is a bridge on top of our soft - * interface, we don't support that yet (TODO) + bridged = batadv_mcast_has_bridge(bat_priv); + if (!bridged) + goto update; + +#if !IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) + pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n"); +#endif + + querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP); + querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP); + + querier6.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IPV6); + querier6.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IPV6); + + mcast_data.flags |= BATADV_MCAST_WANT_ALL_UNSNOOPABLES; + + /* 1) If no querier exists at all, then multicast listeners on + * our local TT clients behind the bridge will keep silent. + * 2) If the selected querier is on one of our local TT clients, + * behind the bridge, then this querier might shadow multicast + * listeners on our local TT clients, behind this bridge. + * + * In both cases, we will signal other batman nodes that + * we need all multicast traffic of the corresponding protocol.
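[The querier checks in this function reduce to a small pure function of the bridge and querier state, which makes the policy easy to see at a glance. A condensed C sketch; the flag bit values below are illustrative stand-ins for the real definitions in packet.h:]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WANT_ALL_UNSNOOPABLES	(1 << 0)	/* illustrative values */
#define WANT_ALL_IPV4		(1 << 1)
#define WANT_ALL_IPV6		(1 << 2)

struct querier_state { bool exists, shadowing; };

static uint8_t mcast_flags(bool bridged, struct querier_state q4,
			   struct querier_state q6)
{
	uint8_t flags = 0;

	if (!bridged)
		return flags;	/* no bridge: snooping via TT suffices */

	/* a bridge always brings listeners batman-adv cannot snoop */
	flags |= WANT_ALL_UNSNOOPABLES;

	/* missing or shadowed querier: listeners may stay silent */
	if (!q4.exists || q4.shadowing)
		flags |= WANT_ALL_IPV4;
	if (!q6.exists || q6.shadowing)
		flags |= WANT_ALL_IPV6;
	return flags;
}

int main(void)
{
	struct querier_state q4 = { false, false };	/* no IGMP querier */
	struct querier_state q6 = { true, true };	/* shadowed MLD querier */

	printf("flags: 0x%02x\n", mcast_flags(true, q4, q6));	/* 0x07 */
	return 0;
}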
*/ - if (batadv_mcast_has_bridge(bat_priv)) { - if (bat_priv->mcast.enabled) { - batadv_tvlv_container_unregister(bat_priv, - BATADV_TVLV_MCAST, 1); - bat_priv->mcast.enabled = false; - } + if (!querier4.exists || querier4.shadowing) + mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV4; - return false; - } + if (!querier6.exists || querier6.shadowing) + mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV6; + +update: + batadv_mcast_bridge_log(bat_priv, bridged, &querier4, &querier6); + + bat_priv->mcast.querier_ipv4.exists = querier4.exists; + bat_priv->mcast.querier_ipv4.shadowing = querier4.shadowing; + + bat_priv->mcast.querier_ipv6.exists = querier6.exists; + bat_priv->mcast.querier_ipv6.shadowing = querier6.shadowing; + + bat_priv->mcast.bridged = bridged; if (!bat_priv->mcast.enabled || mcast_data.flags != bat_priv->mcast.flags) { - batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 1, + batadv_mcast_flags_log(bat_priv, mcast_data.flags); + batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 2, &mcast_data, sizeof(mcast_data)); bat_priv->mcast.flags = mcast_data.flags; bat_priv->mcast.enabled = true; } - return true; + return !(mcast_data.flags & + (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6)); } /** @@ -274,6 +551,10 @@ void batadv_mcast_mla_update(struct batadv_priv *bat_priv) if (ret < 0) goto out; + ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list); + if (ret < 0) + goto out; + update: batadv_mcast_mla_tt_retract(bat_priv, &mcast_list); batadv_mcast_mla_tt_add(bat_priv, &mcast_list); @@ -283,6 +564,31 @@ out: } /** + * batadv_mcast_is_report_ipv4 - check for IGMP reports + * @skb: the ethernet frame destined for the mesh + * + * This call might reallocate skb data. + * + * Checks whether the given frame is a valid IGMP report. + * + * Return: If so then true, otherwise false. + */ +static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb) +{ + if (ip_mc_check_igmp(skb, NULL) < 0) + return false; + + switch (igmp_hdr(skb)->type) { + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMPV2_HOST_MEMBERSHIP_REPORT: + case IGMPV3_HOST_MEMBERSHIP_REPORT: + return true; + } + + return false; +} + +/** * batadv_mcast_forw_mode_check_ipv4 - check for optimized forwarding potential * @bat_priv: the bat priv with all the soft interface information * @skb: the IPv4 packet to check @@ -304,6 +610,9 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*iphdr))) return -ENOMEM; + if (batadv_mcast_is_report_ipv4(skb)) + return -EINVAL; + iphdr = ip_hdr(skb); /* TODO: Implement Multicast Router Discovery (RFC4286), @@ -320,6 +629,31 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, return 0; } +#if IS_ENABLED(CONFIG_IPV6) +/** + * batadv_mcast_is_report_ipv6 - check for MLD reports + * @skb: the ethernet frame destined for the mesh + * + * This call might reallocate skb data. + * + * Checks whether the given frame is a valid MLD report. + * + * Return: If so then true, otherwise false.
+ */ +static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb) +{ + if (ipv6_mc_check_mld(skb, NULL) < 0) + return false; + + switch (icmp6_hdr(skb)->icmp6_type) { + case ICMPV6_MGM_REPORT: + case ICMPV6_MLD2_REPORT: + return true; + } + + return false; +} + /** * batadv_mcast_forw_mode_check_ipv6 - check for optimized forwarding potential * @bat_priv: the bat priv with all the soft interface information @@ -341,6 +675,9 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*ip6hdr))) return -ENOMEM; + if (batadv_mcast_is_report_ipv6(skb)) + return -EINVAL; + ip6hdr = ipv6_hdr(skb); /* TODO: Implement Multicast Router Discovery (RFC4286), @@ -357,6 +694,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, return 0; } +#endif /** * batadv_mcast_forw_mode_check - check for optimized forwarding potential @@ -385,9 +723,11 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, case ETH_P_IP: return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb, is_unsnoopable); +#if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb, is_unsnoopable); +#endif default: return -EINVAL; } @@ -728,18 +1068,18 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, } /** - * batadv_mcast_tvlv_ogm_handler_v1 - process incoming multicast tvlv container + * batadv_mcast_tvlv_ogm_handler - process incoming multicast tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the multicast data * @tvlv_value_len: tvlv buffer length */ -static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig, - u8 flags, - void *tvlv_value, - u16 tvlv_value_len) +static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + u8 flags, + void *tvlv_value, + u16 tvlv_value_len) { bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND); u8 mcast_flags = BATADV_NO_FLAGS; @@ -789,19 +1129,120 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, */ void batadv_mcast_init(struct batadv_priv *bat_priv) { - batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler_v1, - NULL, BATADV_TVLV_MCAST, 1, + batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler, + NULL, BATADV_TVLV_MCAST, 2, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); } /** + * batadv_mcast_flags_print_header - print own mcast flags to debugfs table + * @bat_priv: the bat priv with all the soft interface information + * @seq: debugfs table seq_file struct + * + * Prints our own multicast flags including a more specific reason why + * they are set, that is prints the bridge and querier state too, to + * the debugfs table specified via @seq. + */ +static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + u8 flags = bat_priv->mcast.flags; + char querier4, querier6, shadowing4, shadowing6; + bool bridged = bat_priv->mcast.bridged; + + if (bridged) { + querier4 = bat_priv->mcast.querier_ipv4.exists ? '.' : '4'; + querier6 = bat_priv->mcast.querier_ipv6.exists ? '.' : '6'; + shadowing4 = bat_priv->mcast.querier_ipv4.shadowing ? '4' : '.'; + shadowing6 = bat_priv->mcast.querier_ipv6.shadowing ? 
'6' : '.'; + } else { + querier4 = '?'; + querier6 = '?'; + shadowing4 = '?'; + shadowing6 = '?'; + } + + seq_printf(seq, "Multicast flags (own flags: [%c%c%c])\n", + (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.'); + seq_printf(seq, "* Bridged [U]\t\t\t\t%c\n", bridged ? 'U' : '.'); + seq_printf(seq, "* No IGMP/MLD Querier [4/6]:\t\t%c/%c\n", + querier4, querier6); + seq_printf(seq, "* Shadowing IGMP/MLD Querier [4/6]:\t%c/%c\n", + shadowing4, shadowing6); + seq_puts(seq, "-------------------------------------------\n"); + seq_printf(seq, " %-10s %s\n", "Originator", "Flags"); +} + +/** + * batadv_mcast_flags_seq_print_text - print the mcast flags of other nodes + * @seq: seq file to print on + * @offset: not used + * + * This prints a table of (primary) originators and their according + * multicast flags, including (in the header) our own. + * + * Return: always 0 + */ +int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hard_iface *primary_if; + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct batadv_orig_node *orig_node; + struct hlist_head *head; + u8 flags; + u32 i; + + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + return 0; + + batadv_mcast_flags_print_header(bat_priv, seq); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, + &orig_node->capa_initialized)) + continue; + + if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, + &orig_node->capabilities)) { + seq_printf(seq, "%pM -\n", orig_node->orig); + continue; + } + + flags = orig_node->mcast_flags; + + seq_printf(seq, "%pM [%c%c%c]\n", orig_node->orig, + (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) + ? 'U' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV4) + ? '4' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV6) + ? 
'6' : '.'); + } + rcu_read_unlock(); + } + + batadv_hardif_put(primary_if); + + return 0; +} + +/** * batadv_mcast_free - free the multicast optimizations structures * @bat_priv: the bat priv with all the soft interface information */ void batadv_mcast_free(struct batadv_priv *bat_priv) { - batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 1); - batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 1); + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); spin_lock_bh(&bat_priv->tt.commit_lock); batadv_mcast_mla_tt_retract(bat_priv, NULL); diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 80bceec55592..1fb00ba84907 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -20,6 +20,7 @@ #include "main.h" +struct seq_file; struct sk_buff; /** @@ -46,6 +47,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, void batadv_mcast_init(struct batadv_priv *bat_priv); +int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset); + void batadv_mcast_free(struct batadv_priv *bat_priv); void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c new file mode 100644 index 000000000000..231f8eaf075b --- /dev/null +++ b/net/batman-adv/netlink.c @@ -0,0 +1,424 @@ +/* Copyright (C) 2016 B.A.T.M.A.N. contributors: + * + * Matthias Schiffer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
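[Assembled from the seq_printf() format strings above, the new debugfs table comes out roughly like the following; the originator addresses and the flag combination are invented for illustration, only the layout is taken from the code:]

Multicast flags (own flags: [U4.])
* Bridged [U]				U
* No IGMP/MLD Querier [4/6]:		4/.
* Shadowing IGMP/MLD Querier [4/6]:	./.
-------------------------------------------
 Originator Flags
02:04:06:08:0a:0c [U4.]
02:04:06:08:0a:0e -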
+ */ + +#include "netlink.h" +#include "main.h" + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/genetlink.h> +#include <linux/if_ether.h> +#include <linux/init.h> +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/printk.h> +#include <linux/stddef.h> +#include <linux/types.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <uapi/linux/batman_adv.h> + +#include "hard-interface.h" +#include "soft-interface.h" +#include "tp_meter.h" + +struct sk_buff; + +static struct genl_family batadv_netlink_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = BATADV_NL_NAME, + .version = 1, + .maxattr = BATADV_ATTR_MAX, +}; + +/* multicast groups */ +enum batadv_netlink_multicast_groups { + BATADV_NL_MCGRP_TPMETER, +}; + +static struct genl_multicast_group batadv_netlink_mcgrps[] = { + [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER }, +}; + +static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { + [BATADV_ATTR_VERSION] = { .type = NLA_STRING }, + [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING }, + [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 }, + [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING }, + [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 }, + [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING }, + [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 }, + [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 }, + [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 }, + [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 }, +}; + +/** + * batadv_netlink_mesh_info_put - fill in generic information about mesh + * interface + * @msg: netlink message to be sent back + * @soft_iface: interface for which the data should be taken + * + * Return: 0 on success, < 0 on error + */ +static int +batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_hard_iface *primary_if = NULL; + struct net_device *hard_iface; + int ret = -ENOBUFS; + + if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) || + nla_put_string(msg, BATADV_ATTR_ALGO_NAME, + bat_priv->algo_ops->name) || + nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) || + nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) || + nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN, + soft_iface->dev_addr)) + goto out; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { + hard_iface = primary_if->net_dev; + + if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + hard_iface->ifindex) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + hard_iface->name) || + nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN, + hard_iface->dev_addr)) + goto out; + } + + ret = 0; + + out: + if (primary_if) + batadv_hardif_put(primary_if); + + return ret; +} + +/** + * batadv_netlink_get_mesh_info - handle incoming BATADV_CMD_GET_MESH_INFO + * netlink request + * @skb: received netlink message + * @info: receiver information + * + * Return: 0 on success, < 0 on error + */ +static int +batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct net_device *soft_iface; + struct sk_buff *msg = NULL; + void *msg_head; + int ifindex; + int ret; + + if 
(!info->attrs[BATADV_ATTR_MESH_IFINDEX]) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &batadv_netlink_family, 0, + BATADV_CMD_GET_MESH_INFO); + if (!msg_head) { + ret = -ENOBUFS; + goto out; + } + + ret = batadv_netlink_mesh_info_put(msg, soft_iface); + + out: + if (soft_iface) + dev_put(soft_iface); + + if (ret) { + if (msg) + nlmsg_free(msg); + return ret; + } + + genlmsg_end(msg, msg_head); + return genlmsg_reply(msg, info); +} + +/** + * batadv_netlink_tp_meter_put - Fill information of started tp_meter session + * @msg: netlink message to be sent back + * @cookie: tp meter session cookie + * + * Return: 0 on success, < 0 on error + */ +static int +batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie) +{ + if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie)) + return -ENOBUFS; + + return 0; +} + +/** + * batadv_netlink_tpmeter_notify - send tp_meter result via netlink to client + * @bat_priv: the bat priv with all the soft interface information + * @dst: destination of tp_meter session + * @result: reason for tp meter session stop + * @test_time: total time of the tp_meter session + * @total_bytes: bytes acked to the receiver + * @cookie: cookie of tp_meter session + * + * Return: 0 on success, < 0 on error + */ +int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, + u8 result, u32 test_time, u64 total_bytes, + u32 cookie) +{ + struct sk_buff *msg; + void *hdr; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &batadv_netlink_family, 0, + BATADV_CMD_TP_METER); + if (!hdr) { + ret = -ENOBUFS; + goto err_genlmsg; + } + + if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_TPMETER_TEST_TIME, test_time)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, BATADV_ATTR_TPMETER_BYTES, total_bytes, + BATADV_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u8(msg, BATADV_ATTR_TPMETER_RESULT, result)) + goto nla_put_failure; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, dst)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_TPMETER, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + ret = -EMSGSIZE; + +err_genlmsg: + nlmsg_free(msg); + return ret; +} + +/** + * batadv_netlink_tp_meter_start - Start a new tp_meter session + * @skb: received netlink message + * @info: receiver information + * + * Return: 0 on success, < 0 on error + */ +static int +batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + struct sk_buff *msg = NULL; + u32 test_length; + void *msg_head; + int ifindex; + u32 cookie; + u8 *dst; + int ret; + + if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) + return -EINVAL; + + if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS]) + return -EINVAL; + + if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]) + return -EINVAL; + + ifindex =
nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); + if (!ifindex) + return -EINVAL; + + dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]); + + test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]); + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &batadv_netlink_family, 0, + BATADV_CMD_TP_METER); + if (!msg_head) { + ret = -ENOBUFS; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + batadv_tp_start(bat_priv, dst, test_length, &cookie); + + ret = batadv_netlink_tp_meter_put(msg, cookie); + + out: + if (soft_iface) + dev_put(soft_iface); + + if (ret) { + if (msg) + nlmsg_free(msg); + return ret; + } + + genlmsg_end(msg, msg_head); + return genlmsg_reply(msg, info); +} + +/** + * batadv_netlink_tp_meter_cancel - Cancel a running tp_meter session + * @skb: received netlink message + * @info: receiver information + * + * Return: 0 on success, < 0 on error + */ +static int +batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + int ifindex; + u8 *dst; + int ret = 0; + + if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) + return -EINVAL; + + if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS]) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); + if (!ifindex) + return -EINVAL; + + dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]); + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL); + +out: + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + +static struct genl_ops batadv_netlink_ops[] = { + { + .cmd = BATADV_CMD_GET_MESH_INFO, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_get_mesh_info, + }, + { + .cmd = BATADV_CMD_TP_METER, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_tp_meter_start, + }, + { + .cmd = BATADV_CMD_TP_METER_CANCEL, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_tp_meter_cancel, + }, +}; + +/** + * batadv_netlink_register - register batadv genl netlink family + */ +void __init batadv_netlink_register(void) +{ + int ret; + + ret = genl_register_family_with_ops_groups(&batadv_netlink_family, + batadv_netlink_ops, + batadv_netlink_mcgrps); + if (ret) + pr_warn("unable to register netlink family\n"); +} + +/** + * batadv_netlink_unregister - unregister batadv genl netlink family + */ +void batadv_netlink_unregister(void) +{ + genl_unregister_family(&batadv_netlink_family); +} diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h new file mode 100644 index 000000000000..945653ab58c6 --- /dev/null +++ b/net/batman-adv/netlink.h @@ -0,0 +1,32 @@ +/* Copyright (C) 2016 B.A.T.M.A.N. contributors: + * + * Matthias Schiffer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation.
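[A userspace client talks to the family registered above by resolving its name ("batadv", i.e. BATADV_NL_NAME) and sending one of the commands declared in the ops table, with attributes from the nla_policy. A minimal sketch using libnl-3/libnl-genl-3, assuming the uapi header from this series is installed as <linux/batman_adv.h>; error handling is trimmed and this is not part of the patch:]

#include <net/if.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/batman_adv.h>

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family;

	if (!sk || genl_connect(sk) < 0)
		return 1;

	/* resolve the family registered by batadv_netlink_register() */
	family = genl_ctrl_resolve(sk, "batadv");
	if (family < 0)
		return 1;

	/* ask for the mesh info of bat0 */
	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    BATADV_CMD_GET_MESH_INFO, 1);
	nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, if_nametoindex("bat0"));
	nl_send_auto(sk, msg);

	/* a real client would now install a callback and call
	 * nl_recvmsgs_default(sk) to parse the reply attributes */
	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}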
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_NETLINK_H_ +#define _NET_BATMAN_ADV_NETLINK_H_ + +#include "main.h" + +#include <linux/types.h> + +void batadv_netlink_register(void); +void batadv_netlink_unregister(void); + +int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, + u8 result, u32 test_time, u64 total_bytes, + u32 cookie); + +#endif /* _NET_BATMAN_ADV_NETLINK_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 678f06865312..293ef4ffd4e1 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -51,10 +51,12 @@ #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "routing.h" #include "send.h" +#include "tvlv.h" static struct lock_class_key batadv_nc_coding_hash_lock_class_key; static struct lock_class_key batadv_nc_decoding_hash_lock_class_key; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 7f51bc2c06eb..7d1e5421f6bc 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -34,11 +34,13 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> +#include "bat_algo.h" #include "distributed-arp-table.h" #include "fragmentation.h" #include "gateway_client.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "multicast.h" #include "network-coding.h" #include "routing.h" @@ -251,10 +253,8 @@ static void batadv_neigh_node_release(struct kref *ref) struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; - struct batadv_algo_ops *bao; neigh_node = container_of(ref, struct batadv_neigh_node, refcount); - bao = neigh_node->orig_node->bat_priv->bat_algo_ops; hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, &neigh_node->ifinfo_list, list) { @@ -263,9 +263,6 @@ static void batadv_neigh_node_release(struct kref *ref) batadv_hardif_neigh_put(neigh_node->hardif_neigh); - if (bao->bat_neigh_free) - bao->bat_neigh_free(neigh_node); - batadv_hardif_put(neigh_node->if_incoming); kfree_rcu(neigh_node, rcu); @@ -537,8 +534,8 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, kref_init(&hardif_neigh->refcount); - if (bat_priv->bat_algo_ops->bat_hardif_neigh_init) - bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh); + if (bat_priv->algo_ops->neigh.hardif_init) + bat_priv->algo_ops->neigh.hardif_init(hardif_neigh); hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list); @@ -602,19 +599,19 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, } /** - * batadv_neigh_node_new - create and init a new neigh_node object + * batadv_neigh_node_create - create a neigh node object * @orig_node: originator object representing the neighbour * @hard_iface: the interface where the neighbour is connected to * @neigh_addr: the mac address of the neighbour interface * * Allocates a new neigh_node object and initialises all the generic fields. * - * Return: neighbor when found. Othwerwise NULL + * Return: the neighbour node if found or created or NULL otherwise. 
*/ -struct batadv_neigh_node * -batadv_neigh_node_new(struct batadv_orig_node *orig_node, - struct batadv_hard_iface *hard_iface, - const u8 *neigh_addr) +static struct batadv_neigh_node * +batadv_neigh_node_create(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) { struct batadv_neigh_node *neigh_node; struct batadv_hardif_neigh_node *hardif_neigh = NULL; @@ -667,6 +664,29 @@ out: } /** + * batadv_neigh_node_get_or_create - retrieve or create a neigh node object + * @orig_node: originator object representing the neighbour + * @hard_iface: the interface where the neighbour is connected to + * @neigh_addr: the mac address of the neighbour interface + * + * Return: the neighbour node if found or created or NULL otherwise. + */ +struct batadv_neigh_node * +batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) +{ + struct batadv_neigh_node *neigh_node = NULL; + + /* first check without locking to avoid the overhead */ + neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); + if (neigh_node) + return neigh_node; + + return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr); +} + +/** * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list * @seq: neighbour table seq_file struct * @offset: not used @@ -686,17 +706,17 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, primary_if->net_dev->dev_addr, net_dev->name, - bat_priv->bat_algo_ops->name); + bat_priv->algo_ops->name); batadv_hardif_put(primary_if); - if (!bat_priv->bat_algo_ops->bat_neigh_print) { + if (!bat_priv->algo_ops->neigh.print) { seq_puts(seq, "No printing function for this routing protocol\n"); return 0; } - bat_priv->bat_algo_ops->bat_neigh_print(bat_priv, seq); + bat_priv->algo_ops->neigh.print(bat_priv, seq); return 0; } @@ -747,8 +767,8 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) batadv_frag_purge_orig(orig_node, NULL); - if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) - orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); + if (orig_node->bat_priv->algo_ops->orig.free) + orig_node->bat_priv->algo_ops->orig.free(orig_node); kfree(orig_node->tt_buff); kfree(orig_node); @@ -1077,12 +1097,12 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing) { struct batadv_neigh_node *best = NULL, *neigh; - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_algo_ops *bao = bat_priv->algo_ops; rcu_read_lock(); hlist_for_each_entry_rcu(neigh, &orig_node->neigh_list, list) { - if (best && (bao->bat_neigh_cmp(neigh, if_outgoing, - best, if_outgoing) <= 0)) + if (best && (bao->neigh.cmp(neigh, if_outgoing, best, + if_outgoing) <= 0)) continue; if (!kref_get_unless_zero(&neigh->refcount)) @@ -1234,18 +1254,17 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "[B.A.T.M.A.N. 
adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, primary_if->net_dev->dev_addr, net_dev->name, - bat_priv->bat_algo_ops->name); + bat_priv->algo_ops->name); batadv_hardif_put(primary_if); - if (!bat_priv->bat_algo_ops->bat_orig_print) { + if (!bat_priv->algo_ops->orig.print) { seq_puts(seq, "No printing function for this routing protocol\n"); return 0; } - bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, - BATADV_IF_DEFAULT); + bat_priv->algo_ops->orig.print(bat_priv, seq, BATADV_IF_DEFAULT); return 0; } @@ -1272,7 +1291,7 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) } bat_priv = netdev_priv(hard_iface->soft_iface); - if (!bat_priv->bat_algo_ops->bat_orig_print) { + if (!bat_priv->algo_ops->orig.print) { seq_puts(seq, "No printing function for this routing protocol\n"); goto out; @@ -1286,9 +1305,9 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "[B.A.T.M.A.N. adv %s, IF/MAC: %s/%pM (%s %s)]\n", BATADV_SOURCE_VERSION, hard_iface->net_dev->name, hard_iface->net_dev->dev_addr, - hard_iface->soft_iface->name, bat_priv->bat_algo_ops->name); + hard_iface->soft_iface->name, bat_priv->algo_ops->name); - bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface); + bat_priv->algo_ops->orig.print(bat_priv, seq, hard_iface); out: if (hard_iface) @@ -1300,7 +1319,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_algo_ops *bao = bat_priv->algo_ops; struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; struct batadv_orig_node *orig_node; @@ -1316,9 +1335,8 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { ret = 0; - if (bao->bat_orig_add_if) - ret = bao->bat_orig_add_if(orig_node, - max_if_num); + if (bao->orig.add_if) + ret = bao->orig.add_if(orig_node, max_if_num); if (ret == -ENOMEM) goto err; } @@ -1340,7 +1358,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, struct hlist_head *head; struct batadv_hard_iface *hard_iface_tmp; struct batadv_orig_node *orig_node; - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_algo_ops *bao = bat_priv->algo_ops; u32 i; int ret; @@ -1353,10 +1371,9 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { ret = 0; - if (bao->bat_orig_del_if) - ret = bao->bat_orig_del_if(orig_node, - max_if_num, - hard_iface->if_num); + if (bao->orig.del_if) + ret = bao->orig.del_if(orig_node, max_if_num, + hard_iface->if_num); if (ret == -ENOMEM) goto err; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 64a8951e5844..566306bf05dc 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -46,9 +46,9 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh); struct batadv_neigh_node * -batadv_neigh_node_new(struct batadv_orig_node *orig_node, - struct batadv_hard_iface *hard_iface, - const u8 *neigh_addr); +batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr); void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node); 
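The renamed helper pairs with batadv_neigh_node_get_or_create() in originator.c above, which tries an unlocked lookup first and only falls back to creation on a miss ("first check without locking to avoid the overhead"). A minimal self-contained C sketch of that idiom, with invented types and a plain list in place of the batman-adv structures (illustrative only, not part of the patch):

	#include <stdlib.h>
	#include <string.h>

	struct neigh { unsigned char addr[6]; int refcount; struct neigh *next; };
	struct table { struct neigh *head; };

	/* lookup; takes a reference on a hit */
	static struct neigh *neigh_get(struct table *t, const unsigned char *a)
	{
		struct neigh *n;

		for (n = t->head; n; n = n->next) {
			if (!memcmp(n->addr, a, 6)) {
				n->refcount++;
				return n;
			}
		}

		return NULL;
	}

	/* allocate and insert; a real version would lock and re-check here */
	static struct neigh *neigh_create(struct table *t, const unsigned char *a)
	{
		struct neigh *n = calloc(1, sizeof(*n));

		if (!n)
			return NULL;

		memcpy(n->addr, a, 6);
		n->refcount = 1;
		n->next = t->head;
		t->head = n;

		return n;
	}

	struct neigh *neigh_get_or_create(struct table *t, const unsigned char *a)
	{
		struct neigh *n = neigh_get(t, a);	/* cheap fast path */

		return n ? n : neigh_create(t, a);
	}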
struct batadv_neigh_node * batadv_orig_router_get(struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 372128ddb474..6b011ff64dd8 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -21,6 +21,8 @@ #include <asm/byteorder.h> #include <linux/types.h> +#define batadv_tp_is_error(n) ((u8)n > 127 ? 1 : 0) + /** * enum batadv_packettype - types for batman-adv encapsulated packets * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV @@ -93,6 +95,7 @@ enum batadv_icmp_packettype { BATADV_ECHO_REQUEST = 8, BATADV_TTL_EXCEEDED = 11, BATADV_PARAMETER_PROBLEM = 12, + BATADV_TP = 15, }; /** @@ -285,6 +288,16 @@ struct batadv_elp_packet { #define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet) /** + * enum batadv_icmp_user_cmd_type - types for batman-adv icmp cmd modes + * @BATADV_TP_START: start a throughput meter run + * @BATADV_TP_STOP: stop a throughput meter run + */ +enum batadv_icmp_user_cmd_type { + BATADV_TP_START = 0, + BATADV_TP_STOP = 2, +}; + +/** * struct batadv_icmp_header - common members among all the ICMP packets * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the genereal header @@ -334,6 +347,47 @@ struct batadv_icmp_packet { __be16 seqno; }; +/** + * struct batadv_icmp_tp_packet - ICMP TP Meter packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier + * @subtype: TP packet subtype (see batadv_icmp_tp_subtype) + * @session: TP session identifier + * @seqno: the TP sequence number + * @timestamp: time when the packet has been sent. This value is filled in a + * TP_MSG and echoed back in the next TP_ACK so that the sender can compute the + * RTT. 
Since it is read only by the host which wrote it, there is no need to + * store it using network order + */ +struct batadv_icmp_tp_packet { + u8 packet_type; + u8 version; + u8 ttl; + u8 msg_type; /* see ICMP message types above */ + u8 dst[ETH_ALEN]; + u8 orig[ETH_ALEN]; + u8 uid; + u8 subtype; + u8 session[2]; + __be32 seqno; + __be32 timestamp; +}; + +/** + * enum batadv_icmp_tp_subtype - ICMP TP Meter packet subtypes + * @BATADV_TP_MSG: Msg from sender to receiver + * @BATADV_TP_ACK: acknowledgment from receiver to sender + */ +enum batadv_icmp_tp_subtype { + BATADV_TP_MSG = 0, + BATADV_TP_ACK, +}; + #define BATADV_RR_LEN 16 /** @@ -420,6 +474,7 @@ struct batadv_unicast_4addr_packet { * @dest: final destination used when routing fragments * @orig: originator of the fragment used when merging the packet * @no: fragment number within this sequence + * @priority: priority of frame, from ToS IP precedence or 802.1p * @reserved: reserved byte for alignment * @seqno: sequence identification * @total_size: size of the merged packet @@ -430,9 +485,11 @@ struct batadv_frag_packet { u8 ttl; #if defined(__BIG_ENDIAN_BITFIELD) u8 no:4; - u8 reserved:4; + u8 priority:3; + u8 reserved:1; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 reserved:4; + u8 reserved:1; + u8 priority:3; u8 no:4; #else #error "unknown bitfield endianness" diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 396c0134c5ab..af8e11933928 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -40,12 +40,15 @@ #include "fragmentation.h" #include "hard-interface.h" #include "icmp_socket.h" +#include "log.h" #include "network-coding.h" #include "originator.h" #include "packet.h" #include "send.h" #include "soft-interface.h" +#include "tp_meter.h" #include "translation-table.h" +#include "tvlv.h" static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -268,10 +271,19 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmph->ttl = BATADV_TTL; res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res != NET_XMIT_DROP) - ret = NET_RX_SUCCESS; + if (res == -1) + goto out; + + ret = NET_RX_SUCCESS; break; + case BATADV_TP: + if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet))) + goto out; + + batadv_tp_meter_recv(bat_priv, skb); + ret = NET_RX_SUCCESS; + goto out; default: /* drop unknown type */ goto out; @@ -290,7 +302,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if = NULL; struct batadv_orig_node *orig_node = NULL; struct batadv_icmp_packet *icmp_packet; - int ret = NET_RX_DROP; + int res, ret = NET_RX_DROP; icmp_packet = (struct batadv_icmp_packet *)skb->data; @@ -321,7 +333,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_TTL_EXCEEDED; icmp_packet->ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res != -1) ret = NET_RX_SUCCESS; out: @@ -341,7 +354,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, struct ethhdr *ethhdr; struct batadv_orig_node *orig_node = NULL; int hdr_size = sizeof(struct batadv_icmp_header); - int ret = NET_RX_DROP; + int res, ret = NET_RX_DROP; /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) @@ -408,7 +421,8 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmph->ttl--; /* route it */ - if (batadv_send_skb_to_orig(skb, 
orig_node, recv_if) != NET_XMIT_DROP) + res = batadv_send_skb_to_orig(skb, orig_node, recv_if); + if (res != -1) + ret = NET_RX_SUCCESS; out: @@ -469,7 +483,7 @@ batadv_find_router(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if) { - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_algo_ops *bao = bat_priv->algo_ops; struct batadv_neigh_node *first_candidate_router = NULL; struct batadv_neigh_node *next_candidate_router = NULL; struct batadv_neigh_node *router, *cand_router = NULL; @@ -523,9 +537,9 @@ batadv_find_router(struct batadv_priv *bat_priv, /* alternative candidate should be good enough to be * considered */ - if (!bao->bat_neigh_is_similar_or_better(cand_router, - cand->if_outgoing, - router, recv_if)) + if (!bao->neigh.is_similar_or_better(cand_router, + cand->if_outgoing, router, + recv_if)) goto next; /* don't use the same router twice */ @@ -645,6 +659,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, len = skb->len; res = batadv_send_skb_to_orig(skb, orig_node, recv_if); + if (res == -1) + goto out; /* translate transmit result into receive result */ if (res == NET_XMIT_SUCCESS) { @@ -652,13 +668,10 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, len + ETH_HLEN); - - ret = NET_RX_SUCCESS; - } else if (res == -EINPROGRESS) { - /* skb was buffered and consumed */ - ret = NET_RX_SUCCESS; } + ret = NET_RX_SUCCESS; + out: if (orig_node) batadv_orig_node_put(orig_node); @@ -1007,6 +1020,8 @@ int batadv_recv_frag_packet(struct sk_buff *skb, if (!orig_node_src) goto out; + skb->priority = frag_packet->priority + 256; + /* Route the fragment if it is not for us and too big to be merged. */ if (!batadv_is_my_mac(bat_priv, frag_packet->dest) && batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) { diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index b1a4e8a811c8..3a10d87b4b76 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -20,10 +20,11 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> -#include <linux/if_ether.h> #include <linux/if.h> +#include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/kref.h> @@ -42,6 +43,7 @@ #include "fragmentation.h" #include "gateway_client.h" #include "hard-interface.h" +#include "log.h" #include "network-coding.h" #include "originator.h" #include "routing.h" @@ -71,6 +73,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, { struct batadv_priv *bat_priv; struct ethhdr *ethhdr; + int ret; bat_priv = netdev_priv(hard_iface->soft_iface); @@ -108,8 +111,15 @@ int batadv_send_skb_packet(struct sk_buff *skb, /* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP * (which is > 0). This will not be treated as an error. + * + * a negative value cannot be returned because it could be interpreted + * as the skb not having been consumed by callers of + * batadv_send_skb_to_orig. */ - return dev_queue_xmit(skb); + ret = dev_queue_xmit(skb); + if (ret < 0) + ret = NET_XMIT_DROP; + + return ret; send_skb_err: kfree_skb(skb); return NET_XMIT_DROP; @@ -155,8 +165,11 @@ int batadv_send_unicast_skb(struct sk_buff *skb, * host, NULL can be passed as recv_if and no interface alternating is * attempted. 
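+ *
+ * A typical caller hence frees the skb itself only when -1 comes back; an
+ * illustrative sketch (batadv_tp_send_msg() in tp_meter.c follows exactly
+ * this pattern):
+ *
+ *	res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ *	if (res == -1)
+ *		kfree_skb(skb);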
* - * Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or - * -EINPROGRESS if the skb is buffered for later transmit. + * Return: -1 on failure (and the skb is not consumed), -EINPROGRESS if the + * skb is buffered for later transmit or the NET_XMIT status returned by the + * lower routine if the packet has been passed down. + * + * If the return value is not -1, the skb has been consumed. */ int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, @@ -164,7 +177,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; - int ret = NET_XMIT_DROP; + int ret = -1; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); @@ -177,8 +190,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, if (atomic_read(&bat_priv->fragmentation) && skb->len > neigh_node->if_incoming->net_dev->mtu) { /* Fragment and send packet. */ - if (batadv_frag_send_packet(skb, orig_node, neigh_node)) - ret = NET_XMIT_SUCCESS; + ret = batadv_frag_send_packet(skb, orig_node, neigh_node); goto out; } @@ -187,12 +199,10 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, * (i.e. being forwarded). If the packet originates from this node or if * network coding fails, then send the packet as usual. */ - if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) { + if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) ret = -EINPROGRESS; - } else { - batadv_send_unicast_skb(skb, neigh_node); - ret = NET_XMIT_SUCCESS; - } + else + ret = batadv_send_unicast_skb(skb, neigh_node); out: if (neigh_node) @@ -318,7 +328,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, { struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr; - int ret = NET_XMIT_DROP; + int res, ret = NET_XMIT_DROP; if (!orig_node) goto out; @@ -355,7 +365,8 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) unicast_packet->ttvn = unicast_packet->ttvn - 1; - if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res != -1) ret = NET_XMIT_SUCCESS; out: @@ -428,27 +439,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, orig_node, vid); } -void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) -{ - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) - return; - - /* the interface gets activated here to avoid race conditions between - * the moment of activating the interface in - * hardif_activate_interface() where the originator mac is set and - * outdated packets (especially uninitialized mac addresses) in the - * packet queue - */ - if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED) - hard_iface->if_status = BATADV_IF_ACTIVE; - - bat_priv->bat_algo_ops->bat_ogm_schedule(hard_iface); -} - -static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) +void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) { kfree_skb(forw_packet->skb); if (forw_packet->if_incoming) @@ -604,45 +595,6 @@ out: atomic_inc(&bat_priv->bcast_queue_left); } -void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work) -{ - struct delayed_work *delayed_work; - struct batadv_forw_packet *forw_packet; - 
struct batadv_priv *bat_priv; - - delayed_work = to_delayed_work(work); - forw_packet = container_of(delayed_work, struct batadv_forw_packet, - delayed_work); - bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); - spin_lock_bh(&bat_priv->forw_bat_list_lock); - hlist_del(&forw_packet->list); - spin_unlock_bh(&bat_priv->forw_bat_list_lock); - - if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) - goto out; - - bat_priv->bat_algo_ops->bat_ogm_emit(forw_packet); - - /* we have to have at least one packet in the queue to determine the - * queues wake up time unless we are shutting down. - * - * only re-schedule if this is the "original" copy, e.g. the OGM of the - * primary interface should only be rescheduled once per period, but - * this function will be called for the forw_packet instances of the - * other secondary interfaces as well. - */ - if (forw_packet->own && - forw_packet->if_incoming == forw_packet->if_outgoing) - batadv_schedule_bat_ogm(forw_packet->if_incoming); - -out: - /* don't count own packet */ - if (!forw_packet->own) - atomic_inc(&bat_priv->batman_queue_left); - - batadv_forw_packet_free(forw_packet); -} - void batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface) diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 6fd7270d8ce6..7cecb7563b45 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -26,8 +26,8 @@ #include "packet.h" struct sk_buff; -struct work_struct; +void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet); int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if); @@ -38,11 +38,9 @@ int batadv_send_broadcast_skb(struct sk_buff *skb, struct batadv_hard_iface *hard_iface); int batadv_send_unicast_skb(struct sk_buff *skb, struct batadv_neigh_node *neigh_node); -void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface); int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, unsigned long delay); -void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work); void batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 287a3879ed7e..7527c0652dd5 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -48,6 +48,7 @@ #include <linux/types.h> #include <linux/workqueue.h> +#include "bat_algo.h" #include "bridge_loop_avoidance.h" #include "debugfs.h" #include "distributed-arp-table.h" @@ -255,7 +256,7 @@ static int batadv_interface_tx(struct sk_buff *skb, if (batadv_compare_eth(ethhdr->h_dest, ectp_addr)) goto dropped; - gw_mode = atomic_read(&bat_priv->gw_mode); + gw_mode = atomic_read(&bat_priv->gw.mode); if (is_multicast_ether_addr(ethhdr->h_dest)) { /* if gw mode is off, broadcast every packet */ if (gw_mode == BATADV_GW_MODE_OFF) { @@ -808,6 +809,10 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->distributed_arp_table, 1); #endif #ifdef CONFIG_BATMAN_ADV_MCAST + bat_priv->mcast.querier_ipv4.exists = false; + bat_priv->mcast.querier_ipv4.shadowing = false; + bat_priv->mcast.querier_ipv6.exists = false; + bat_priv->mcast.querier_ipv6.shadowing = false; bat_priv->mcast.flags = BATADV_NO_FLAGS; atomic_set(&bat_priv->multicast_mode, 1); atomic_set(&bat_priv->mcast.num_disabled, 0); @@ -815,8 +820,8 @@ static int 
batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); #endif - atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF); - atomic_set(&bat_priv->gw_sel_class, 20); + atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); + atomic_set(&bat_priv->gw.sel_class, 20); atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); @@ -837,6 +842,8 @@ static int batadv_softif_init_late(struct net_device *dev) #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bla.num_requests, 0); #endif + atomic_set(&bat_priv->tp_num, 0); + bat_priv->tt.last_changeset = NULL; bat_priv->tt.last_changeset_len = 0; bat_priv->isolation_mark = 0; diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 414b2074165f..fe9ca94ddee2 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -25,8 +25,8 @@ #include <linux/fs.h> #include <linux/if.h> #include <linux/if_vlan.h> -#include <linux/kref.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/rculist.h> @@ -38,11 +38,12 @@ #include <linux/string.h> #include <linux/stringify.h> +#include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "gateway_common.h" -#include "bridge_loop_avoidance.h" #include "hard-interface.h" +#include "log.h" #include "network-coding.h" #include "packet.h" #include "soft-interface.h" @@ -389,12 +390,12 @@ static int batadv_store_uint_attr(const char *buff, size_t count, return count; } -static inline ssize_t -__batadv_store_uint_attr(const char *buff, size_t count, - int min, int max, - void (*post_func)(struct net_device *), - const struct attribute *attr, - atomic_t *attr_store, struct net_device *net_dev) +static ssize_t __batadv_store_uint_attr(const char *buff, size_t count, + int min, int max, + void (*post_func)(struct net_device *), + const struct attribute *attr, + atomic_t *attr_store, + struct net_device *net_dev) { int ret; @@ -411,7 +412,7 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj, { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name); + return sprintf(buff, "%s\n", bat_priv->algo_ops->name); } static void batadv_post_gw_reselect(struct net_device *net_dev) @@ -427,7 +428,7 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr, struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); int bytes_written; - switch (atomic_read(&bat_priv->gw_mode)) { + switch (atomic_read(&bat_priv->gw.mode)) { case BATADV_GW_MODE_CLIENT: bytes_written = sprintf(buff, "%s\n", BATADV_GW_MODE_CLIENT_NAME); @@ -476,10 +477,10 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, return -EINVAL; } - if (atomic_read(&bat_priv->gw_mode) == gw_mode_tmp) + if (atomic_read(&bat_priv->gw.mode) == gw_mode_tmp) return count; - switch (atomic_read(&bat_priv->gw_mode)) { + switch (atomic_read(&bat_priv->gw.mode)) { case BATADV_GW_MODE_CLIENT: curr_gw_mode_str = BATADV_GW_MODE_CLIENT_NAME; break; @@ -508,7 +509,7 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, * state */ batadv_gw_check_client_stop(bat_priv); - atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp); + atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp); batadv_gw_tvlv_container_update(bat_priv); return count; } @@ -624,7 +625,7 @@ 
BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR, 2 * BATADV_JITTER, INT_MAX, NULL); BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0, BATADV_TQ_MAX_VALUE, NULL); -BATADV_ATTR_SIF_UINT(gw_sel_class, gw_sel_class, S_IRUGO | S_IWUSR, 1, +BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1, BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect); static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, batadv_store_gw_bwidth); diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c new file mode 100644 index 000000000000..2333777f919d --- /dev/null +++ b/net/batman-adv/tp_meter.c @@ -0,0 +1,1507 @@ +/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors: + * + * Edo Monticelli, Antonio Quartulli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "tp_meter.h" +#include "main.h" + +#include <linux/atomic.h> +#include <linux/bug.h> +#include <linux/byteorder/generic.h> +#include <linux/cache.h> +#include <linux/compiler.h> +#include <linux/device.h> +#include <linux/etherdevice.h> +#include <linux/fs.h> +#include <linux/if_ether.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/kref.h> +#include <linux/kthread.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/param.h> +#include <linux/printk.h> +#include <linux/random.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/sched.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <uapi/linux/batman_adv.h> + +#include "hard-interface.h" +#include "log.h" +#include "netlink.h" +#include "originator.h" +#include "packet.h" +#include "send.h" + +/** + * BATADV_TP_DEF_TEST_LENGTH - Default test length if not specified by the user + * in milliseconds + */ +#define BATADV_TP_DEF_TEST_LENGTH 10000 + +/** + * BATADV_TP_AWND - Advertised window by the receiver (in bytes) + */ +#define BATADV_TP_AWND 0x20000000 + +/** + * BATADV_TP_RECV_TIMEOUT - Receiver activity timeout. If the receiver does not + * get anything for that many milliseconds, the connection is killed + */ +#define BATADV_TP_RECV_TIMEOUT 1000 + +/** + * BATADV_TP_MAX_RTO - Maximum sender timeout. If the sender RTO gets beyond + * that many milliseconds, the receiver is considered unreachable and the + * connection is killed + */ +#define BATADV_TP_MAX_RTO 30000 + +/** + * BATADV_TP_FIRST_SEQ - First seqno of each session. 
The number is rather high + * in order to immediately trigger a wrap around (test purposes) + */ +#define BATADV_TP_FIRST_SEQ ((u32)-1 - 2000) + +/** + * BATADV_TP_PLEN - length of the payload (data after the batadv_unicast header) + * to simulate + */ +#define BATADV_TP_PLEN (BATADV_TP_PACKET_LEN - ETH_HLEN - \ + sizeof(struct batadv_unicast_packet)) + +static u8 batadv_tp_prerandom[4096] __read_mostly; + +/** + * batadv_tp_session_cookie - generate session cookie based on session ids + * @session: TP session identifier + * @icmp_uid: icmp pseudo uid of the tp session + * + * Return: 32 bit tp_meter session cookie + */ +static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid) +{ + u32 cookie; + + cookie = icmp_uid << 16; + cookie |= session[0] << 8; + cookie |= session[1]; + + return cookie; +} + +/** + * batadv_tp_cwnd - compute the new cwnd size + * @base: base cwnd size value + * @increment: the value to add to base to get the new size + * @min: minimum cwnd value (usually MSS) + * + * Returns the new cwnd size and ensures it does not exceed the Advertised + * Receiver Window size. It is wrap around safe. + * For details refer to Section 3.1 of RFC5681 + * + * Return: new congestion window size in bytes + */ +static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min) +{ + u32 new_size = base + increment; + + /* check for wrap-around */ + if (new_size < base) + new_size = (u32)ULONG_MAX; + + new_size = min_t(u32, new_size, BATADV_TP_AWND); + + return max_t(u32, new_size, min); +} + +/** + * batadv_tp_update_cwnd - update the congestion window + * @tp_vars: the private data of the current TP meter session + * @mss: maximum segment size of transmission + * + * 1) if the session is in Slow Start, the CWND has to be increased by 1 + * MSS every unique received ACK + * 2) if the session is in Congestion Avoidance, the CWND has to be + * increased by MSS * MSS / CWND for every unique received ACK + */ +static void batadv_tp_update_cwnd(struct batadv_tp_vars *tp_vars, u32 mss) +{ + spin_lock_bh(&tp_vars->cwnd_lock); + + /* slow start... */ + if (tp_vars->cwnd <= tp_vars->ss_threshold) { + tp_vars->dec_cwnd = 0; + tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, mss, mss); + spin_unlock_bh(&tp_vars->cwnd_lock); + return; + } + + /* increment CWND by at least 1 (section 3.1 of RFC5681) */ + tp_vars->dec_cwnd += max_t(u32, 1U << 3, + ((mss * mss) << 6) / (tp_vars->cwnd << 3)); + if (tp_vars->dec_cwnd < (mss << 3)) { + spin_unlock_bh(&tp_vars->cwnd_lock); + return; + } + + tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, mss, mss); + tp_vars->dec_cwnd = 0; + + spin_unlock_bh(&tp_vars->cwnd_lock); +} + +/** + * batadv_tp_update_rto - calculate new retransmission timeout + * @tp_vars: the private data of the current TP meter session + * @new_rtt: new roundtrip time in msec + */ +static void batadv_tp_update_rto(struct batadv_tp_vars *tp_vars, + u32 new_rtt) +{ + long m = new_rtt; + + /* RTT update + * Details in Section 2.2 and 2.3 of RFC6298 + * + * It's tricky to understand. Don't lose hair please. 
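+ * A worked instance of the code below (illustrative numbers): on the first
+ * sample with new_rtt = 40ms, srtt becomes 320 (40 << 3) and rttvar becomes
+ * 80 (40 << 1), hence rto = (320 >> 3) + 80 = 120ms.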
+ * Inspired by tcp_rtt_estimator() in tcp_input.c + */ + if (tp_vars->srtt != 0) { + m -= (tp_vars->srtt >> 3); /* m is now error in rtt est */ + tp_vars->srtt += m; /* rtt = 7/8 srtt + 1/8 new */ + if (m < 0) + m = -m; + + m -= (tp_vars->rttvar >> 2); + tp_vars->rttvar += m; /* mdev ~= 3/4 rttvar + 1/4 new */ + } else { + /* first measurement coming in */ + tp_vars->srtt = m << 3; /* take the measured time to be srtt */ + tp_vars->rttvar = m << 1; /* new_rtt / 2 */ + } + + /* rto = srtt + 4 * rttvar. + * rttvar is scaled by 4, therefore doesn't need to be multiplied + */ + tp_vars->rto = (tp_vars->srtt >> 3) + tp_vars->rttvar; +} + +/** + * batadv_tp_batctl_notify - send client status result to client + * @reason: reason for tp meter session stop + * @dst: destination of tp_meter session + * @bat_priv: the bat priv with all the soft interface information + * @start_time: start of transmission in jiffies + * @total_sent: bytes acked to the receiver + * @cookie: cookie of tp_meter session + */ +static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason, + const u8 *dst, struct batadv_priv *bat_priv, + unsigned long start_time, u64 total_sent, + u32 cookie) +{ + u32 test_time; + u8 result; + u32 total_bytes; + + if (!batadv_tp_is_error(reason)) { + result = BATADV_TP_REASON_COMPLETE; + test_time = jiffies_to_msecs(jiffies - start_time); + total_bytes = total_sent; + } else { + result = reason; + test_time = 0; + total_bytes = 0; + } + + batadv_netlink_tpmeter_notify(bat_priv, dst, result, test_time, + total_bytes, cookie); +} + +/** + * batadv_tp_batctl_error_notify - send client error result to client + * @reason: reason for tp meter session stop + * @dst: destination of tp_meter session + * @bat_priv: the bat priv with all the soft interface information + * @cookie: cookie of tp_meter session + */ +static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, + const u8 *dst, + struct batadv_priv *bat_priv, + u32 cookie) +{ + batadv_tp_batctl_notify(reason, dst, bat_priv, 0, 0, cookie); +} + +/** + * batadv_tp_list_find - find a tp_vars object in the global list + * @bat_priv: the bat priv with all the soft interface information + * @dst: the other endpoint MAC address to look for + * + * Look for a tp_vars object matching dst as end_point and return it after + * having incremented the refcounter. Return NULL if not found + * + * Return: matching tp_vars or NULL when no tp_vars with @dst was found + */ +static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, + const u8 *dst) +{ + struct batadv_tp_vars *pos, *tp_vars = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(pos, &bat_priv->tp_list, list) { + if (!batadv_compare_eth(pos->other_end, dst)) + continue; + + /* most of the time this function is invoked during the normal + * process... it makes sense to pay more when the session is + * finished and to speed the process up during the measurement + */ + if (unlikely(!kref_get_unless_zero(&pos->refcount))) + continue; + + tp_vars = pos; + break; + } + rcu_read_unlock(); + + return tp_vars; +} + +/** + * batadv_tp_list_find_session - find tp_vars session object in the global list + * @bat_priv: the bat priv with all the soft interface information + * @dst: the other endpoint MAC address to look for + * @session: session identifier + * + * Look for a tp_vars object matching dst as end_point, session as tp meter + * session and return it after having incremented the refcounter. 
Return NULL + * if not found + * + * Return: matching tp_vars or NULL when no tp_vars was found + */ +static struct batadv_tp_vars * +batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst, + const u8 *session) +{ + struct batadv_tp_vars *pos, *tp_vars = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(pos, &bat_priv->tp_list, list) { + if (!batadv_compare_eth(pos->other_end, dst)) + continue; + + if (memcmp(pos->session, session, sizeof(pos->session)) != 0) + continue; + + /* most of the time this function is invoked during the normal + * process... it makes sense to pay more when the session is + * finished and to speed the process up during the measurement + */ + if (unlikely(!kref_get_unless_zero(&pos->refcount))) + continue; + + tp_vars = pos; + break; + } + rcu_read_unlock(); + + return tp_vars; +} + +/** + * batadv_tp_vars_release - release batadv_tp_vars from lists and queue for + * free after rcu grace period + * @ref: kref pointer of the batadv_tp_vars + */ +static void batadv_tp_vars_release(struct kref *ref) +{ + struct batadv_tp_vars *tp_vars; + struct batadv_tp_unacked *un, *safe; + + tp_vars = container_of(ref, struct batadv_tp_vars, refcount); + + /* lock should not be needed because this object is now out of any + * context! + */ + spin_lock_bh(&tp_vars->unacked_lock); + list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) { + list_del(&un->list); + kfree(un); + } + spin_unlock_bh(&tp_vars->unacked_lock); + + kfree_rcu(tp_vars, rcu); +} + +/** + * batadv_tp_vars_put - decrement the batadv_tp_vars refcounter and possibly + * release it + * @tp_vars: the private data of the current TP meter session to be free'd + */ +static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars) +{ + kref_put(&tp_vars->refcount, batadv_tp_vars_release); +} + +/** + * batadv_tp_sender_cleanup - cleanup sender data and drop its timer + * @bat_priv: the bat priv with all the soft interface information + * @tp_vars: the private data of the current TP meter session to cleanup + */ +static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv, + struct batadv_tp_vars *tp_vars) +{ + cancel_delayed_work(&tp_vars->finish_work); + + spin_lock_bh(&tp_vars->bat_priv->tp_list_lock); + hlist_del_rcu(&tp_vars->list); + spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock); + + /* drop list reference */ + batadv_tp_vars_put(tp_vars); + + atomic_dec(&tp_vars->bat_priv->tp_num); + + /* kill the timer and remove its reference */ + del_timer_sync(&tp_vars->timer); + /* the worker might have rearmed itself, therefore we kill it again. 
Note + * that if the worker should run again before invoking the following + * del_timer(), it would not re-arm itself once again because the status + * is OFF now + */ + del_timer(&tp_vars->timer); + batadv_tp_vars_put(tp_vars); +} + +/** + * batadv_tp_sender_end - print info about ended session and inform client + * @bat_priv: the bat priv with all the soft interface information + * @tp_vars: the private data of the current TP meter session + */ +static void batadv_tp_sender_end(struct batadv_priv *bat_priv, + struct batadv_tp_vars *tp_vars) +{ + u32 session_cookie; + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Test towards %pM finished..shutting down (reason=%d)\n", + tp_vars->other_end, tp_vars->reason); + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n", + tp_vars->srtt >> 3, tp_vars->rttvar >> 2, tp_vars->rto); + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Final values: cwnd=%u ss_threshold=%u\n", + tp_vars->cwnd, tp_vars->ss_threshold); + + session_cookie = batadv_tp_session_cookie(tp_vars->session, + tp_vars->icmp_uid); + + batadv_tp_batctl_notify(tp_vars->reason, + tp_vars->other_end, + bat_priv, + tp_vars->start_time, + atomic64_read(&tp_vars->tot_sent), + session_cookie); +} + +/** + * batadv_tp_sender_shutdown - let sender thread/timer stop gracefully + * @tp_vars: the private data of the current TP meter session + * @reason: reason for tp meter session stop + */ +static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars, + enum batadv_tp_meter_reason reason) +{ + if (!atomic_dec_and_test(&tp_vars->sending)) + return; + + tp_vars->reason = reason; +} + +/** + * batadv_tp_sender_finish - stop sender session after test_length was reached + * @work: delayed work reference of the related tp_vars + */ +static void batadv_tp_sender_finish(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_tp_vars *tp_vars; + + delayed_work = to_delayed_work(work); + tp_vars = container_of(delayed_work, struct batadv_tp_vars, + finish_work); + + batadv_tp_sender_shutdown(tp_vars, BATADV_TP_REASON_COMPLETE); +} + +/** + * batadv_tp_reset_sender_timer - reschedule the sender timer + * @tp_vars: the private TP meter data for this session + * + * Reschedule the timer using tp_vars->rto as delay + */ +static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars) +{ + /* most of the time this function is invoked while normal packet + * reception... + */ + if (unlikely(atomic_read(&tp_vars->sending) == 0)) + /* timer ref will be dropped in batadv_tp_sender_cleanup */ + return; + + mod_timer(&tp_vars->timer, jiffies + msecs_to_jiffies(tp_vars->rto)); +} + +/** + * batadv_tp_sender_timeout - timer that fires in case of packet loss + * @arg: address of the related tp_vars + * + * If fired it means that there was packet loss. 
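+ * (A worked instance of the recovery described next, assuming
+ * cwnd = 8 * BATADV_TP_PLEN: the timeout doubles the RTO, halves
+ * ss_threshold to 4 * BATADV_TP_PLEN and restarts with
+ * cwnd = 3 * BATADV_TP_PLEN.)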
+ * Switch to Slow Start, set the ss_threshold to half of the current cwnd and + * reset the cwnd to 3*MSS + */ +static void batadv_tp_sender_timeout(unsigned long arg) +{ + struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg; + struct batadv_priv *bat_priv = tp_vars->bat_priv; + + if (atomic_read(&tp_vars->sending) == 0) + return; + + /* if the user waited long enough...shutdown the test */ + if (unlikely(tp_vars->rto >= BATADV_TP_MAX_RTO)) { + batadv_tp_sender_shutdown(tp_vars, + BATADV_TP_REASON_DST_UNREACHABLE); + return; + } + + /* RTO exponential backoff + * Details in Section 5.5 of RFC6298 + */ + tp_vars->rto <<= 1; + + spin_lock_bh(&tp_vars->cwnd_lock); + + tp_vars->ss_threshold = tp_vars->cwnd >> 1; + if (tp_vars->ss_threshold < BATADV_TP_PLEN * 2) + tp_vars->ss_threshold = BATADV_TP_PLEN * 2; + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: RTO fired during test towards %pM! cwnd=%u new ss_thr=%u, resetting last_sent to %u\n", + tp_vars->other_end, tp_vars->cwnd, tp_vars->ss_threshold, + atomic_read(&tp_vars->last_acked)); + + tp_vars->cwnd = BATADV_TP_PLEN * 3; + + spin_unlock_bh(&tp_vars->cwnd_lock); + + /* resend the non-ACKed packets.. */ + tp_vars->last_sent = atomic_read(&tp_vars->last_acked); + wake_up(&tp_vars->more_bytes); + + batadv_tp_reset_sender_timer(tp_vars); +} + +/** + * batadv_tp_fill_prerandom - Fill buffer with prefetched random bytes + * @tp_vars: the private TP meter data for this session + * @buf: Buffer to fill with bytes + * @nbytes: amount of pseudorandom bytes + */ +static void batadv_tp_fill_prerandom(struct batadv_tp_vars *tp_vars, + u8 *buf, size_t nbytes) +{ + u32 local_offset; + size_t bytes_inbuf; + size_t to_copy; + size_t pos = 0; + + spin_lock_bh(&tp_vars->prerandom_lock); + local_offset = tp_vars->prerandom_offset; + tp_vars->prerandom_offset += nbytes; + tp_vars->prerandom_offset %= sizeof(batadv_tp_prerandom); + spin_unlock_bh(&tp_vars->prerandom_lock); + + while (nbytes) { + local_offset %= sizeof(batadv_tp_prerandom); + bytes_inbuf = sizeof(batadv_tp_prerandom) - local_offset; + to_copy = min(nbytes, bytes_inbuf); + + memcpy(&buf[pos], &batadv_tp_prerandom[local_offset], to_copy); + pos += to_copy; + nbytes -= to_copy; + local_offset = 0; + } +} + +/** + * batadv_tp_send_msg - send a single message + * @tp_vars: the private TP meter data for this session + * @src: source mac address + * @orig_node: the originator of the destination + * @seqno: sequence number of this packet + * @len: length of the entire packet + * @session: session identifier + * @uid: local ICMP "socket" index + * @timestamp: timestamp in jiffies which is replied in ack + * + * Create and send a single TP Meter message. 
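+ * The resulting skb carries the batadv_icmp_tp_packet header immediately
+ * followed by len - sizeof(header) bytes of prefetched pseudorandom payload;
+ * the timestamp travels in the TP_MSG and is echoed back by the receiver in
+ * the matching TP_ACK.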
+ * + * Return: 0 on success, BATADV_TP_REASON_DST_UNREACHABLE if the destination is + * not reachable, BATADV_TP_REASON_MEMORY_ERROR if the packet couldn't be + * allocated + */ +static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src, + struct batadv_orig_node *orig_node, + u32 seqno, size_t len, const u8 *session, + int uid, u32 timestamp) +{ + struct batadv_icmp_tp_packet *icmp; + struct sk_buff *skb; + int r; + u8 *data; + size_t data_len; + + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); + if (unlikely(!skb)) + return BATADV_TP_REASON_MEMORY_ERROR; + + skb_reserve(skb, ETH_HLEN); + icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp)); + + /* fill the icmp header */ + ether_addr_copy(icmp->dst, orig_node->orig); + ether_addr_copy(icmp->orig, src); + icmp->version = BATADV_COMPAT_VERSION; + icmp->packet_type = BATADV_ICMP; + icmp->ttl = BATADV_TTL; + icmp->msg_type = BATADV_TP; + icmp->uid = uid; + + icmp->subtype = BATADV_TP_MSG; + memcpy(icmp->session, session, sizeof(icmp->session)); + icmp->seqno = htonl(seqno); + icmp->timestamp = htonl(timestamp); + + data_len = len - sizeof(*icmp); + data = (u8 *)skb_put(skb, data_len); + batadv_tp_fill_prerandom(tp_vars, data, data_len); + + r = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (r == -1) + kfree_skb(skb); + + if (r == NET_XMIT_SUCCESS) + return 0; + + return BATADV_TP_REASON_CANT_SEND; +} + +/** + * batadv_tp_recv_ack - ACK receiving function + * @bat_priv: the bat priv with all the soft interface information + * @skb: the buffer containing the received packet + * + * Process a received TP ACK packet + */ +static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, + const struct sk_buff *skb) +{ + struct batadv_hard_iface *primary_if = NULL; + struct batadv_orig_node *orig_node = NULL; + const struct batadv_icmp_tp_packet *icmp; + struct batadv_tp_vars *tp_vars; + size_t packet_len, mss; + u32 rtt, recv_ack, cwnd; + unsigned char *dev_addr; + + packet_len = BATADV_TP_PLEN; + mss = BATADV_TP_PLEN; + packet_len += sizeof(struct batadv_unicast_packet); + + icmp = (struct batadv_icmp_tp_packet *)skb->data; + + /* find the tp_vars */ + tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, + icmp->session); + if (unlikely(!tp_vars)) + return; + + if (unlikely(atomic_read(&tp_vars->sending) == 0)) + goto out; + + /* old ACK? silently drop it.. */ + if (batadv_seq_before(ntohl(icmp->seqno), + (u32)atomic_read(&tp_vars->last_acked))) + goto out; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (unlikely(!primary_if)) + goto out; + + orig_node = batadv_orig_hash_find(bat_priv, icmp->orig); + if (unlikely(!orig_node)) + goto out; + + /* update RTO with the new sampled RTT, if any */ + rtt = jiffies_to_msecs(jiffies) - ntohl(icmp->timestamp); + if (icmp->timestamp && rtt) + batadv_tp_update_rto(tp_vars, rtt); + + /* ACK for new data... 
reset the timer */ + batadv_tp_reset_sender_timer(tp_vars); + + recv_ack = ntohl(icmp->seqno); + + /* check if this ACK is a duplicate */ + if (atomic_read(&tp_vars->last_acked) == recv_ack) { + atomic_inc(&tp_vars->dup_acks); + if (atomic_read(&tp_vars->dup_acks) != 3) + goto out; + + if (recv_ack >= tp_vars->recover) + goto out; + + /* if this is the third duplicate ACK do Fast Retransmit */ + batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr, + orig_node, recv_ack, packet_len, + icmp->session, icmp->uid, + jiffies_to_msecs(jiffies)); + + spin_lock_bh(&tp_vars->cwnd_lock); + + /* Fast Recovery */ + tp_vars->fast_recovery = true; + /* Set recover to the last outstanding seqno when Fast Recovery + * is entered. RFC6582, Section 3.2, step 1 + */ + tp_vars->recover = tp_vars->last_sent; + tp_vars->ss_threshold = tp_vars->cwnd >> 1; + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: Fast Recovery, (cur cwnd=%u) ss_thr=%u last_sent=%u recv_ack=%u\n", + tp_vars->cwnd, tp_vars->ss_threshold, + tp_vars->last_sent, recv_ack); + tp_vars->cwnd = batadv_tp_cwnd(tp_vars->ss_threshold, 3 * mss, + mss); + tp_vars->dec_cwnd = 0; + tp_vars->last_sent = recv_ack; + + spin_unlock_bh(&tp_vars->cwnd_lock); + } else { + /* count the acked data */ + atomic64_add(recv_ack - atomic_read(&tp_vars->last_acked), + &tp_vars->tot_sent); + /* reset the duplicate ACKs counter */ + atomic_set(&tp_vars->dup_acks, 0); + + if (tp_vars->fast_recovery) { + /* partial ACK */ + if (batadv_seq_before(recv_ack, tp_vars->recover)) { + /* this is another hole in the window. React + * immediately as specified by NewReno (see + * Section 3.2 of RFC6582 for details) + */ + dev_addr = primary_if->net_dev->dev_addr; + batadv_tp_send_msg(tp_vars, dev_addr, + orig_node, recv_ack, + packet_len, icmp->session, + icmp->uid, + jiffies_to_msecs(jiffies)); + tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, + mss, mss); + } else { + tp_vars->fast_recovery = false; + /* set cwnd to the value of ss_threshold at the + * moment that Fast Recovery was entered. 
+ * RFC6582, Section 3.2, step 3 + */ + cwnd = batadv_tp_cwnd(tp_vars->ss_threshold, 0, + mss); + tp_vars->cwnd = cwnd; + } + goto move_twnd; + } + + if (recv_ack - atomic_read(&tp_vars->last_acked) >= mss) + batadv_tp_update_cwnd(tp_vars, mss); +move_twnd: + /* move the Transmit Window */ + atomic_set(&tp_vars->last_acked, recv_ack); + } + + wake_up(&tp_vars->more_bytes); +out: + if (likely(primary_if)) + batadv_hardif_put(primary_if); + if (likely(orig_node)) + batadv_orig_node_put(orig_node); + if (likely(tp_vars)) + batadv_tp_vars_put(tp_vars); +} + +/** + * batadv_tp_avail - check if congestion window is not full + * @tp_vars: the private data of the current TP meter session + * @payload_len: size of the payload of a single message + * + * Return: true when congestion window is not full, false otherwise + */ +static bool batadv_tp_avail(struct batadv_tp_vars *tp_vars, + size_t payload_len) +{ + u32 win_left, win_limit; + + win_limit = atomic_read(&tp_vars->last_acked) + tp_vars->cwnd; + win_left = win_limit - tp_vars->last_sent; + + return win_left >= payload_len; +} + +/** + * batadv_tp_wait_available - wait until congestion window becomes free or + * timeout is reached + * @tp_vars: the private data of the current TP meter session + * @plen: size of the payload of a single message + * + * Return: 0 if the condition evaluated to false after the timeout elapsed, + * 1 if the condition evaluated to true after the timeout elapsed, the + * remaining jiffies (at least 1) if the condition evaluated to true before + * the timeout elapsed, or -ERESTARTSYS if it was interrupted by a signal. + */ +static int batadv_tp_wait_available(struct batadv_tp_vars *tp_vars, size_t plen) +{ + int ret; + + ret = wait_event_interruptible_timeout(tp_vars->more_bytes, + batadv_tp_avail(tp_vars, plen), + HZ / 10); + + return ret; +} + +/** + * batadv_tp_send - main sending thread of a tp meter session + * @arg: address of the related tp_vars + * + * Return: nothing, this function never returns + */ +static int batadv_tp_send(void *arg) +{ + struct batadv_tp_vars *tp_vars = arg; + struct batadv_priv *bat_priv = tp_vars->bat_priv; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_orig_node *orig_node = NULL; + size_t payload_len, packet_len; + int err = 0; + + if (unlikely(tp_vars->role != BATADV_TP_SENDER)) { + err = BATADV_TP_REASON_DST_UNREACHABLE; + tp_vars->reason = err; + goto out; + } + + orig_node = batadv_orig_hash_find(bat_priv, tp_vars->other_end); + if (unlikely(!orig_node)) { + err = BATADV_TP_REASON_DST_UNREACHABLE; + tp_vars->reason = err; + goto out; + } + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (unlikely(!primary_if)) { + err = BATADV_TP_REASON_DST_UNREACHABLE; + goto out; + } + + /* assume that all the hard_interfaces have a correctly + * configured MTU, so use the soft_iface MTU as MSS. + * This might not be true and in that case the fragmentation + * should be used. 
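+ * (If a hop's MTU is in fact smaller, batadv_send_skb_to_orig() falls back
+ * to batadv_frag_send_packet() when fragmentation is enabled, as shown in
+ * send.c above.)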
+ * Now, try to send the packet as it is + */ + payload_len = BATADV_TP_PLEN; + BUILD_BUG_ON(sizeof(struct batadv_icmp_tp_packet) > BATADV_TP_PLEN); + + batadv_tp_reset_sender_timer(tp_vars); + + /* queue the worker in charge of terminating the test */ + queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work, + msecs_to_jiffies(tp_vars->test_length)); + + while (atomic_read(&tp_vars->sending) != 0) { + if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) { + batadv_tp_wait_available(tp_vars, payload_len); + continue; + } + + /* to emulate normal unicast traffic, add to the payload len + * the size of the unicast header + */ + packet_len = payload_len + sizeof(struct batadv_unicast_packet); + + err = batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr, + orig_node, tp_vars->last_sent, + packet_len, + tp_vars->session, tp_vars->icmp_uid, + jiffies_to_msecs(jiffies)); + + /* something went wrong during the preparation/transmission */ + if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) { + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: batadv_tp_send() cannot send packets (%d)\n", + err); + /* ensure nobody else tries to stop the thread now */ + if (atomic_dec_and_test(&tp_vars->sending)) + tp_vars->reason = err; + break; + } + + /* right-shift the TWND */ + if (!err) + tp_vars->last_sent += payload_len; + + cond_resched(); + } + +out: + if (likely(primary_if)) + batadv_hardif_put(primary_if); + if (likely(orig_node)) + batadv_orig_node_put(orig_node); + + batadv_tp_sender_end(bat_priv, tp_vars); + batadv_tp_sender_cleanup(bat_priv, tp_vars); + + batadv_tp_vars_put(tp_vars); + + do_exit(0); +} + +/** + * batadv_tp_start_kthread - start new thread which manages the tp meter sender + * @tp_vars: the private data of the current TP meter session + */ +static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars) +{ + struct task_struct *kthread; + struct batadv_priv *bat_priv = tp_vars->bat_priv; + u32 session_cookie; + + kref_get(&tp_vars->refcount); + kthread = kthread_create(batadv_tp_send, tp_vars, "kbatadv_tp_meter"); + if (IS_ERR(kthread)) { + session_cookie = batadv_tp_session_cookie(tp_vars->session, + tp_vars->icmp_uid); + pr_err("batadv: cannot create tp meter kthread\n"); + batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR, + tp_vars->other_end, + bat_priv, session_cookie); + + /* drop reserved reference for kthread */ + batadv_tp_vars_put(tp_vars); + + /* cleanup of failed tp meter variables */ + batadv_tp_sender_cleanup(bat_priv, tp_vars); + return; + } + + wake_up_process(kthread); +} + +/** + * batadv_tp_start - start a new tp meter session + * @bat_priv: the bat priv with all the soft interface information + * @dst: the receiver MAC address + * @test_length: test length in milliseconds + * @cookie: session cookie + */ +void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, + u32 test_length, u32 *cookie) +{ + struct batadv_tp_vars *tp_vars; + u8 session_id[2]; + u8 icmp_uid; + u32 session_cookie; + + get_random_bytes(session_id, sizeof(session_id)); + get_random_bytes(&icmp_uid, 1); + session_cookie = batadv_tp_session_cookie(session_id, icmp_uid); + *cookie = session_cookie; + + /* look for an already existing test towards this node */ + spin_lock_bh(&bat_priv->tp_list_lock); + tp_vars = batadv_tp_list_find(bat_priv, dst); + if (tp_vars) { + spin_unlock_bh(&bat_priv->tp_list_lock); + batadv_tp_vars_put(tp_vars); + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: test to or from the same node already ongoing, aborting\n"); + 
batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING,
+ dst, bat_priv, session_cookie);
+ return;
+ }
+
+ if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) {
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: too many ongoing sessions, aborting (SEND)\n");
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_TOO_MANY, dst,
+ bat_priv, session_cookie);
+ return;
+ }
+
+ tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC);
+ if (!tp_vars) {
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: batadv_tp_start cannot allocate list elements\n");
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR,
+ dst, bat_priv, session_cookie);
+ return;
+ }
+
+ /* initialize tp_vars */
+ ether_addr_copy(tp_vars->other_end, dst);
+ kref_init(&tp_vars->refcount);
+ tp_vars->role = BATADV_TP_SENDER;
+ atomic_set(&tp_vars->sending, 1);
+ memcpy(tp_vars->session, session_id, sizeof(session_id));
+ tp_vars->icmp_uid = icmp_uid;
+
+ tp_vars->last_sent = BATADV_TP_FIRST_SEQ;
+ atomic_set(&tp_vars->last_acked, BATADV_TP_FIRST_SEQ);
+ tp_vars->fast_recovery = false;
+ tp_vars->recover = BATADV_TP_FIRST_SEQ;
+
+ /* initialise the CWND to 3*MSS (Section 3.1 in RFC5681).
+ * For batman-adv the MSS is the size of the payload received by the
+ * soft_interface, hence its MTU
+ */
+ tp_vars->cwnd = BATADV_TP_PLEN * 3;
+ /* at the beginning initialise the SS threshold to the biggest possible
+ * window size, hence the AWND size
+ */
+ tp_vars->ss_threshold = BATADV_TP_AWND;
+
+ /* RTO initial value is 1 second.
+ * Details in Section 2.1 of RFC6298
+ */
+ tp_vars->rto = 1000;
+ tp_vars->srtt = 0;
+ tp_vars->rttvar = 0;
+
+ atomic64_set(&tp_vars->tot_sent, 0);
+
+ kref_get(&tp_vars->refcount);
+ setup_timer(&tp_vars->timer, batadv_tp_sender_timeout,
+ (unsigned long)tp_vars);
+
+ tp_vars->bat_priv = bat_priv;
+ tp_vars->start_time = jiffies;
+
+ init_waitqueue_head(&tp_vars->more_bytes);
+
+ spin_lock_init(&tp_vars->unacked_lock);
+ INIT_LIST_HEAD(&tp_vars->unacked_list);
+
+ spin_lock_init(&tp_vars->cwnd_lock);
+
+ tp_vars->prerandom_offset = 0;
+ spin_lock_init(&tp_vars->prerandom_lock);
+
+ kref_get(&tp_vars->refcount);
+ hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list);
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+
+ tp_vars->test_length = test_length;
+ if (!tp_vars->test_length)
+ tp_vars->test_length = BATADV_TP_DEF_TEST_LENGTH;
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: starting throughput meter towards %pM (length=%ums)\n",
+ dst, tp_vars->test_length);
+
+ /* init work item for finished tp tests */
+ INIT_DELAYED_WORK(&tp_vars->finish_work, batadv_tp_sender_finish);
+
+ /* start tp kthread.
This way the write() call issued from userspace can
+ * happily return and avoid blocking
+ */
+ batadv_tp_start_kthread(tp_vars);
+
+ /* don't return reference to new tp_vars */
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_stop - stop currently running tp meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the receiver MAC address
+ * @return_value: reason for tp meter session stop
+ */
+void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
+ u8 return_value)
+{
+ struct batadv_orig_node *orig_node;
+ struct batadv_tp_vars *tp_vars;
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: stopping test towards %pM\n", dst);
+
+ orig_node = batadv_orig_hash_find(bat_priv, dst);
+ if (!orig_node)
+ return;
+
+ tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig);
+ if (!tp_vars) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: trying to interrupt an already finished session\n");
+ goto out;
+ }
+
+ batadv_tp_sender_shutdown(tp_vars, return_value);
+ batadv_tp_vars_put(tp_vars);
+out:
+ batadv_orig_node_put(orig_node);
+}
+
+/**
+ * batadv_tp_reset_receiver_timer - reset the receiver shutdown timer
+ * @tp_vars: the private data of the current TP meter session
+ *
+ * start the receiver shutdown timer or reset it if already started
+ */
+static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
+{
+ mod_timer(&tp_vars->timer,
+ jiffies + msecs_to_jiffies(BATADV_TP_RECV_TIMEOUT));
+}
+
+/**
+ * batadv_tp_receiver_shutdown - stop a tp meter receiver when the timeout is
+ * reached without receiving any packet
+ * @arg: address of the related tp_vars
+ */
+static void batadv_tp_receiver_shutdown(unsigned long arg)
+{
+ struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg;
+ struct batadv_tp_unacked *un, *safe;
+ struct batadv_priv *bat_priv;
+
+ bat_priv = tp_vars->bat_priv;
+
+ /* if there is recent activity rearm the timer */
+ if (!batadv_has_timed_out(tp_vars->last_recv_time,
+ BATADV_TP_RECV_TIMEOUT)) {
+ /* reset the receiver shutdown timer */
+ batadv_tp_reset_receiver_timer(tp_vars);
+ return;
+ }
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Shutting down for inactivity (more than %dms) from %pM\n",
+ BATADV_TP_RECV_TIMEOUT, tp_vars->other_end);
+
+ spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
+ hlist_del_rcu(&tp_vars->list);
+ spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
+
+ /* drop list reference */
+ batadv_tp_vars_put(tp_vars);
+
+ atomic_dec(&bat_priv->tp_num);
+
+ spin_lock_bh(&tp_vars->unacked_lock);
+ list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+ list_del(&un->list);
+ kfree(un);
+ }
+ spin_unlock_bh(&tp_vars->unacked_lock);
+
+ /* drop reference of timer */
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_send_ack - send an ACK packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the mac address of the destination originator
+ * @seq: the sequence number to ACK
+ * @timestamp: the timestamp to echo back in the ACK
+ * @session: session identifier
+ * @socket_index: local ICMP socket identifier
+ *
+ * Return: 0 on success, a positive integer representing the reason for the
+ * failure otherwise
+ */
+static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
+ u32 seq, __be32 timestamp, const u8 *session,
+ int socket_index)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_orig_node *orig_node;
+ struct batadv_icmp_tp_packet *icmp;
+ struct sk_buff *skb;
+ int r, ret;
+
+ orig_node =
batadv_orig_hash_find(bat_priv, dst);
+ if (unlikely(!orig_node)) {
+ ret = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (unlikely(!primary_if)) {
+ ret = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+
+ skb = netdev_alloc_skb_ip_align(NULL, sizeof(*icmp) + ETH_HLEN);
+ if (unlikely(!skb)) {
+ ret = BATADV_TP_REASON_MEMORY_ERROR;
+ goto out;
+ }
+
+ skb_reserve(skb, ETH_HLEN);
+ icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+ icmp->packet_type = BATADV_ICMP;
+ icmp->version = BATADV_COMPAT_VERSION;
+ icmp->ttl = BATADV_TTL;
+ icmp->msg_type = BATADV_TP;
+ ether_addr_copy(icmp->dst, orig_node->orig);
+ ether_addr_copy(icmp->orig, primary_if->net_dev->dev_addr);
+ icmp->uid = socket_index;
+
+ icmp->subtype = BATADV_TP_ACK;
+ memcpy(icmp->session, session, sizeof(icmp->session));
+ icmp->seqno = htonl(seq);
+ icmp->timestamp = timestamp;
+
+ /* send the ack */
+ r = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (r == -1)
+ kfree_skb(skb);
+
+ if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
+ ret = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ if (likely(orig_node))
+ batadv_orig_node_put(orig_node);
+ if (likely(primary_if))
+ batadv_hardif_put(primary_if);
+
+ return ret;
+}
+
+/**
+ * batadv_tp_handle_out_of_order - store an out of order packet
+ * @tp_vars: the private data of the current TP meter session
+ * @skb: the buffer containing the received packet
+ *
+ * Store the out of order packet in the unacked list for later processing.
+ * These packets are kept in this list so that they can be ACKed at once as
+ * soon as all the previous packets have been received
+ *
+ * Return: true if the packet has been successfully processed, false otherwise
+ */
+static bool batadv_tp_handle_out_of_order(struct batadv_tp_vars *tp_vars,
+ const struct sk_buff *skb)
+{
+ const struct batadv_icmp_tp_packet *icmp;
+ struct batadv_tp_unacked *un, *new;
+ u32 payload_len;
+ bool added = false;
+
+ new = kmalloc(sizeof(*new), GFP_ATOMIC);
+ if (unlikely(!new))
+ return false;
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ new->seqno = ntohl(icmp->seqno);
+ payload_len = skb->len - sizeof(struct batadv_unicast_packet);
+ new->len = payload_len;
+
+ spin_lock_bh(&tp_vars->unacked_lock);
+ /* if the list is empty immediately attach this new object */
+ if (list_empty(&tp_vars->unacked_list)) {
+ list_add(&new->list, &tp_vars->unacked_list);
+ goto out;
+ }
+
+ /* otherwise loop over the list and either drop the packet because this
+ * is a duplicate or store it at the right position.
+ *
+ * The iteration is done in the reverse way because it is likely that
+ * the last received packet (the one being processed now) has a bigger
+ * seqno than all the others already stored.
+ */
+ list_for_each_entry_reverse(un, &tp_vars->unacked_list, list) {
+ /* check for duplicates */
+ if (new->seqno == un->seqno) {
+ if (new->len > un->len)
+ un->len = new->len;
+ kfree(new);
+ added = true;
+ break;
+ }
+
+ /* look for the right position */
+ if (batadv_seq_before(new->seqno, un->seqno))
+ continue;
+
+ /* as soon as an entry having a smaller seqno is found, the new
+ * one is attached _after_ it.
In this way the list is kept in
+ * ascending order
+ */
+ list_add(&new->list, &un->list);
+ added = true;
+ break;
+ }
+
+ /* received packet with smallest seqno out of order; add it to front */
+ if (!added)
+ list_add(&new->list, &tp_vars->unacked_list);
+
+out:
+ spin_unlock_bh(&tp_vars->unacked_lock);
+
+ return true;
+}
+
+/**
+ * batadv_tp_ack_unordered - update the number of received bytes in the
+ * current stream without gaps
+ * @tp_vars: the private data of the current TP meter session
+ */
+static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars)
+{
+ struct batadv_tp_unacked *un, *safe;
+ u32 to_ack;
+
+ /* go through the unacked packet list and possibly ACK them as
+ * well
+ */
+ spin_lock_bh(&tp_vars->unacked_lock);
+ list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+ /* the list is ordered, therefore it is possible to stop as soon
+ * as there is a gap between the last acked seqno and the seqno of
+ * the packet under inspection
+ */
+ if (batadv_seq_before(tp_vars->last_recv, un->seqno))
+ break;
+
+ to_ack = un->seqno + un->len - tp_vars->last_recv;
+
+ if (batadv_seq_before(tp_vars->last_recv, un->seqno + un->len))
+ tp_vars->last_recv += to_ack;
+
+ list_del(&un->list);
+ kfree(un);
+ }
+ spin_unlock_bh(&tp_vars->unacked_lock);
+}
+
+/**
+ * batadv_tp_init_recv - return matching or create new receiver tp_vars
+ * @bat_priv: the bat priv with all the soft interface information
+ * @icmp: received icmp tp msg
+ *
+ * Return: corresponding tp_vars or NULL on errors
+ */
+static struct batadv_tp_vars *
+batadv_tp_init_recv(struct batadv_priv *bat_priv,
+ const struct batadv_icmp_tp_packet *icmp)
+{
+ struct batadv_tp_vars *tp_vars;
+
+ spin_lock_bh(&bat_priv->tp_list_lock);
+ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+ icmp->session);
+ if (tp_vars)
+ goto out_unlock;
+
+ if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: too many ongoing sessions, aborting (RECV)\n");
+ goto out_unlock;
+ }
+
+ tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC);
+ if (!tp_vars)
+ goto out_unlock;
+
+ ether_addr_copy(tp_vars->other_end, icmp->orig);
+ tp_vars->role = BATADV_TP_RECEIVER;
+ memcpy(tp_vars->session, icmp->session, sizeof(tp_vars->session));
+ tp_vars->last_recv = BATADV_TP_FIRST_SEQ;
+ tp_vars->bat_priv = bat_priv;
+ kref_init(&tp_vars->refcount);
+
+ spin_lock_init(&tp_vars->unacked_lock);
+ INIT_LIST_HEAD(&tp_vars->unacked_list);
+
+ kref_get(&tp_vars->refcount);
+ hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list);
+
+ kref_get(&tp_vars->refcount);
+ setup_timer(&tp_vars->timer, batadv_tp_receiver_shutdown,
+ (unsigned long)tp_vars);
+
+ batadv_tp_reset_receiver_timer(tp_vars);
+
+out_unlock:
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+
+ return tp_vars;
+}
+
+/**
+ * batadv_tp_recv_msg - process a single data message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ *
+ * Process a received TP MSG packet
+ */
+static void batadv_tp_recv_msg(struct batadv_priv *bat_priv,
+ const struct sk_buff *skb)
+{
+ const struct batadv_icmp_tp_packet *icmp;
+ struct batadv_tp_vars *tp_vars;
+ size_t packet_size;
+ u32 seqno;
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ seqno = ntohl(icmp->seqno);
+ /* check if this is the first seqno. This means that if the
+ * first packet is lost, the tp meter does not work anymore!
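+ * (The receiver allocates its session state only when it sees
+ * BATADV_TP_FIRST_SEQ; later packets of an unknown session are dropped.)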
+ */
+ if (seqno == BATADV_TP_FIRST_SEQ) {
+ tp_vars = batadv_tp_init_recv(bat_priv, icmp);
+ if (!tp_vars) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: cannot initialise the receiver session\n");
+ goto out;
+ }
+ } else {
+ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+ icmp->session);
+ if (!tp_vars) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Unexpected packet from %pM!\n",
+ icmp->orig);
+ goto out;
+ }
+ }
+
+ if (unlikely(tp_vars->role != BATADV_TP_RECEIVER)) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: dropping packet: not expected (role=%u)\n",
+ tp_vars->role);
+ goto out;
+ }
+
+ tp_vars->last_recv_time = jiffies;
+
+ /* if the packet is a duplicate, it may be the case that an ACK has been
+ * lost. Resend the ACK
+ */
+ if (batadv_seq_before(seqno, tp_vars->last_recv))
+ goto send_ack;
+
+ /* if the packet is out of order, enqueue it */
+ if (seqno != tp_vars->last_recv) {
+ /* exit immediately (and do not send any ACK) if the packet has
+ * not been enqueued correctly
+ */
+ if (!batadv_tp_handle_out_of_order(tp_vars, skb))
+ goto out;
+
+ /* send a duplicate ACK */
+ goto send_ack;
+ }
+
+ /* if everything was fine count the ACKed bytes */
+ packet_size = skb->len - sizeof(struct batadv_unicast_packet);
+ tp_vars->last_recv += packet_size;
+
+ /* check if this ordered message filled a gap.... */
+ batadv_tp_ack_unordered(tp_vars);
+
+send_ack:
+ /* send the ACK. If the received packet was out of order, the ACK that
+ * is going to be sent is a duplicate (the sender will count them and
+ * possibly enter Fast Retransmit as soon as it has reached 3)
+ */
+ batadv_tp_send_ack(bat_priv, icmp->orig, tp_vars->last_recv,
+ icmp->timestamp, icmp->session, icmp->uid);
+out:
+ if (likely(tp_vars))
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_meter_recv - main TP Meter receiving function
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ */
+void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
+{
+ struct batadv_icmp_tp_packet *icmp;
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ switch (icmp->subtype) {
+ case BATADV_TP_MSG:
+ batadv_tp_recv_msg(bat_priv, skb);
+ break;
+ case BATADV_TP_ACK:
+ batadv_tp_recv_ack(bat_priv, skb);
+ break;
+ default:
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Received unknown TP Meter packet type %u\n",
+ icmp->subtype);
+ }
+ consume_skb(skb);
+}
+
+/**
+ * batadv_tp_meter_init - initialize global tp_meter structures
+ */
+void batadv_tp_meter_init(void)
+{
+ get_random_bytes(batadv_tp_prerandom, sizeof(batadv_tp_prerandom));
+}
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
new file mode 100644
index 000000000000..ba922c425e56
--- /dev/null
+++ b/net/batman-adv/tp_meter.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
+ *
+ * Edo Monticelli, Antonio Quartulli
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_TP_METER_H_ +#define _NET_BATMAN_ADV_TP_METER_H_ + +#include "main.h" + +#include <linux/types.h> + +struct sk_buff; + +void batadv_tp_meter_init(void); +void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, + u32 test_length, u32 *cookie); +void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst, + u8 return_value); +void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb); + +#endif /* _NET_BATMAN_ADV_TP_METER_H_ */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 57ec87f37050..7e6df7a4964a 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -47,10 +47,12 @@ #include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "multicast.h" #include "originator.h" #include "packet.h" #include "soft-interface.h" +#include "tvlv.h" /* hash class keys */ static struct lock_class_key batadv_tt_local_hash_lock_class_key; @@ -996,7 +998,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; struct hlist_head *head; - unsigned short vid; u32 i; int last_seen_secs; int last_seen_msecs; @@ -1023,7 +1024,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); - vid = tt_common_entry->vid; last_seen_jiffies = jiffies - tt_local->last_seen; last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); last_seen_secs = last_seen_msecs / 1000; @@ -1547,7 +1547,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global_entry) { struct batadv_neigh_node *router, *best_router = NULL; - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_algo_ops *bao = bat_priv->algo_ops; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL; @@ -1559,8 +1559,8 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, continue; if (best_router && - bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT, - best_router, BATADV_IF_DEFAULT) <= 0) { + bao->neigh.cmp(router, BATADV_IF_DEFAULT, best_router, + BATADV_IF_DEFAULT) <= 0) { batadv_neigh_node_put(router); continue; } diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c new file mode 100644 index 000000000000..3d1cf0fb112d --- /dev/null +++ b/net/batman-adv/tvlv.c @@ -0,0 +1,632 @@ +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */
+
+#include "main.h"
+
+#include <linux/byteorder/generic.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+#include "tvlv.h"
+
+/**
+ * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the tvlv handler
+ */
+static void batadv_tvlv_handler_release(struct kref *ref)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+
+ tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount);
+ kfree_rcu(tvlv_handler, rcu);
+}
+
+/**
+ * batadv_tvlv_handler_put - decrement the tvlv handler refcounter and
+ * possibly release it
+ * @tvlv_handler: the tvlv handler to free
+ */
+static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
+{
+ kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
+}
+
+/**
+ * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list
+ * based on the provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv handler type to look for
+ * @version: tvlv handler version to look for
+ *
+ * Return: tvlv handler if found or NULL otherwise.
+ */
+static struct batadv_tvlv_handler *
+batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
+{
+ struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tvlv_handler_tmp,
+ &bat_priv->tvlv.handler_list, list) {
+ if (tvlv_handler_tmp->type != type)
+ continue;
+
+ if (tvlv_handler_tmp->version != version)
+ continue;
+
+ if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount))
+ continue;
+
+ tvlv_handler = tvlv_handler_tmp;
+ break;
+ }
+ rcu_read_unlock();
+
+ return tvlv_handler;
+}
+
+/**
+ * batadv_tvlv_container_release - release tvlv from lists and free
+ * @ref: kref pointer of the tvlv container
+ */
+static void batadv_tvlv_container_release(struct kref *ref)
+{
+ struct batadv_tvlv_container *tvlv;
+
+ tvlv = container_of(ref, struct batadv_tvlv_container, refcount);
+ kfree(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_put - decrement the tvlv container refcounter and
+ * possibly release it
+ * @tvlv: the tvlv container to free
+ */
+static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
+{
+ kref_put(&tvlv->refcount, batadv_tvlv_container_release);
+}
+
+/**
+ * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container
+ * list based on the provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type to look for
+ * @version: tvlv container version to look for
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ *
+ * Return: tvlv container if found or NULL otherwise.
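+ *
+ * The returned container has its refcounter incremented; the caller is
+ * expected to release it again via batadv_tvlv_container_put().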
+ */
+static struct batadv_tvlv_container *
+batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
+{
+ struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
+
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+ hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
+ if (tvlv_tmp->tvlv_hdr.type != type)
+ continue;
+
+ if (tvlv_tmp->tvlv_hdr.version != version)
+ continue;
+
+ kref_get(&tvlv_tmp->refcount);
+ tvlv = tvlv_tmp;
+ break;
+ }
+
+ return tvlv;
+}
+
+/**
+ * batadv_tvlv_container_list_size - calculate the size of the tvlv container
+ * list entries
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ *
+ * Return: size of all currently registered tvlv containers in bytes.
+ */
+static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
+{
+ struct batadv_tvlv_container *tvlv;
+ u16 tvlv_len = 0;
+
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+ hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
+ tvlv_len += sizeof(struct batadv_tvlv_hdr);
+ tvlv_len += ntohs(tvlv->tvlv_hdr.len);
+ }
+
+ return tvlv_len;
+}
+
+/**
+ * batadv_tvlv_container_remove - remove tvlv container from the tvlv container
+ * list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tvlv: the tvlv container to remove
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ */
+static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_container *tvlv)
+{
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+ if (!tvlv)
+ return;
+
+ hlist_del(&tvlv->list);
+
+ /* first call to decrement the counter, second call to free */
+ batadv_tvlv_container_put(tvlv);
+ batadv_tvlv_container_put(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_unregister - unregister tvlv container based on the
+ * provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type to unregister
+ * @version: tvlv container version to unregister
+ */
+void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
+ u8 type, u8 version)
+{
+ struct batadv_tvlv_container *tvlv;
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv = batadv_tvlv_container_get(bat_priv, type, version);
+ batadv_tvlv_container_remove(bat_priv, tvlv);
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+}
+
+/**
+ * batadv_tvlv_container_register - register tvlv type, version and content
+ * to be propagated with each (primary interface) OGM
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type
+ * @version: tvlv container version
+ * @tvlv_value: tvlv container content
+ * @tvlv_value_len: tvlv container content length
+ *
+ * If a container of the same type and version was already registered the new
+ * content is going to replace the old one.
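+ *
+ * Usage sketch (hypothetical caller): a feature announcing a one-byte
+ * capability field could call
+ *   batadv_tvlv_container_register(bat_priv, MY_TVLV_TYPE, 1, &flags, 1);
+ * and withdraw it again with
+ *   batadv_tvlv_container_unregister(bat_priv, MY_TVLV_TYPE, 1);
+ * where MY_TVLV_TYPE is a made-up type identifier.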
+ */
+void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
+ u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len)
+{
+ struct batadv_tvlv_container *tvlv_old, *tvlv_new;
+
+ if (!tvlv_value)
+ tvlv_value_len = 0;
+
+ tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC);
+ if (!tvlv_new)
+ return;
+
+ tvlv_new->tvlv_hdr.version = version;
+ tvlv_new->tvlv_hdr.type = type;
+ tvlv_new->tvlv_hdr.len = htons(tvlv_value_len);
+
+ memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
+ INIT_HLIST_NODE(&tvlv_new->list);
+ kref_init(&tvlv_new->refcount);
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
+ batadv_tvlv_container_remove(bat_priv, tvlv_old);
+ hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+}
+
+/**
+ * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate
+ * requested packet size
+ * @packet_buff: packet buffer
+ * @packet_buff_len: packet buffer size
+ * @min_packet_len: requested packet minimum size
+ * @additional_packet_len: requested additional packet size on top of minimum
+ * size
+ *
+ * Return: true if the packet buffer could be changed to the requested size,
+ * false otherwise.
+ */
+static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
+ int *packet_buff_len,
+ int min_packet_len,
+ int additional_packet_len)
+{
+ unsigned char *new_buff;
+
+ new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC);
+
+ /* keep the old buffer if kmalloc fails */
+ if (!new_buff)
+ return false;
+
+ memcpy(new_buff, *packet_buff, min_packet_len);
+ kfree(*packet_buff);
+ *packet_buff = new_buff;
+ *packet_buff_len = min_packet_len + additional_packet_len;
+
+ return true;
+}
+
+/**
+ * batadv_tvlv_container_ogm_append - append tvlv container content to given
+ * OGM packet buffer
+ * @bat_priv: the bat priv with all the soft interface information
+ * @packet_buff: ogm packet buffer
+ * @packet_buff_len: ogm packet buffer size including ogm header and tvlv
+ * content
+ * @packet_min_len: ogm header size to be preserved for the OGM itself
+ *
+ * The ogm packet might be enlarged or shrunk depending on the current size
+ * and the size of the to-be-appended tvlv containers.
+ *
+ * Return: size of all appended tvlv containers in bytes.
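+ *
+ * The appended area is a flat sequence of (tvlv_hdr, value) pairs; with two
+ * registered containers A and B the resulting buffer looks like:
+ *   | OGM header | tvlv_hdr A | value A | tvlv_hdr B | value B |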
+ */ +u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, + unsigned char **packet_buff, + int *packet_buff_len, int packet_min_len) +{ + struct batadv_tvlv_container *tvlv; + struct batadv_tvlv_hdr *tvlv_hdr; + u16 tvlv_value_len; + void *tvlv_value; + bool ret; + + spin_lock_bh(&bat_priv->tvlv.container_list_lock); + tvlv_value_len = batadv_tvlv_container_list_size(bat_priv); + + ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len, + packet_min_len, tvlv_value_len); + + if (!ret) + goto end; + + if (!tvlv_value_len) + goto end; + + tvlv_value = (*packet_buff) + packet_min_len; + + hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { + tvlv_hdr = tvlv_value; + tvlv_hdr->type = tvlv->tvlv_hdr.type; + tvlv_hdr->version = tvlv->tvlv_hdr.version; + tvlv_hdr->len = tvlv->tvlv_hdr.len; + tvlv_value = tvlv_hdr + 1; + memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len)); + tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len); + } + +end: + spin_unlock_bh(&bat_priv->tvlv.container_list_lock); + return tvlv_value_len; +} + +/** + * batadv_tvlv_call_handler - parse the given tvlv buffer to call the + * appropriate handlers + * @bat_priv: the bat priv with all the soft interface information + * @tvlv_handler: tvlv callback function handling the tvlv content + * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet + * @orig_node: orig node emitting the ogm packet + * @src: source mac address of the unicast packet + * @dst: destination mac address of the unicast packet + * @tvlv_value: tvlv content + * @tvlv_value_len: tvlv content length + * + * Return: success if handler was not found or the return value of the handler + * callback. + */ +static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, + struct batadv_tvlv_handler *tvlv_handler, + bool ogm_source, + struct batadv_orig_node *orig_node, + u8 *src, u8 *dst, + void *tvlv_value, u16 tvlv_value_len) +{ + if (!tvlv_handler) + return NET_RX_SUCCESS; + + if (ogm_source) { + if (!tvlv_handler->ogm_handler) + return NET_RX_SUCCESS; + + if (!orig_node) + return NET_RX_SUCCESS; + + tvlv_handler->ogm_handler(bat_priv, orig_node, + BATADV_NO_FLAGS, + tvlv_value, tvlv_value_len); + tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED; + } else { + if (!src) + return NET_RX_SUCCESS; + + if (!dst) + return NET_RX_SUCCESS; + + if (!tvlv_handler->unicast_handler) + return NET_RX_SUCCESS; + + return tvlv_handler->unicast_handler(bat_priv, src, + dst, tvlv_value, + tvlv_value_len); + } + + return NET_RX_SUCCESS; +} + +/** + * batadv_tvlv_containers_process - parse the given tvlv buffer to call the + * appropriate handlers + * @bat_priv: the bat priv with all the soft interface information + * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet + * @orig_node: orig node emitting the ogm packet + * @src: source mac address of the unicast packet + * @dst: destination mac address of the unicast packet + * @tvlv_value: tvlv content + * @tvlv_value_len: tvlv content length + * + * Return: success when processing an OGM or the return value of all called + * handler callbacks. 
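+ *
+ * The buffer is walked as a sequence of (tvlv_hdr, value) records; parsing
+ * stops early if a header announces more payload than is left in the buffer.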
+ */ +int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, + bool ogm_source, + struct batadv_orig_node *orig_node, + u8 *src, u8 *dst, + void *tvlv_value, u16 tvlv_value_len) +{ + struct batadv_tvlv_handler *tvlv_handler; + struct batadv_tvlv_hdr *tvlv_hdr; + u16 tvlv_value_cont_len; + u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND; + int ret = NET_RX_SUCCESS; + + while (tvlv_value_len >= sizeof(*tvlv_hdr)) { + tvlv_hdr = tvlv_value; + tvlv_value_cont_len = ntohs(tvlv_hdr->len); + tvlv_value = tvlv_hdr + 1; + tvlv_value_len -= sizeof(*tvlv_hdr); + + if (tvlv_value_cont_len > tvlv_value_len) + break; + + tvlv_handler = batadv_tvlv_handler_get(bat_priv, + tvlv_hdr->type, + tvlv_hdr->version); + + ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler, + ogm_source, orig_node, + src, dst, tvlv_value, + tvlv_value_cont_len); + if (tvlv_handler) + batadv_tvlv_handler_put(tvlv_handler); + tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len; + tvlv_value_len -= tvlv_value_cont_len; + } + + if (!ogm_source) + return ret; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tvlv_handler, + &bat_priv->tvlv.handler_list, list) { + if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) && + !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED)) + tvlv_handler->ogm_handler(bat_priv, orig_node, + cifnotfound, NULL, 0); + + tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED; + } + rcu_read_unlock(); + + return NET_RX_SUCCESS; +} + +/** + * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate + * handlers + * @bat_priv: the bat priv with all the soft interface information + * @batadv_ogm_packet: ogm packet containing the tvlv containers + * @orig_node: orig node emitting the ogm packet + */ +void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, + struct batadv_ogm_packet *batadv_ogm_packet, + struct batadv_orig_node *orig_node) +{ + void *tvlv_value; + u16 tvlv_value_len; + + if (!batadv_ogm_packet) + return; + + tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len); + if (!tvlv_value_len) + return; + + tvlv_value = batadv_ogm_packet + 1; + + batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL, + tvlv_value, tvlv_value_len); +} + +/** + * batadv_tvlv_handler_register - register tvlv handler based on the provided + * type and version (both need to match) for ogm tvlv payload and/or unicast + * payload + * @bat_priv: the bat priv with all the soft interface information + * @optr: ogm tvlv handler callback function. This function receives the orig + * node, flags and the tvlv content as argument to process. + * @uptr: unicast tvlv handler callback function. This function receives the + * source & destination of the unicast packet as well as the tvlv content + * to process. 
+ * @type: tvlv handler type to be registered + * @version: tvlv handler version to be registered + * @flags: flags to enable or disable TVLV API behavior + */ +void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, + void (*optr)(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + u8 flags, + void *tvlv_value, + u16 tvlv_value_len), + int (*uptr)(struct batadv_priv *bat_priv, + u8 *src, u8 *dst, + void *tvlv_value, + u16 tvlv_value_len), + u8 type, u8 version, u8 flags) +{ + struct batadv_tvlv_handler *tvlv_handler; + + tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); + if (tvlv_handler) { + batadv_tvlv_handler_put(tvlv_handler); + return; + } + + tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC); + if (!tvlv_handler) + return; + + tvlv_handler->ogm_handler = optr; + tvlv_handler->unicast_handler = uptr; + tvlv_handler->type = type; + tvlv_handler->version = version; + tvlv_handler->flags = flags; + kref_init(&tvlv_handler->refcount); + INIT_HLIST_NODE(&tvlv_handler->list); + + spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); +} + +/** + * batadv_tvlv_handler_unregister - unregister tvlv handler based on the + * provided type and version (both need to match) + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv handler type to be unregistered + * @version: tvlv handler version to be unregistered + */ +void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, + u8 type, u8 version) +{ + struct batadv_tvlv_handler *tvlv_handler; + + tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); + if (!tvlv_handler) + return; + + batadv_tvlv_handler_put(tvlv_handler); + spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + hlist_del_rcu(&tvlv_handler->list); + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); + batadv_tvlv_handler_put(tvlv_handler); +} + +/** + * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the + * specified host + * @bat_priv: the bat priv with all the soft interface information + * @src: source mac address of the unicast packet + * @dst: destination mac address of the unicast packet + * @type: tvlv type + * @version: tvlv version + * @tvlv_value: tvlv content + * @tvlv_value_len: tvlv content length + */ +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, + u8 *dst, u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len) +{ + struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; + struct batadv_tvlv_hdr *tvlv_hdr; + struct batadv_orig_node *orig_node; + struct sk_buff *skb; + unsigned char *tvlv_buff; + unsigned int tvlv_len; + ssize_t hdr_len = sizeof(*unicast_tvlv_packet); + int res; + + orig_node = batadv_orig_hash_find(bat_priv, dst); + if (!orig_node) + return; + + tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len; + + skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len); + if (!skb) + goto out; + + skb->priority = TC_PRIO_CONTROL; + skb_reserve(skb, ETH_HLEN); + tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len); + unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff; + unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV; + unicast_tvlv_packet->version = BATADV_COMPAT_VERSION; + unicast_tvlv_packet->ttl = BATADV_TTL; + unicast_tvlv_packet->reserved = 0; + unicast_tvlv_packet->tvlv_len = htons(tvlv_len); + unicast_tvlv_packet->align = 0; + 
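/* the payload carries exactly one tvlv: its header first, then the value */
+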
ether_addr_copy(unicast_tvlv_packet->src, src); + ether_addr_copy(unicast_tvlv_packet->dst, dst); + + tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1); + tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff; + tvlv_hdr->version = version; + tvlv_hdr->type = type; + tvlv_hdr->len = htons(tvlv_value_len); + tvlv_buff += sizeof(*tvlv_hdr); + memcpy(tvlv_buff, tvlv_value, tvlv_value_len); + + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res == -1) + kfree_skb(skb); +out: + batadv_orig_node_put(orig_node); +} diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h new file mode 100644 index 000000000000..e4369b547b43 --- /dev/null +++ b/net/batman-adv/tvlv.h @@ -0,0 +1,61 @@ +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_TVLV_H_ +#define _NET_BATMAN_ADV_TVLV_H_ + +#include "main.h" + +#include <linux/types.h> + +struct batadv_ogm_packet; + +void batadv_tvlv_container_register(struct batadv_priv *bat_priv, + u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len); +u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, + unsigned char **packet_buff, + int *packet_buff_len, int packet_min_len); +void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, + struct batadv_ogm_packet *batadv_ogm_packet, + struct batadv_orig_node *orig_node); +void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, + u8 type, u8 version); + +void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, + void (*optr)(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + u8 flags, + void *tvlv_value, + u16 tvlv_value_len), + int (*uptr)(struct batadv_priv *bat_priv, + u8 *src, u8 *dst, + void *tvlv_value, + u16 tvlv_value_len), + u8 type, u8 version, u8 flags); +void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, + u8 type, u8 version); +int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, + bool ogm_source, + struct batadv_orig_node *orig_node, + u8 *src, u8 *dst, + void *tvlv_buff, u16 tvlv_buff_len); +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, + u8 *dst, u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len); + +#endif /* _NET_BATMAN_ADV_TVLV_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ba846b078af8..43db7b61f8eb 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -33,6 +33,7 @@ #include <linux/types.h> #include <linux/wait.h> #include <linux/workqueue.h> +#include <uapi/linux/batman_adv.h> #include "packet.h" @@ -707,6 +708,8 @@ struct batadv_priv_debug_log { * @list: list of available gateway nodes * @list_lock: lock protecting gw_list & curr_gw * @curr_gw: pointer to currently selected gateway node + * @mode: gateway operation: off, client or server (see batadv_gw_modes) + * @sel_class: gateway selection class (applies if gw_mode client) * @bandwidth_down: advertised uplink download 
bandwidth (if gw_mode server)
 * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server)
 * @reselect: bool indicating a gateway re-selection is in progress
@@ -715,6 +718,8 @@ struct batadv_priv_gw {
 struct hlist_head list;
 spinlock_t list_lock; /* protects gw_list & curr_gw */
 struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */
+ atomic_t mode;
+ atomic_t sel_class;
 atomic_t bandwidth_down;
 atomic_t bandwidth_up;
 atomic_t reselect;
@@ -751,14 +756,28 @@ struct batadv_priv_dat {
 #ifdef CONFIG_BATMAN_ADV_MCAST
 /**
+ * struct batadv_mcast_querier_state - IGMP/MLD querier state when bridged
+ * @exists: whether a querier exists in the mesh
+ * @shadowing: if a querier exists, whether it is potentially shadowing
+ * multicast listeners (i.e. querier is behind our own bridge segment)
+ */
+struct batadv_mcast_querier_state {
+ bool exists;
+ bool shadowing;
+};
+
+/**
 * struct batadv_priv_mcast - per mesh interface mcast data
 * @mla_list: list of multicast addresses we are currently announcing via TT
 * @want_all_unsnoopables_list: a list of orig_nodes wanting all unsnoopable
 * multicast traffic
 * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast traffic
 * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast traffic
+ * @querier_ipv4: the current state of an IGMP querier in the mesh
+ * @querier_ipv6: the current state of an MLD querier in the mesh
 * @flags: the flags we have last sent in our mcast tvlv
 * @enabled: whether the multicast tvlv is currently enabled
+ * @bridged: whether the soft interface has a bridge on top
 * @num_disabled: number of nodes that have no mcast tvlv
 * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP traffic
 * @num_want_all_ipv4: counter for items in want_all_ipv4_list
@@ -771,8 +790,11 @@ struct batadv_priv_mcast {
 struct hlist_head want_all_unsnoopables_list;
 struct hlist_head want_all_ipv4_list;
 struct hlist_head want_all_ipv6_list;
+ struct batadv_mcast_querier_state querier_ipv4;
+ struct batadv_mcast_querier_state querier_ipv6;
 u8 flags;
 bool enabled;
+ bool bridged;
 atomic_t num_disabled;
 atomic_t num_want_all_unsnoopables;
 atomic_t num_want_all_ipv4;
@@ -812,6 +834,111 @@ struct batadv_priv_nc {
 };
 
 /**
+ * struct batadv_tp_unacked - unacked packet meta-information
+ * @seqno: seqno of the unacked packet
+ * @len: length of the packet
+ * @list: list node for batadv_tp_vars::unacked_list
+ *
+ * This struct is supposed to represent a buffered unacked packet.
However, since
+ * the purpose of the TP meter is to count the traffic only, there is no need
+ * to store the entire sk_buff; the starting offset and the length are enough
+ */
+struct batadv_tp_unacked {
+ u32 seqno;
+ u16 len;
+ struct list_head list;
+};
+
+/**
+ * enum batadv_tp_meter_role - role of a node in a tp meter session
+ * @BATADV_TP_RECEIVER: Initialized as receiver
+ * @BATADV_TP_SENDER: Initialized as sender
+ */
+enum batadv_tp_meter_role {
+ BATADV_TP_RECEIVER,
+ BATADV_TP_SENDER
+};
+
+/**
+ * struct batadv_tp_vars - tp meter private variables per session
+ * @list: list node for bat_priv::tp_list
+ * @timer: timer for ack (receiver) and retry (sender)
+ * @bat_priv: pointer to the mesh object
+ * @start_time: start time in jiffies
+ * @other_end: mac address of remote
+ * @role: receiver/sender mode
+ * @sending: sending binary semaphore: 1 if sending, 0 if not
+ * @reason: reason for a stopped session
+ * @finish_work: work item for the finishing procedure
+ * @test_length: test length in milliseconds
+ * @session: TP session identifier
+ * @icmp_uid: local ICMP "socket" index
+ * @dec_cwnd: decimal part of the cwnd used during linear growth
+ * @cwnd: current size of the congestion window
+ * @cwnd_lock: lock to protect @cwnd & @dec_cwnd
+ * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the
+ * connection switches to the Congestion Avoidance state
+ * @last_acked: last acked byte
+ * @last_sent: last sent byte, not yet acked
+ * @tot_sent: amount of data sent/ACKed so far
+ * @dup_acks: duplicate ACKs counter
+ * @fast_recovery: true if in Fast Recovery mode
+ * @recover: last sent seqno when entering Fast Recovery
+ * @rto: sender timeout
+ * @srtt: smoothed RTT scaled by 2^3
+ * @rttvar: RTT variation scaled by 2^2
+ * @more_bytes: waiting queue anchor when waiting for more ack/retry timeout
+ * @prerandom_offset: offset inside the prerandom buffer
+ * @prerandom_lock: spinlock protecting access to prerandom_offset
+ * @last_recv: last in-order received packet
+ * @unacked_list: list of unacked packets (meta-info only)
+ * @unacked_lock: protect unacked_list
+ * @last_recv_time: time (jiffies) at which the last msg was received
+ * @refcount: number of contexts where the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
+struct batadv_tp_vars {
+ struct hlist_node list;
+ struct timer_list timer;
+ struct batadv_priv *bat_priv;
+ unsigned long start_time;
+ u8 other_end[ETH_ALEN];
+ enum batadv_tp_meter_role role;
+ atomic_t sending;
+ enum batadv_tp_meter_reason reason;
+ struct delayed_work finish_work;
+ u32 test_length;
+ u8 session[2];
+ u8 icmp_uid;
+
+ /* sender variables */
+ u16 dec_cwnd;
+ u32 cwnd;
+ spinlock_t cwnd_lock; /* Protects cwnd & dec_cwnd */
+ u32 ss_threshold;
+ atomic_t last_acked;
+ u32 last_sent;
+ atomic64_t tot_sent;
+ atomic_t dup_acks;
+ bool fast_recovery;
+ u32 recover;
+ u32 rto;
+ u32 srtt;
+ u32 rttvar;
+ wait_queue_head_t more_bytes;
+ u32 prerandom_offset;
+ spinlock_t prerandom_lock; /* Protects prerandom_offset */
+
+ /* receiver variables */
+ u32 last_recv;
+ struct list_head unacked_list;
+ spinlock_t unacked_lock; /* Protects unacked_list */
+ unsigned long last_recv_time;
+ struct kref refcount;
+ struct rcu_head rcu;
+};
+
+/**
 * struct batadv_softif_vlan - per VLAN attributes set
 * @bat_priv: pointer to the mesh object
 * @vid: VLAN identifier
@@ -865,8 +992,6 @@ struct batadv_priv_bat_v {
 * enabled
 * @multicast_mode: Enable or disable multicast optimizations on this node's
 * sender/originating
side - * @gw_mode: gateway operation: off, client or server (see batadv_gw_modes) - * @gw_sel_class: gateway selection class (applies if gw_mode client) * @orig_interval: OGM broadcast interval in milliseconds * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop * @log_level: configured log level (see batadv_dbg_level) @@ -881,14 +1006,17 @@ struct batadv_priv_bat_v { * @debug_dir: dentry for debugfs batman-adv subdirectory * @forw_bat_list: list of aggregated OGMs that will be forwarded * @forw_bcast_list: list of broadcast packets that will be rebroadcasted + * @tp_list: list of tp sessions + * @tp_num: number of currently active tp sessions * @orig_hash: hash table containing mesh participants (orig nodes) * @forw_bat_list_lock: lock protecting forw_bat_list * @forw_bcast_list_lock: lock protecting forw_bcast_list + * @tp_list_lock: spinlock protecting @tp_list * @orig_work: work queue callback item for orig node purging * @cleanup_work: work queue callback item for soft-interface deinit * @primary_if: one of the hard-interfaces assigned to this mesh interface * becomes the primary interface - * @bat_algo_ops: routing algorithm used by this mesh interface + * @algo_ops: routing algorithm used by this mesh interface * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top * of the mesh interface represented by this object * @softif_vlan_list_lock: lock protecting softif_vlan_list @@ -922,8 +1050,6 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_MCAST atomic_t multicast_mode; #endif - atomic_t gw_mode; - atomic_t gw_sel_class; atomic_t orig_interval; atomic_t hop_penalty; #ifdef CONFIG_BATMAN_ADV_DEBUG @@ -939,13 +1065,16 @@ struct batadv_priv { struct dentry *debug_dir; struct hlist_head forw_bat_list; struct hlist_head forw_bcast_list; + struct hlist_head tp_list; struct batadv_hashtable *orig_hash; spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */ + spinlock_t tp_list_lock; /* protects tp_list */ + atomic_t tp_num; struct delayed_work orig_work; struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ - struct batadv_algo_ops *bat_algo_ops; + struct batadv_algo_ops *algo_ops; struct hlist_head softif_vlan_list; spinlock_t softif_vlan_list_lock; /* protects softif_vlan_list */ #ifdef CONFIG_BATMAN_ADV_BLA @@ -1261,66 +1390,77 @@ struct batadv_forw_packet { }; /** + * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific) + * @activate: start routing mechanisms when hard-interface is brought up + * @enable: init routing info when hard-interface is enabled + * @disable: de-init routing info when hard-interface is disabled + * @update_mac: (re-)init mac addresses of the protocol information + * belonging to this hard-interface + * @primary_set: called when primary interface is selected / changed + */ +struct batadv_algo_iface_ops { + void (*activate)(struct batadv_hard_iface *hard_iface); + int (*enable)(struct batadv_hard_iface *hard_iface); + void (*disable)(struct batadv_hard_iface *hard_iface); + void (*update_mac)(struct batadv_hard_iface *hard_iface); + void (*primary_set)(struct batadv_hard_iface *hard_iface); +}; + +/** + * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific) + * @hardif_init: called on creation of single hop entry + * @cmp: compare the metrics of two neighbors for their respective outgoing + * interfaces + * @is_similar_or_better: check if neigh1 is 
equally similar or better than
+ * neigh2 for their respective outgoing interface from the metric perspective
+ * @print: print the single hop neighbor list (optional)
+ */
+struct batadv_algo_neigh_ops {
+ void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
+ int (*cmp)(struct batadv_neigh_node *neigh1,
+ struct batadv_hard_iface *if_outgoing1,
+ struct batadv_neigh_node *neigh2,
+ struct batadv_hard_iface *if_outgoing2);
+ bool (*is_similar_or_better)(struct batadv_neigh_node *neigh1,
+ struct batadv_hard_iface *if_outgoing1,
+ struct batadv_neigh_node *neigh2,
+ struct batadv_hard_iface *if_outgoing2);
+ void (*print)(struct batadv_priv *priv, struct seq_file *seq);
+};
+
+/**
+ * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
+ * @free: free the resources allocated by the routing algorithm for an orig_node
+ * object
+ * @add_if: ask the routing algorithm to apply the needed changes to the
+ * orig_node due to a new hard-interface being added into the mesh
+ * @del_if: ask the routing algorithm to apply the needed changes to the
+ * orig_node due to a hard-interface being removed from the mesh
+ * @print: print the originator table (optional)
+ */
+struct batadv_algo_orig_ops {
+ void (*free)(struct batadv_orig_node *orig_node);
+ int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
+ int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
+ int del_if_num);
+ void (*print)(struct batadv_priv *priv, struct seq_file *seq,
+ struct batadv_hard_iface *hard_iface);
+};
+
+/**
 * struct batadv_algo_ops - mesh algorithm callbacks
 * @list: list node for the batadv_algo_list
 * @name: name of the algorithm
- * @bat_iface_activate: start routing mechanisms when hard-interface is brought
- * up
- * @bat_iface_enable: init routing info when hard-interface is enabled
- * @bat_iface_disable: de-init routing info when hard-interface is disabled
- * @bat_iface_update_mac: (re-)init mac addresses of the protocol information
- * belonging to this hard-interface
- * @bat_primary_iface_set: called when primary interface is selected / changed
- * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue
- * @bat_ogm_emit: send scheduled OGM
- * @bat_hardif_neigh_init: called on creation of single hop entry
- * @bat_neigh_cmp: compare the metrics of two neighbors for their respective
- * outgoing interfaces
- * @bat_neigh_is_similar_or_better: check if neigh1 is equally similar or
- * better than neigh2 for their respective outgoing interface from the metric
- * prospective
- * @bat_neigh_print: print the single hop neighbor list (optional)
- * @bat_neigh_free: free the resources allocated by the routing algorithm for a
- * neigh_node object
- * @bat_orig_print: print the originator table (optional)
- * @bat_orig_free: free the resources allocated by the routing algorithm for an
- * orig_node object
- * @bat_orig_add_if: ask the routing algorithm to apply the needed changes to
- * the orig_node due to a new hard-interface being added into the mesh
- * @bat_orig_del_if: ask the routing algorithm to apply the needed changes to
- * the orig_node due to an hard-interface being removed from the mesh
+ * @iface: callbacks related to interface handling
+ * @neigh: callbacks related to neighbors handling
+ * @orig: callbacks related to originators handling
 */
 struct batadv_algo_ops {
 struct hlist_node list;
 char *name;
- void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface);
- int (*bat_iface_enable)(struct batadv_hard_iface
*hard_iface); - void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface); - void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface); - void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface); - void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface); - void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); - /* neigh_node handling API */ - void (*bat_hardif_neigh_init)(struct batadv_hardif_neigh_node *neigh); - int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1, - struct batadv_hard_iface *if_outgoing1, - struct batadv_neigh_node *neigh2, - struct batadv_hard_iface *if_outgoing2); - bool (*bat_neigh_is_similar_or_better) - (struct batadv_neigh_node *neigh1, - struct batadv_hard_iface *if_outgoing1, - struct batadv_neigh_node *neigh2, - struct batadv_hard_iface *if_outgoing2); - void (*bat_neigh_print)(struct batadv_priv *priv, struct seq_file *seq); - void (*bat_neigh_free)(struct batadv_neigh_node *neigh); - /* orig_node handling API */ - void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq, - struct batadv_hard_iface *hard_iface); - void (*bat_orig_free)(struct batadv_orig_node *orig_node); - int (*bat_orig_add_if)(struct batadv_orig_node *orig_node, - int max_if_num); - int (*bat_orig_del_if)(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num); + struct batadv_algo_iface_ops iface; + struct batadv_algo_neigh_ops neigh; + struct batadv_algo_orig_ops orig; }; /** diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 2c8095a5d824..8eecd0ec22f2 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -104,8 +104,16 @@ static int br_dev_init(struct net_device *dev) return -ENOMEM; err = br_vlan_init(br); - if (err) + if (err) { free_percpu(br->stats); + return err; + } + + err = br_multicast_init_stats(br); + if (err) { + free_percpu(br->stats); + br_vlan_flush(br); + } br_set_lockdep_class(dev); return err; @@ -341,6 +349,8 @@ static const struct net_device_ops br_netdev_ops = { .ndo_add_slave = br_add_slave, .ndo_del_slave = br_del_slave, .ndo_fix_features = br_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, .ndo_fdb_dump = br_fdb_dump, diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index f47759f05b6d..6c196037d818 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -198,8 +198,10 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb), bool unicast) { - struct net_bridge_port *p; + u8 igmp_type = br_multicast_igmp_type(skb); + __be16 proto = skb->protocol; struct net_bridge_port *prev; + struct net_bridge_port *p; prev = NULL; @@ -218,6 +220,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) goto out; + if (prev == p) + br_multicast_count(p->br, p, proto, igmp_type, + BR_MCAST_DIR_TX); } if (!prev) @@ -257,9 +262,12 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb)) { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + u8 igmp_type = br_multicast_igmp_type(skb); struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *prev = NULL; struct net_bridge_port_group *p; + __be16 proto = skb->protocol; + struct hlist_node *rp; rp = rcu_dereference(hlist_first_rcu(&br->router_list)); @@ -277,6 +285,9 @@ static void br_multicast_flood(struct 
net_bridge_mdb_entry *mdst, prev = maybe_deliver(prev, port, skb, __packet_hook); if (IS_ERR(prev)) goto out; + if (prev == port) + br_multicast_count(port->br, port, proto, igmp_type, + BR_MCAST_DIR_TX); if ((unsigned long)lport >= (unsigned long)port) p = rcu_dereference(p->next); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 8217aecf025b..f2fede05d32c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -345,8 +345,8 @@ static int find_portno(struct net_bridge *br) static struct net_bridge_port *new_nbp(struct net_bridge *br, struct net_device *dev) { - int index; struct net_bridge_port *p; + int index, err; index = find_portno(br); if (index < 0) @@ -366,7 +366,12 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, br_init_port(p); br_set_state(p, BR_STATE_DISABLED); br_stp_port_timer_init(p); - br_multicast_add_port(p); + err = br_multicast_add_port(p); + if (err) { + dev_put(dev); + kfree(p); + p = ERR_PTR(err); + } return p; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 43d2cd862bc2..786602bc0567 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -60,6 +60,9 @@ static int br_pass_frame_up(struct sk_buff *skb) skb = br_handle_vlan(br, vg, skb); if (!skb) return NET_RX_DROP; + /* update the multicast stats if the packet is IGMP/MLD */ + br_multicast_count(br, NULL, skb->protocol, br_multicast_igmp_type(skb), + BR_MCAST_DIR_TX); return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(indev), NULL, skb, indev, NULL, diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 43844144c9c4..e405eef0ae2e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -361,7 +361,8 @@ out: } static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, - __be32 group) + __be32 group, + u8 *igmp_type) { struct sk_buff *skb; struct igmphdr *ih; @@ -411,6 +412,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, skb_set_transport_header(skb, skb->len); ih = igmp_hdr(skb); + *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY; ih->type = IGMP_HOST_MEMBERSHIP_QUERY; ih->code = (group ? br->multicast_last_member_interval : br->multicast_query_response_interval) / @@ -428,7 +430,8 @@ out: #if IS_ENABLED(CONFIG_IPV6) static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, - const struct in6_addr *group) + const struct in6_addr *grp, + u8 *igmp_type) { struct sk_buff *skb; struct ipv6hdr *ip6h; @@ -487,16 +490,17 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, skb_set_transport_header(skb, skb->len); mldq = (struct mld_msg *) icmp6_hdr(skb); - interval = ipv6_addr_any(group) ? + interval = ipv6_addr_any(grp) ? 
br->multicast_query_response_interval : br->multicast_last_member_interval; + *igmp_type = ICMPV6_MGM_QUERY; mldq->mld_type = ICMPV6_MGM_QUERY; mldq->mld_code = 0; mldq->mld_cksum = 0; mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); mldq->mld_reserved = 0; - mldq->mld_mca = *group; + mldq->mld_mca = *grp; /* checksum */ mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -513,14 +517,16 @@ out: #endif static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, - struct br_ip *addr) + struct br_ip *addr, + u8 *igmp_type) { switch (addr->proto) { case htons(ETH_P_IP): - return br_ip4_multicast_alloc_query(br, addr->u.ip4); + return br_ip4_multicast_alloc_query(br, addr->u.ip4, igmp_type); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return br_ip6_multicast_alloc_query(br, &addr->u.ip6); + return br_ip6_multicast_alloc_query(br, &addr->u.ip6, + igmp_type); #endif } return NULL; @@ -829,18 +835,23 @@ static void __br_multicast_send_query(struct net_bridge *br, struct br_ip *ip) { struct sk_buff *skb; + u8 igmp_type; - skb = br_multicast_alloc_query(br, ip); + skb = br_multicast_alloc_query(br, ip, &igmp_type); if (!skb) return; if (port) { skb->dev = port->dev; + br_multicast_count(br, port, skb->protocol, igmp_type, + BR_MCAST_DIR_TX); NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, dev_net(port->dev), NULL, skb, NULL, skb->dev, br_dev_queue_push_xmit); } else { br_multicast_select_own_querier(br, ip, skb); + br_multicast_count(br, port, skb->protocol, igmp_type, + BR_MCAST_DIR_RX); netif_rx(skb); } } @@ -918,7 +929,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data) } #endif -void br_multicast_add_port(struct net_bridge_port *port) +int br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; @@ -930,6 +941,11 @@ void br_multicast_add_port(struct net_bridge_port *port) setup_timer(&port->ip6_own_query.timer, br_ip6_multicast_port_query_expired, (unsigned long)port); #endif + port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); + if (!port->mcast_stats) + return -ENOMEM; + + return 0; } void br_multicast_del_port(struct net_bridge_port *port) @@ -944,6 +960,7 @@ void br_multicast_del_port(struct net_bridge_port *port) br_multicast_del_pg(br, pg); spin_unlock_bh(&br->multicast_lock); del_timer_sync(&port->multicast_router_timer); + free_percpu(port->mcast_stats); } static void br_multicast_enable(struct bridge_mcast_own_query *query) @@ -1583,6 +1600,39 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, } #endif +static void br_multicast_err_count(const struct net_bridge *br, + const struct net_bridge_port *p, + __be16 proto) +{ + struct bridge_mcast_stats __percpu *stats; + struct bridge_mcast_stats *pstats; + + if (!br->multicast_stats_enabled) + return; + + if (p) + stats = p->mcast_stats; + else + stats = br->mcast_stats; + if (WARN_ON(!stats)) + return; + + pstats = this_cpu_ptr(stats); + + u64_stats_update_begin(&pstats->syncp); + switch (proto) { + case htons(ETH_P_IP): + pstats->mstats.igmp_parse_errors++; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + pstats->mstats.mld_parse_errors++; + break; +#endif + } + u64_stats_update_end(&pstats->syncp); +} + static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, @@ -1599,11 +1649,12 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; } else if (err < 0) { + 
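br_multicast_err_count() here is the writer half of the new per-CPU counter scheme: each CPU owns a struct bridge_mcast_stats, and every update is bracketed by the u64_stats seqcount. The shape in isolation, as a sketch rather than new kernel code:

/* On 64-bit kernels u64_stats_update_begin()/end() compile away; on
 * 32-bit they bump a seqcount so readers can detect torn 64-bit reads.
 * No lock is needed because each CPU only touches its own copy.
 */
struct bridge_mcast_stats *pstats = this_cpu_ptr(stats);

u64_stats_update_begin(&pstats->syncp);
pstats->mstats.igmp_parse_errors++;	/* or any other counter */
u64_stats_update_end(&pstats->syncp);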
br_multicast_err_count(br, port, skb->protocol); return err; } - BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb); + BR_INPUT_SKB_CB(skb)->igmp = ih->type; switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: @@ -1625,6 +1676,9 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, if (skb_trimmed && skb_trimmed != skb) kfree_skb(skb_trimmed); + br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp, + BR_MCAST_DIR_RX); + return err; } @@ -1645,11 +1699,12 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; } else if (err < 0) { + br_multicast_err_count(br, port, skb->protocol); return err; } - BR_INPUT_SKB_CB(skb)->igmp = 1; mld = (struct mld_msg *)skb_transport_header(skb); + BR_INPUT_SKB_CB(skb)->igmp = mld->mld_type; switch (mld->mld_type) { case ICMPV6_MGM_REPORT: @@ -1670,6 +1725,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, if (skb_trimmed && skb_trimmed != skb) kfree_skb(skb_trimmed); + br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp, + BR_MCAST_DIR_RX); + return err; } #endif @@ -1677,6 +1735,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, u16 vid) { + int ret = 0; + BR_INPUT_SKB_CB(skb)->igmp = 0; BR_INPUT_SKB_CB(skb)->mrouters_only = 0; @@ -1685,14 +1745,16 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, switch (skb->protocol) { case htons(ETH_P_IP): - return br_multicast_ipv4_rcv(br, port, skb, vid); + ret = br_multicast_ipv4_rcv(br, port, skb, vid); + break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return br_multicast_ipv6_rcv(br, port, skb, vid); + ret = br_multicast_ipv6_rcv(br, port, skb, vid); + break; #endif } - return 0; + return ret; } static void br_multicast_query_expired(struct net_bridge *br, @@ -1831,6 +1893,8 @@ void br_multicast_dev_del(struct net_bridge *br) out: spin_unlock_bh(&br->multicast_lock); + + free_percpu(br->mcast_stats); } int br_multicast_set_router(struct net_bridge *br, unsigned long val) @@ -2185,3 +2249,128 @@ unlock: return ret; } EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent); + +static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats, + __be16 proto, u8 type, u8 dir) +{ + struct bridge_mcast_stats *pstats = this_cpu_ptr(stats); + + u64_stats_update_begin(&pstats->syncp); + switch (proto) { + case htons(ETH_P_IP): + switch (type) { + case IGMP_HOST_MEMBERSHIP_REPORT: + pstats->mstats.igmp_v1reports[dir]++; + break; + case IGMPV2_HOST_MEMBERSHIP_REPORT: + pstats->mstats.igmp_v2reports[dir]++; + break; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + pstats->mstats.igmp_v3reports[dir]++; + break; + case IGMP_HOST_MEMBERSHIP_QUERY: + pstats->mstats.igmp_queries[dir]++; + break; + case IGMP_HOST_LEAVE_MESSAGE: + pstats->mstats.igmp_leaves[dir]++; + break; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + switch (type) { + case ICMPV6_MGM_REPORT: + pstats->mstats.mld_v1reports[dir]++; + break; + case ICMPV6_MLD2_REPORT: + pstats->mstats.mld_v2reports[dir]++; + break; + case ICMPV6_MGM_QUERY: + pstats->mstats.mld_queries[dir]++; + break; + case ICMPV6_MGM_REDUCTION: + pstats->mstats.mld_leaves[dir]++; + break; + } + break; +#endif /* CONFIG_IPV6 */ + } + u64_stats_update_end(&pstats->syncp); +} + +void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, + __be16 proto, u8 type, u8 dir) +{ + struct bridge_mcast_stats __percpu 
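Note the change of meaning in the control block: BR_INPUT_SKB_CB(skb)->igmp used to be a plain 0/1 "was it IGMP" flag and now carries the concrete IGMP/MLD message type. That is what lets every later transmit path attribute the frame to the right per-type counter. Restating the consumer side, the same pattern br_flood() uses earlier in this series, with no new symbols:

u8 igmp_type = br_multicast_igmp_type(skb);	/* 0 unless IGMP/MLD */

/* br_multicast_count() itself bails out on a zero type, so the check
 * is just an early exit for the common non-IGMP case.
 */
if (igmp_type)
	br_multicast_count(br, port, skb->protocol, igmp_type,
			   BR_MCAST_DIR_TX);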
*stats; + + /* if multicast_disabled is true then igmp type can't be set */ + if (!type || !br->multicast_stats_enabled) + return; + + if (p) + stats = p->mcast_stats; + else + stats = br->mcast_stats; + if (WARN_ON(!stats)) + return; + + br_mcast_stats_add(stats, proto, type, dir); +} + +int br_multicast_init_stats(struct net_bridge *br) +{ + br->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); + if (!br->mcast_stats) + return -ENOMEM; + + return 0; +} + +static void mcast_stats_add_dir(u64 *dst, u64 *src) +{ + dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX]; + dst[BR_MCAST_DIR_TX] += src[BR_MCAST_DIR_TX]; +} + +void br_multicast_get_stats(const struct net_bridge *br, + const struct net_bridge_port *p, + struct br_mcast_stats *dest) +{ + struct bridge_mcast_stats __percpu *stats; + struct br_mcast_stats tdst; + int i; + + memset(dest, 0, sizeof(*dest)); + if (p) + stats = p->mcast_stats; + else + stats = br->mcast_stats; + if (WARN_ON(!stats)) + return; + + memset(&tdst, 0, sizeof(tdst)); + for_each_possible_cpu(i) { + struct bridge_mcast_stats *cpu_stats = per_cpu_ptr(stats, i); + struct br_mcast_stats temp; + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + memcpy(&temp, &cpu_stats->mstats, sizeof(temp)); + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + mcast_stats_add_dir(tdst.igmp_queries, temp.igmp_queries); + mcast_stats_add_dir(tdst.igmp_leaves, temp.igmp_leaves); + mcast_stats_add_dir(tdst.igmp_v1reports, temp.igmp_v1reports); + mcast_stats_add_dir(tdst.igmp_v2reports, temp.igmp_v2reports); + mcast_stats_add_dir(tdst.igmp_v3reports, temp.igmp_v3reports); + tdst.igmp_parse_errors += temp.igmp_parse_errors; + + mcast_stats_add_dir(tdst.mld_queries, temp.mld_queries); + mcast_stats_add_dir(tdst.mld_leaves, temp.mld_leaves); + mcast_stats_add_dir(tdst.mld_v1reports, temp.mld_v1reports); + mcast_stats_add_dir(tdst.mld_v2reports, temp.mld_v2reports); + tdst.mld_parse_errors += temp.mld_parse_errors; + } + memcpy(dest, &tdst, sizeof(*dest)); +} diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2d25979273a6..77e7f69bf80d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -700,7 +700,7 @@ static int br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { - unsigned int mtu = ip_skb_dst_mtu(skb); + unsigned int mtu = ip_skb_dst_mtu(sk, skb); struct iphdr *iph = ip_hdr(skb); if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 85e89f693589..f2a29e467e78 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -851,6 +851,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 }, [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 }, [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -1055,6 +1056,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], br->multicast_startup_query_interval = clock_t_to_jiffies(val); } + + if (data[IFLA_BR_MCAST_STATS_ENABLED]) { + __u8 mcast_stats; + + mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]); + br->multicast_stats_enabled = !!mcast_stats; + } #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if 
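br_multicast_get_stats() above is the matching reader half: each CPU's block is copied under the same seqcount and retried if a writer raced with the copy, then folded into one total. The loop reduced to a single counter, as a sketch (example_* is a placeholder, not a function from this patch):

static u64 example_sum_counter(struct bridge_mcast_stats __percpu *stats,
			       size_t field_off)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct bridge_mcast_stats *s = per_cpu_ptr(stats, cpu);
		unsigned int start;
		u64 val;

		do {
			start = u64_stats_fetch_begin_irq(&s->syncp);
			/* assumes field_off points at a u64 inside mstats,
			 * e.g. offsetof(struct br_mcast_stats,
			 *               igmp_parse_errors)
			 */
			val = *(u64 *)((char *)&s->mstats + field_off);
		} while (u64_stats_fetch_retry_irq(&s->syncp, start));

		sum += val;
	}
	return sum;
}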
(data[IFLA_BR_NF_CALL_IPTABLES]) { @@ -1110,6 +1118,7 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERY_USE_IFADDR */ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERIER */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_STATS_ENABLED */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_ELASTICITY */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_MAX */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_LAST_MEMBER_CNT */ @@ -1187,6 +1196,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR, br->multicast_query_use_ifaddr) || nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, br->multicast_querier) || + nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED, + br->multicast_stats_enabled) || nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, br->hash_elasticity) || nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) || @@ -1234,7 +1245,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return 0; } -static size_t br_get_linkxstats_size(const struct net_device *dev) +static size_t bridge_get_linkxstats_size(const struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_vlan_group *vg; @@ -1242,53 +1253,88 @@ static size_t br_get_linkxstats_size(const struct net_device *dev) int numvls = 0; vg = br_vlan_group(br); - if (!vg) - return 0; - - /* we need to count all, even placeholder entries */ - list_for_each_entry(v, &vg->vlan_list, vlist) - numvls++; + if (vg) { + /* we need to count all, even placeholder entries */ + list_for_each_entry(v, &vg->vlan_list, vlist) + numvls++; + } - /* account for the vlans and the link xstats type nest attribute */ return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) + + nla_total_size(sizeof(struct br_mcast_stats)) + nla_total_size(0); } -static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, - int *prividx) +static size_t brport_get_linkxstats_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(struct br_mcast_stats)) + + nla_total_size(0); +} + +static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) +{ + size_t retsize = 0; + + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + retsize = bridge_get_linkxstats_size(dev); + break; + case IFLA_STATS_LINK_XSTATS_SLAVE: + retsize = brport_get_linkxstats_size(dev); + break; + } + + return retsize; +} + +static int bridge_fill_linkxstats(struct sk_buff *skb, + const struct net_device *dev, + int *prividx) { struct net_bridge *br = netdev_priv(dev); + struct nlattr *nla __maybe_unused; struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct nlattr *nest; int vl_idx = 0; - vg = br_vlan_group(br); - if (!vg) - goto out; nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); if (!nest) return -EMSGSIZE; - list_for_each_entry(v, &vg->vlan_list, vlist) { - struct bridge_vlan_xstats vxi; - struct br_vlan_stats stats; - if (++vl_idx < *prividx) - continue; - memset(&vxi, 0, sizeof(vxi)); - vxi.vid = v->vid; - br_vlan_get_stats(v, &stats); - vxi.rx_bytes = stats.rx_bytes; - vxi.rx_packets = stats.rx_packets; - vxi.tx_bytes = stats.tx_bytes; - vxi.tx_packets = stats.tx_packets; - - if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi)) + vg = br_vlan_group(br); + if (vg) { + list_for_each_entry(v, &vg->vlan_list, vlist) { + struct bridge_vlan_xstats vxi; + struct br_vlan_stats stats; + + if (++vl_idx < 
*prividx) + continue; + memset(&vxi, 0, sizeof(vxi)); + vxi.vid = v->vid; + br_vlan_get_stats(v, &stats); + vxi.rx_bytes = stats.rx_bytes; + vxi.rx_packets = stats.rx_packets; + vxi.tx_bytes = stats.tx_bytes; + vxi.tx_packets = stats.tx_packets; + + if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi)) + goto nla_put_failure; + } + } + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (++vl_idx >= *prividx) { + nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST, + sizeof(struct br_mcast_stats), + BRIDGE_XSTATS_PAD); + if (!nla) goto nla_put_failure; + br_multicast_get_stats(br, NULL, nla_data(nla)); } +#endif nla_nest_end(skb, nest); *prividx = 0; -out: + return 0; nla_put_failure: @@ -1298,6 +1344,52 @@ nla_put_failure: return -EMSGSIZE; } +static int brport_fill_linkxstats(struct sk_buff *skb, + const struct net_device *dev, + int *prividx) +{ + struct net_bridge_port *p = br_port_get_rtnl(dev); + struct nlattr *nla __maybe_unused; + struct nlattr *nest; + + if (!p) + return 0; + + nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); + if (!nest) + return -EMSGSIZE; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST, + sizeof(struct br_mcast_stats), + BRIDGE_XSTATS_PAD); + if (!nla) { + nla_nest_end(skb, nest); + return -EMSGSIZE; + } + br_multicast_get_stats(p->br, p, nla_data(nla)); +#endif + nla_nest_end(skb, nest); + + return 0; +} + +static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, + int *prividx, int attr) +{ + int ret = -EINVAL; + + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + ret = bridge_fill_linkxstats(skb, dev, prividx); + break; + case IFLA_STATS_LINK_XSTATS_SLAVE: + ret = brport_fill_linkxstats(skb, dev, prividx); + break; + } + + return ret; +} + static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, .get_link_af_size = br_get_link_af_size_filtered, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 52edecf3c294..4dc851166ad1 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -75,6 +75,12 @@ struct bridge_mcast_querier { struct br_ip addr; struct net_bridge_port __rcu *port; }; + +/* IGMP/MLD statistics */ +struct bridge_mcast_stats { + struct br_mcast_stats mstats; + struct u64_stats_sync syncp; +}; #endif struct br_vlan_stats { @@ -229,6 +235,7 @@ struct net_bridge_port struct bridge_mcast_own_query ip6_own_query; #endif /* IS_ENABLED(CONFIG_IPV6) */ unsigned char multicast_router; + struct bridge_mcast_stats __percpu *mcast_stats; struct timer_list multicast_router_timer; struct hlist_head mglist; struct hlist_node rlist; @@ -315,6 +322,7 @@ struct net_bridge u8 multicast_querier:1; u8 multicast_query_use_ifaddr:1; u8 has_ipv6_addr:1; + u8 multicast_stats_enabled:1; u32 hash_elasticity; u32 hash_max; @@ -337,6 +345,7 @@ struct net_bridge struct bridge_mcast_other_query ip4_other_query; struct bridge_mcast_own_query ip4_own_query; struct bridge_mcast_querier ip4_querier; + struct bridge_mcast_stats __percpu *mcast_stats; #if IS_ENABLED(CONFIG_IPV6) struct bridge_mcast_other_query ip6_other_query; struct bridge_mcast_own_query ip6_own_query; @@ -543,7 +552,7 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, u16 vid); struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb, u16 vid); -void br_multicast_add_port(struct net_bridge_port *port); +int br_multicast_add_port(struct net_bridge_port *port); void br_multicast_del_port(struct net_bridge_port *port); void 
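Both fill functions follow the rtnetlink partial-dump convention: *prividx records how far a previous, partially filled message got, so the next pass can resume without duplicating entries. The convention in isolation, where emit_entry() is a hypothetical placeholder:

int idx = 0;

list_for_each_entry(v, &vg->vlan_list, vlist) {
	if (++idx < *prividx)
		continue;		/* already sent in an earlier skb */
	if (emit_entry(skb, v) < 0) {
		*prividx = idx;		/* resume point for the next pass */
		return -EMSGSIZE;
	}
}
*prividx = 0;				/* complete: next dump starts over */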
br_multicast_enable_port(struct net_bridge_port *port); void br_multicast_disable_port(struct net_bridge_port *port); @@ -576,6 +585,12 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type, u8 flags); void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, int type); +void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, + __be16 proto, u8 type, u8 dir); +int br_multicast_init_stats(struct net_bridge *br); +void br_multicast_get_stats(const struct net_bridge *br, + const struct net_bridge_port *p, + struct br_mcast_stats *dest); #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) @@ -623,6 +638,11 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, return false; } } + +static inline int br_multicast_igmp_type(const struct sk_buff *skb) +{ + return BR_INPUT_SKB_CB(skb)->igmp; +} #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, @@ -638,8 +658,9 @@ static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, return NULL; } -static inline void br_multicast_add_port(struct net_bridge_port *port) +static inline int br_multicast_add_port(struct net_bridge_port *port) { + return 0; } static inline void br_multicast_del_port(struct net_bridge_port *port) @@ -695,6 +716,22 @@ static inline void br_mdb_init(void) static inline void br_mdb_uninit(void) { } + +static inline void br_multicast_count(struct net_bridge *br, + const struct net_bridge_port *p, + __be16 proto, u8 type, u8 dir) +{ +} + +static inline int br_multicast_init_stats(struct net_bridge *br) +{ + return 0; +} + +static inline int br_multicast_igmp_type(const struct sk_buff *skb) +{ + return 0; +} #endif /* br_vlan.c */ diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index beb47071e38d..e120307c6e36 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -618,6 +618,30 @@ static ssize_t multicast_startup_query_interval_store( return store_bridge_parm(d, buf, len, set_startup_query_interval); } static DEVICE_ATTR_RW(multicast_startup_query_interval); + +static ssize_t multicast_stats_enabled_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + + return sprintf(buf, "%u\n", br->multicast_stats_enabled); +} + +static int set_stats_enabled(struct net_bridge *br, unsigned long val) +{ + br->multicast_stats_enabled = !!val; + return 0; +} + +static ssize_t multicast_stats_enabled_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_stats_enabled); +} +static DEVICE_ATTR_RW(multicast_stats_enabled); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static ssize_t nf_call_iptables_show( @@ -784,6 +808,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_query_interval.attr, &dev_attr_multicast_query_response_interval.attr, &dev_attr_multicast_startup_query_interval.attr, + &dev_attr_multicast_stats_enabled.attr, #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) &dev_attr_nf_call_iptables.attr, diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 2a449b7ab8fa..5fc4affd9fdb 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -20,16 +20,16 @@ ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par) __be16 type = hdr->llc.ui.ctrl & IS_UI ? 
hdr->llc.ui.type : hdr->llc.ni.type; if (info->bitmask & EBT_802_3_SAP) { - if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) + if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.ssap)) return false; - if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP)) + if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.dsap)) return false; } if (info->bitmask & EBT_802_3_TYPE) { if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE)) return false; - if (FWINV(info->type != type, EBT_802_3_TYPE)) + if (NF_INVF(info, EBT_802_3_TYPE, info->type != type)) return false; } diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index cd457b891b27..227142282b45 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -25,14 +25,14 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) return false; - if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode != - ah->ar_op, EBT_ARP_OPCODE)) + if ((info->bitmask & EBT_ARP_OPCODE) && + NF_INVF(info, EBT_ARP_OPCODE, info->opcode != ah->ar_op)) return false; - if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype != - ah->ar_hrd, EBT_ARP_HTYPE)) + if ((info->bitmask & EBT_ARP_HTYPE) && + NF_INVF(info, EBT_ARP_HTYPE, info->htype != ah->ar_hrd)) return false; - if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype != - ah->ar_pro, EBT_ARP_PTYPE)) + if ((info->bitmask & EBT_ARP_PTYPE) && + NF_INVF(info, EBT_ARP_PTYPE, info->ptype != ah->ar_pro)) return false; if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { @@ -51,21 +51,22 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) sizeof(daddr), &daddr); if (dap == NULL) return false; - if (info->bitmask & EBT_ARP_SRC_IP && - FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP)) + if ((info->bitmask & EBT_ARP_SRC_IP) && + NF_INVF(info, EBT_ARP_SRC_IP, + info->saddr != (*sap & info->smsk))) return false; - if (info->bitmask & EBT_ARP_DST_IP && - FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP)) + if ((info->bitmask & EBT_ARP_DST_IP) && + NF_INVF(info, EBT_ARP_DST_IP, + info->daddr != (*dap & info->dmsk))) return false; - if (info->bitmask & EBT_ARP_GRAT && - FWINV(*dap != *sap, EBT_ARP_GRAT)) + if ((info->bitmask & EBT_ARP_GRAT) && + NF_INVF(info, EBT_ARP_GRAT, *dap != *sap)) return false; } if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { const unsigned char *mp; unsigned char _mac[ETH_ALEN]; - uint8_t verdict, i; if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER)) return false; @@ -74,11 +75,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) sizeof(_mac), &_mac); if (mp == NULL) return false; - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (mp[i] ^ info->smaddr[i]) & - info->smmsk[i]; - if (FWINV(verdict != 0, EBT_ARP_SRC_MAC)) + if (NF_INVF(info, EBT_ARP_SRC_MAC, + !ether_addr_equal_masked(mp, info->smaddr, + info->smmsk))) return false; } @@ -88,11 +87,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) sizeof(_mac), &_mac); if (mp == NULL) return false; - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (mp[i] ^ info->dmaddr[i]) & - info->dmmsk[i]; - if (FWINV(verdict != 0, EBT_ARP_DST_MAC)) + if (NF_INVF(info, EBT_ARP_DST_MAC, + !ether_addr_equal_masked(mp, info->dmaddr, + info->dmmsk))) return false; } } diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 23bca62d58d2..d06968bdf5ec 100644 
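The FWINV to NF_INVF churn across these ebtables matches is mechanical, but it is worth pinning down what stays invariant: the macro evaluates the match condition and flips the result when the rule's invert flag is set, and ether_addr_equal_masked() replaces the old open-coded per-byte verdict loops. A stand-alone userspace demonstration, with the macro body paraphrased from the kernel headers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Paraphrase of the kernel's NF_INVF(): old FWINV() with the condition
 * moved to the last argument.
 */
#define NF_INVF(ptr, flag, cond) ((bool)(cond) ^ !!((ptr)->invflags & (flag)))

struct rule { uint32_t invflags; };

/* Equivalent to the removed per-byte verdict loops and to the kernel's
 * ether_addr_equal_masked(): addresses match where the mask has bits set.
 */
static bool masked_mac_equal(const uint8_t *a, const uint8_t *b,
			     const uint8_t *mask)
{
	for (int i = 0; i < 6; i++)
		if ((a[i] ^ b[i]) & mask[i])
			return false;
	return true;
}

int main(void)
{
	struct rule r = { .invflags = 0x1 };
	uint8_t m1[6] = { 0, 1, 2, 3, 4, 5 }, m2[6] = { 0, 1, 2, 3, 4, 9 };
	uint8_t msk[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };

	printf("%d\n", NF_INVF(&r, 0x1, 1 == 1)); /* true, inverted -> 0 */
	printf("%d\n", masked_mac_equal(m1, m2, msk)); /* last byte masked -> 1 */
	return 0;
}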
--- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -36,19 +36,19 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) return false; - if (info->bitmask & EBT_IP_TOS && - FWINV(info->tos != ih->tos, EBT_IP_TOS)) + if ((info->bitmask & EBT_IP_TOS) && + NF_INVF(info, EBT_IP_TOS, info->tos != ih->tos)) return false; - if (info->bitmask & EBT_IP_SOURCE && - FWINV((ih->saddr & info->smsk) != - info->saddr, EBT_IP_SOURCE)) + if ((info->bitmask & EBT_IP_SOURCE) && + NF_INVF(info, EBT_IP_SOURCE, + (ih->saddr & info->smsk) != info->saddr)) return false; if ((info->bitmask & EBT_IP_DEST) && - FWINV((ih->daddr & info->dmsk) != - info->daddr, EBT_IP_DEST)) + NF_INVF(info, EBT_IP_DEST, + (ih->daddr & info->dmsk) != info->daddr)) return false; if (info->bitmask & EBT_IP_PROTO) { - if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO)) + if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol)) return false; if (!(info->bitmask & EBT_IP_DPORT) && !(info->bitmask & EBT_IP_SPORT)) @@ -61,16 +61,16 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->bitmask & EBT_IP_DPORT) { u32 dst = ntohs(pptr->dst); - if (FWINV(dst < info->dport[0] || - dst > info->dport[1], - EBT_IP_DPORT)) + if (NF_INVF(info, EBT_IP_DPORT, + dst < info->dport[0] || + dst > info->dport[1])) return false; } if (info->bitmask & EBT_IP_SPORT) { u32 src = ntohs(pptr->src); - if (FWINV(src < info->sport[0] || - src > info->sport[1], - EBT_IP_SPORT)) + if (NF_INVF(info, EBT_IP_SPORT, + src < info->sport[0] || + src > info->sport[1])) return false; } } diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 98de6e7fd86d..4617491be41e 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -45,15 +45,18 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); if (ih6 == NULL) return false; - if (info->bitmask & EBT_IP6_TCLASS && - FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) + if ((info->bitmask & EBT_IP6_TCLASS) && + NF_INVF(info, EBT_IP6_TCLASS, + info->tclass != ipv6_get_dsfield(ih6))) return false; - if ((info->bitmask & EBT_IP6_SOURCE && - FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, - &info->saddr), EBT_IP6_SOURCE)) || - (info->bitmask & EBT_IP6_DEST && - FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, - &info->daddr), EBT_IP6_DEST))) + if (((info->bitmask & EBT_IP6_SOURCE) && + NF_INVF(info, EBT_IP6_SOURCE, + ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr))) || + ((info->bitmask & EBT_IP6_DEST) && + NF_INVF(info, EBT_IP6_DEST, + ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, + &info->daddr)))) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; @@ -63,7 +66,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off); if (offset_ph == -1) return false; - if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) + if (NF_INVF(info, EBT_IP6_PROTO, info->protocol != nexthdr)) return false; if (!(info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT | EBT_IP6_ICMP6))) @@ -76,22 +79,24 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->bitmask & EBT_IP6_DPORT) { u16 dst = ntohs(pptr->tcpudphdr.dst); - if (FWINV(dst < info->dport[0] || - dst > info->dport[1], EBT_IP6_DPORT)) + if 
(NF_INVF(info, EBT_IP6_DPORT, + dst < info->dport[0] || + dst > info->dport[1])) return false; } if (info->bitmask & EBT_IP6_SPORT) { u16 src = ntohs(pptr->tcpudphdr.src); - if (FWINV(src < info->sport[0] || - src > info->sport[1], EBT_IP6_SPORT)) + if (NF_INVF(info, EBT_IP6_SPORT, + src < info->sport[0] || + src > info->sport[1])) return false; } if ((info->bitmask & EBT_IP6_ICMP6) && - FWINV(pptr->icmphdr.type < info->icmpv6_type[0] || - pptr->icmphdr.type > info->icmpv6_type[1] || - pptr->icmphdr.code < info->icmpv6_code[0] || - pptr->icmphdr.code > info->icmpv6_code[1], - EBT_IP6_ICMP6)) + NF_INVF(info, EBT_IP6_ICMP6, + pptr->icmphdr.type < info->icmpv6_type[0] || + pptr->icmphdr.type > info->icmpv6_type[1] || + pptr->icmphdr.code < info->icmpv6_code[0] || + pptr->icmphdr.code > info->icmpv6_code[1])) return false; } return true; diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 6b731e12ecfa..3140eb912d7e 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -17,24 +17,24 @@ #define BPDU_TYPE_TCN 0x80 struct stp_header { - uint8_t dsap; - uint8_t ssap; - uint8_t ctrl; - uint8_t pid; - uint8_t vers; - uint8_t type; + u8 dsap; + u8 ssap; + u8 ctrl; + u8 pid; + u8 vers; + u8 type; }; struct stp_config_pdu { - uint8_t flags; - uint8_t root[8]; - uint8_t root_cost[4]; - uint8_t sender[8]; - uint8_t port[2]; - uint8_t msg_age[2]; - uint8_t max_age[2]; - uint8_t hello_time[2]; - uint8_t forward_delay[2]; + u8 flags; + u8 root[8]; + u8 root_cost[4]; + u8 sender[8]; + u8 port[2]; + u8 msg_age[2]; + u8 max_age[2]; + u8 hello_time[2]; + u8 forward_delay[2]; }; #define NR16(p) (p[0] << 8 | p[1]) @@ -44,76 +44,73 @@ static bool ebt_filter_config(const struct ebt_stp_info *info, const struct stp_config_pdu *stpc) { const struct ebt_stp_config_info *c; - uint16_t v16; - uint32_t v32; - int verdict, i; + u16 v16; + u32 v32; c = &info->config; if ((info->bitmask & EBT_STP_FLAGS) && - FWINV(c->flags != stpc->flags, EBT_STP_FLAGS)) + NF_INVF(info, EBT_STP_FLAGS, c->flags != stpc->flags)) return false; if (info->bitmask & EBT_STP_ROOTPRIO) { v16 = NR16(stpc->root); - if (FWINV(v16 < c->root_priol || - v16 > c->root_priou, EBT_STP_ROOTPRIO)) + if (NF_INVF(info, EBT_STP_ROOTPRIO, + v16 < c->root_priol || v16 > c->root_priou)) return false; } if (info->bitmask & EBT_STP_ROOTADDR) { - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (stpc->root[2+i] ^ c->root_addr[i]) & - c->root_addrmsk[i]; - if (FWINV(verdict != 0, EBT_STP_ROOTADDR)) + if (NF_INVF(info, EBT_STP_ROOTADDR, + !ether_addr_equal_masked(&stpc->root[2], + c->root_addr, + c->root_addrmsk))) return false; } if (info->bitmask & EBT_STP_ROOTCOST) { v32 = NR32(stpc->root_cost); - if (FWINV(v32 < c->root_costl || - v32 > c->root_costu, EBT_STP_ROOTCOST)) + if (NF_INVF(info, EBT_STP_ROOTCOST, + v32 < c->root_costl || v32 > c->root_costu)) return false; } if (info->bitmask & EBT_STP_SENDERPRIO) { v16 = NR16(stpc->sender); - if (FWINV(v16 < c->sender_priol || - v16 > c->sender_priou, EBT_STP_SENDERPRIO)) + if (NF_INVF(info, EBT_STP_SENDERPRIO, + v16 < c->sender_priol || v16 > c->sender_priou)) return false; } if (info->bitmask & EBT_STP_SENDERADDR) { - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) & - c->sender_addrmsk[i]; - if (FWINV(verdict != 0, EBT_STP_SENDERADDR)) + if (NF_INVF(info, EBT_STP_SENDERADDR, + !ether_addr_equal_masked(&stpc->sender[2], + c->sender_addr, + c->sender_addrmsk))) return false; } if (info->bitmask & EBT_STP_PORT) { 
v16 = NR16(stpc->port); - if (FWINV(v16 < c->portl || - v16 > c->portu, EBT_STP_PORT)) + if (NF_INVF(info, EBT_STP_PORT, + v16 < c->portl || v16 > c->portu)) return false; } if (info->bitmask & EBT_STP_MSGAGE) { v16 = NR16(stpc->msg_age); - if (FWINV(v16 < c->msg_agel || - v16 > c->msg_ageu, EBT_STP_MSGAGE)) + if (NF_INVF(info, EBT_STP_MSGAGE, + v16 < c->msg_agel || v16 > c->msg_ageu)) return false; } if (info->bitmask & EBT_STP_MAXAGE) { v16 = NR16(stpc->max_age); - if (FWINV(v16 < c->max_agel || - v16 > c->max_ageu, EBT_STP_MAXAGE)) + if (NF_INVF(info, EBT_STP_MAXAGE, + v16 < c->max_agel || v16 > c->max_ageu)) return false; } if (info->bitmask & EBT_STP_HELLOTIME) { v16 = NR16(stpc->hello_time); - if (FWINV(v16 < c->hello_timel || - v16 > c->hello_timeu, EBT_STP_HELLOTIME)) + if (NF_INVF(info, EBT_STP_HELLOTIME, + v16 < c->hello_timel || v16 > c->hello_timeu)) return false; } if (info->bitmask & EBT_STP_FWDD) { v16 = NR16(stpc->forward_delay); - if (FWINV(v16 < c->forward_delayl || - v16 > c->forward_delayu, EBT_STP_FWDD)) + if (NF_INVF(info, EBT_STP_FWDD, + v16 < c->forward_delayl || v16 > c->forward_delayu)) return false; } return true; @@ -125,7 +122,7 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct ebt_stp_info *info = par->matchinfo; const struct stp_header *sp; struct stp_header _stph; - const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; + const u8 header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); if (sp == NULL) @@ -135,8 +132,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) if (memcmp(sp, header, sizeof(header))) return false; - if (info->bitmask & EBT_STP_TYPE && - FWINV(info->type != sp->type, EBT_STP_TYPE)) + if ((info->bitmask & EBT_STP_TYPE) && + NF_INVF(info, EBT_STP_TYPE, info->type != sp->type)) return false; if (sp->type == BPDU_TYPE_CONFIG && @@ -156,8 +153,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) static int ebt_stp_mt_check(const struct xt_mtchk_param *par) { const struct ebt_stp_info *info = par->matchinfo; - const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; - const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + const u8 bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; + const u8 msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const struct ebt_entry *e = par->entryinfo; if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5a61f35412a0..cceac5bb658f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -121,7 +121,6 @@ ebt_dev_check(const char *entry, const struct net_device *device) return devname[i] != entry[i] && entry[i] != 1; } -#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg)) /* process standard matches */ static inline int ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, @@ -130,7 +129,6 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, const struct ethhdr *h = eth_hdr(skb); const struct net_bridge_port *p; __be16 ethproto; - int verdict, i; if (skb_vlan_tag_present(skb)) ethproto = htons(ETH_P_8021Q); @@ -138,38 +136,36 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { - if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO)) + if (NF_INVF(e, EBT_IPROTO, eth_proto_is_802_3(ethproto))) return 1; } else if 
(!(e->bitmask & EBT_NOPROTO) && - FWINV2(e->ethproto != ethproto, EBT_IPROTO)) + NF_INVF(e, EBT_IPROTO, e->ethproto != ethproto)) return 1; - if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN)) + if (NF_INVF(e, EBT_IIN, ebt_dev_check(e->in, in))) return 1; - if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) + if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out))) return 1; /* rcu_read_lock()ed by nf_hook_slow */ if (in && (p = br_port_get_rcu(in)) != NULL && - FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN)) + NF_INVF(e, EBT_ILOGICALIN, + ebt_dev_check(e->logical_in, p->br->dev))) return 1; if (out && (p = br_port_get_rcu(out)) != NULL && - FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT)) + NF_INVF(e, EBT_ILOGICALOUT, + ebt_dev_check(e->logical_out, p->br->dev))) return 1; if (e->bitmask & EBT_SOURCEMAC) { - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (h->h_source[i] ^ e->sourcemac[i]) & - e->sourcemsk[i]; - if (FWINV2(verdict != 0, EBT_ISOURCE)) + if (NF_INVF(e, EBT_ISOURCE, + !ether_addr_equal_masked(h->h_source, e->sourcemac, + e->sourcemsk))) return 1; } if (e->bitmask & EBT_DESTMAC) { - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (h->h_dest[i] ^ e->destmac[i]) & - e->destmsk[i]; - if (FWINV2(verdict != 0, EBT_IDEST)) + if (NF_INVF(e, EBT_IDEST, + !ether_addr_equal_masked(h->h_dest, e->destmac, + e->destmsk))) return 1; } return 0; diff --git a/net/core/dev.c b/net/core/dev.c index aba10d2a8bc3..b92d63bfde7a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5445,6 +5445,52 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) EXPORT_SYMBOL(netdev_lower_get_next); /** + * netdev_all_lower_get_next - Get the next device from all lower neighbour list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent from the dev's all lower neighbour + * list, starting from iter position. The caller must hold RTNL lock or + * its own locking that guarantees that the neighbour all lower + * list will remain unchanged. + */ +struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry(*iter, struct netdev_adjacent, list); + + if (&lower->list == &dev->all_adj_list.lower) + return NULL; + + *iter = lower->list.next; + + return lower->dev; +} +EXPORT_SYMBOL(netdev_all_lower_get_next); + +/** + * netdev_all_lower_get_next_rcu - Get the next device from all + * lower neighbour list, RCU variant + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent from the dev's all lower neighbour + * list, starting from iter position. The caller must hold RCU read lock. + */ +struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_first_or_null_rcu(&dev->all_adj_list.lower, + struct netdev_adjacent, list); + + return lower ? 
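Two things stand out in the new lower-device accessors. The plain variant advances through all_adj_list.lower one entry per call; the RCU variant, as written, never uses @iter and always fetches the first list entry, so by inspection it can only ever yield one device per walk. Expected consumption of the non-RCU walker, assuming the usual companion macro in the style of netdev_for_each_lower_dev():

struct net_device *lower;
struct list_head *iter;

ASSERT_RTNL();	/* or caller-supplied locking, per the kerneldoc above */
netdev_for_each_all_lower_dev(dev, lower, iter)
	pr_info("lower dev: %s\n", lower->name);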
lower->dev : NULL; +} +EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); + +/** * netdev_lower_get_first_private_rcu - Get the first ->private from the * lower neighbour list, RCU * variant @@ -6041,6 +6087,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev, } EXPORT_SYMBOL(netdev_lower_state_changed); +int netdev_default_l2upper_neigh_construct(struct net_device *dev, + struct neighbour *n) +{ + struct net_device *lower_dev, *stop_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (!lower_dev->netdev_ops->ndo_neigh_construct) + continue; + err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n); + if (err) { + stop_dev = lower_dev; + goto rollback; + } + } + return 0; + +rollback: + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (lower_dev == stop_dev) + break; + if (!lower_dev->netdev_ops->ndo_neigh_destroy) + continue; + lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n); + } + return err; +} +EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct); + +void netdev_default_l2upper_neigh_destroy(struct net_device *dev, + struct neighbour *n) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (!lower_dev->netdev_ops->ndo_neigh_destroy) + continue; + lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n); + } +} +EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; diff --git a/net/core/devlink.c b/net/core/devlink.c index 933e8d4d3968..b2e592a198c0 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1394,6 +1394,78 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, return -EOPNOTSUPP; } +static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, u16 mode) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const struct devlink_ops *ops = devlink->ops; + struct sk_buff *msg; + u16 mode; + int err; + + if (!ops || !ops->eswitch_mode_get) + return -EOPNOTSUPP; + + err = ops->eswitch_mode_get(devlink, &mode); + if (err) + return err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET, + info->snd_portid, info->snd_seq, 0, mode); + + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const struct devlink_ops *ops = devlink->ops; + u16 mode; + + if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) + return -EINVAL; + + mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); + + if (ops && ops->eswitch_mode_set) + return ops->eswitch_mode_set(devlink, mode); + return -EOPNOTSUPP; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { 
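netdev_default_l2upper_neigh_construct() above is a textbook partial-failure unwind: propagate the construct down every lower device, and on error destroy only on the devices already visited. The idiom stripped of the netdev specifics, with do_op()/undo_op() as placeholders:

struct item *it, *stop;
int err;

list_for_each_entry(it, head, node) {
	err = do_op(it);
	if (err) {
		stop = it;	/* first failure: nothing applied here */
		goto rollback;
	}
}
return 0;

rollback:
list_for_each_entry(it, head, node) {
	if (it == stop)
		break;		/* everything from here on was untouched */
	undo_op(it);
}
return err;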
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -1407,6 +1479,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, + [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -1525,6 +1598,20 @@ static const struct genl_ops devlink_nl_ops[] = { DEVLINK_NL_FLAG_NEED_SB | DEVLINK_NL_FLAG_LOCK_PORTS, }, + { + .cmd = DEVLINK_CMD_ESWITCH_MODE_GET, + .doit = devlink_nl_cmd_eswitch_mode_get_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_ESWITCH_MODE_SET, + .doit = devlink_nl_cmd_eswitch_mode_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; /** diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 98298b11f534..be4629c344a6 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -269,6 +269,49 @@ errout: return err; } +static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, + struct nlattr **tb, struct fib_rule *rule) +{ + struct fib_rule *r; + + list_for_each_entry(r, &ops->rules_list, list) { + if (r->action != rule->action) + continue; + + if (r->table != rule->table) + continue; + + if (r->pref != rule->pref) + continue; + + if (memcmp(r->iifname, rule->iifname, IFNAMSIZ)) + continue; + + if (memcmp(r->oifname, rule->oifname, IFNAMSIZ)) + continue; + + if (r->mark != rule->mark) + continue; + + if (r->mark_mask != rule->mark_mask) + continue; + + if (r->tun_id != rule->tun_id) + continue; + + if (r->fr_net != rule->fr_net) + continue; + + if (r->l3mdev != rule->l3mdev) + continue; + + if (!ops->compare(r, frh, tb)) + continue; + return 1; + } + return 0; +} + int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); @@ -386,6 +429,12 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (rule->l3mdev && rule->table) goto errout_free; + if ((nlh->nlmsg_flags & NLM_F_EXCL) && + rule_exists(ops, frh, tb, rule)) { + err = -EEXIST; + goto errout_free; + } + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; diff --git a/net/core/filter.c b/net/core/filter.c index cb9fc16cac46..10c4a2f9e8bb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -150,6 +150,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return raw_smp_processor_id(); } +static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { + .func = __get_raw_cpu_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, struct bpf_insn *insn_buf) { @@ -1295,21 +1301,10 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) { - struct bpf_prog *prog; - if (sock_flag(sk, SOCK_FILTER_LOCKED)) return ERR_PTR(-EPERM); - prog = bpf_prog_get(ufd); - if (IS_ERR(prog)) - return prog; - - if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { - bpf_prog_put(prog); - return ERR_PTR(-EINVAL); - } - - return prog; + return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); } int sk_attach_bpf(u32 ufd, struct sock *sk) @@ -1734,6 +1729,23 @@ static const struct bpf_func_proto 
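No driver wires up DEVLINK_CMD_ESWITCH_MODE_GET/SET in this series itself; a hypothetical driver would fill the two new devlink_ops callbacks roughly as follows. my_drv_* is a placeholder, and the mode constants are assumed to come from the devlink uAPI header:

struct my_drv {
	bool offloads_enabled;
};

static int my_drv_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
	struct my_drv *priv = devlink_priv(devlink);

	*mode = priv->offloads_enabled ? DEVLINK_ESWITCH_MODE_SWITCHDEV
				       : DEVLINK_ESWITCH_MODE_LEGACY;
	return 0;
}

static int my_drv_eswitch_mode_set(struct devlink *devlink, u16 mode)
{
	if (mode != DEVLINK_ESWITCH_MODE_LEGACY &&
	    mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
		return -EINVAL;
	/* reconfigure the embedded switch here */
	return 0;
}

static const struct devlink_ops my_drv_devlink_ops = {
	.eswitch_mode_get = my_drv_eswitch_mode_get,
	.eswitch_mode_set = my_drv_eswitch_mode_set,
};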
bpf_get_route_realm_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + /* If skb_clear_hash() was called due to mangling, we can + * trigger SW recalculation here. Later access to hash + * can then use the inline skb->hash via context directly + * instead of calling this helper again. + */ + return skb_get_hash((struct sk_buff *) (unsigned long) r1); +} + +static const struct bpf_func_proto bpf_get_hash_recalc_proto = { + .func = bpf_get_hash_recalc, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; @@ -1777,6 +1789,224 @@ const struct bpf_func_proto bpf_skb_vlan_pop_proto = { }; EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto); +static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) +{ + /* Caller already did skb_cow() with len as headroom, + * so no need to do it here. + */ + skb_push(skb, len); + memmove(skb->data, skb->data + len, off); + memset(skb->data + off, 0, len); + + /* No skb_postpush_rcsum(skb, skb->data + off, len) + * needed here as it does not change the skb->csum + * result for checksum complete when summing over + * zeroed blocks. + */ + return 0; +} + +static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) +{ + /* skb_ensure_writable() is not needed here, as we're + * already working on an uncloned skb. + */ + if (unlikely(!pskb_may_pull(skb, off + len))) + return -ENOMEM; + + skb_postpull_rcsum(skb, skb->data + off, len); + memmove(skb->data + len, skb->data, off); + __skb_pull(skb, len); + + return 0; +} + +static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len) +{ + bool trans_same = skb->transport_header == skb->network_header; + int ret; + + /* There's no need for __skb_push()/__skb_pull() pair to + * get to the start of the mac header as we're guaranteed + * to always start from here under eBPF. + */ + ret = bpf_skb_generic_push(skb, off, len); + if (likely(!ret)) { + skb->mac_header -= len; + skb->network_header -= len; + if (trans_same) + skb->transport_header = skb->network_header; + } + + return ret; +} + +static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) +{ + bool trans_same = skb->transport_header == skb->network_header; + int ret; + + /* Same here, __skb_push()/__skb_pull() pair not needed. */ + ret = bpf_skb_generic_pop(skb, off, len); + if (likely(!ret)) { + skb->mac_header += len; + skb->network_header += len; + if (trans_same) + skb->transport_header = skb->network_header; + } + + return ret; +} + +static int bpf_skb_proto_4_to_6(struct sk_buff *skb) +{ + const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); + u32 off = skb->network_header - skb->mac_header; + int ret; + + ret = skb_cow(skb, len_diff); + if (unlikely(ret < 0)) + return ret; + + ret = bpf_skb_net_hdr_push(skb, off, len_diff); + if (unlikely(ret < 0)) + return ret; + + if (skb_is_gso(skb)) { + /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to + * be changed into SKB_GSO_TCPV6. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; + } + + /* Due to IPv6 header, MSS needs to be downgraded. */ + skb_shinfo(skb)->gso_size -= len_diff; + /* Header must be checked, and gso_segs recomputed. 
*/ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + skb->protocol = htons(ETH_P_IPV6); + skb_clear_hash(skb); + + return 0; +} + +static int bpf_skb_proto_6_to_4(struct sk_buff *skb) +{ + const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); + u32 off = skb->network_header - skb->mac_header; + int ret; + + ret = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(ret < 0)) + return ret; + + ret = bpf_skb_net_hdr_pop(skb, off, len_diff); + if (unlikely(ret < 0)) + return ret; + + if (skb_is_gso(skb)) { + /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to + * be changed into SKB_GSO_TCPV4. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { + skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + } + + /* Due to IPv4 header, MSS can be upgraded. */ + skb_shinfo(skb)->gso_size += len_diff; + /* Header must be checked, and gso_segs recomputed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + skb->protocol = htons(ETH_P_IP); + skb_clear_hash(skb); + + return 0; +} + +static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) +{ + __be16 from_proto = skb->protocol; + + if (from_proto == htons(ETH_P_IP) && + to_proto == htons(ETH_P_IPV6)) + return bpf_skb_proto_4_to_6(skb); + + if (from_proto == htons(ETH_P_IPV6) && + to_proto == htons(ETH_P_IP)) + return bpf_skb_proto_6_to_4(skb); + + return -ENOTSUPP; +} + +static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + __be16 proto = (__force __be16) r2; + int ret; + + if (unlikely(flags)) + return -EINVAL; + + /* General idea is that this helper does the basic groundwork + * needed for changing the protocol, and eBPF program fills the + * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace() + * and other helpers, rather than passing a raw buffer here. + * + * The rationale is to keep this minimal and without a need to + * deal with raw packet data. F.e. even if we would pass buffers + * here, the program still needs to call the bpf_lX_csum_replace() + * helpers anyway. Plus, this way we keep also separation of + * concerns, since f.e. bpf_skb_store_bytes() should only take + * care of stores. + * + * Currently, additional options and extension header space are + * not supported, but flags register is reserved so we can adapt + * that. For offloads, we mark packet as dodgy, so that headers + * need to be verified first. + */ + ret = bpf_skb_proto_xlat(skb, proto); + bpf_compute_data_end(skb); + return ret; +} + +static const struct bpf_func_proto bpf_skb_change_proto_proto = { + .func = bpf_skb_change_proto, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + +static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + u32 pkt_type = r2; + + /* We only allow a restricted subset to be changed for now. 
*/ + if (unlikely(skb->pkt_type > PACKET_OTHERHOST || + pkt_type > PACKET_OTHERHOST)) + return -EINVAL; + + skb->pkt_type = pkt_type; + return 0; +} + +static const struct bpf_func_proto bpf_skb_change_type_proto = { + .func = bpf_skb_change_type, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + bool bpf_helper_changes_skb_data(void *func) { if (func == bpf_skb_vlan_push) @@ -1785,6 +2015,8 @@ bool bpf_helper_changes_skb_data(void *func) return true; if (func == bpf_skb_store_bytes) return true; + if (func == bpf_skb_change_proto) + return true; if (func == bpf_l3_csum_replace) return true; if (func == bpf_l4_csum_replace) @@ -2024,6 +2256,40 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } } +#ifdef CONFIG_SOCK_CGROUP_DATA +static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long)r1; + struct bpf_map *map = (struct bpf_map *)(long)r2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct cgroup *cgrp; + struct sock *sk; + u32 i = (u32)r3; + + sk = skb->sk; + if (!sk || !sk_fullsock(sk)) + return -ENOENT; + + if (unlikely(i >= array->map.max_entries)) + return -E2BIG; + + cgrp = READ_ONCE(array->ptrs[i]); + if (unlikely(!cgrp)) + return -EAGAIN; + + return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp); +} + +static const struct bpf_func_proto bpf_skb_in_cgroup_proto = { + .func = bpf_skb_in_cgroup, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; +#endif + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -2037,7 +2303,7 @@ sk_filter_func_proto(enum bpf_func_id func_id) case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: - return &bpf_get_smp_processor_id_proto; + return &bpf_get_raw_smp_processor_id_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: @@ -2072,6 +2338,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_vlan_push_proto; case BPF_FUNC_skb_vlan_pop: return &bpf_skb_vlan_pop_proto; + case BPF_FUNC_skb_change_proto: + return &bpf_skb_change_proto_proto; + case BPF_FUNC_skb_change_type: + return &bpf_skb_change_type_proto; case BPF_FUNC_skb_get_tunnel_key: return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: @@ -2084,8 +2354,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_redirect_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; + case BPF_FUNC_get_hash_recalc: + return &bpf_get_hash_recalc_proto; case BPF_FUNC_perf_event_output: return bpf_get_event_output_proto(); + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; +#ifdef CONFIG_SOCK_CGROUP_DATA + case BPF_FUNC_skb_in_cgroup: + return &bpf_skb_in_cgroup_proto; +#endif default: return sk_filter_func_proto(func_id); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index a669dea146c6..61ad43f61c5e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, } EXPORT_SYMBOL(make_flow_keys_digest); +static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; + +u32 __skb_get_hash_symmetric(struct sk_buff *skb) +{ + struct flow_keys keys; + + __flow_hash_secret_init(); + + memset(&keys, 0, sizeof(keys)); + 
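From the program side, the helpers added above surface through tc_cls_act_func_proto(), so they are callable from cls/act BPF programs only. A hedged sketch of the intended call sequence for bpf_skb_change_proto(), following its rationale comment: the helper only does the resize and GSO groundwork, and the program rewrites the headers itself. Restricted C for clang -target bpf; section/license boilerplate, the helper declaration, and the actual IPv6 header construction are omitted here:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
/* assumes the usual bpf_helpers.h-style declaration of the helper */

static int xlat_v4_to_v6(struct __sk_buff *skb)
{
	/* Grows headroom, fixes GSO bits, sets skb->protocol. */
	if (bpf_skb_change_proto(skb, __constant_htons(ETH_P_IPV6), 0))
		return TC_ACT_SHOT;

	/* ...then write a valid IPv6 header at the network offset with
	 * bpf_skb_store_bytes() and fix checksums with
	 * bpf_l3_csum_replace()/bpf_l4_csum_replace()...
	 */
	return TC_ACT_OK;
}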
__skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, + NULL, 0, 0, 0, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + + return __flow_hash_from_keys(&keys, hashrnd); +} +EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); + /** * __skb_get_hash: calculate a flow hash * @skb: sk_buff to calculate flow hash from @@ -868,6 +885,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { }, }; +static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v4addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v6addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, +}; + static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, @@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void) skb_flow_dissector_init(&flow_keys_dissector, flow_keys_dissector_keys, ARRAY_SIZE(flow_keys_dissector_keys)); + skb_flow_dissector_init(&flow_keys_dissector_symmetric, + flow_keys_dissector_symmetric_keys, + ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); skb_flow_dissector_init(&flow_keys_buf_dissector, flow_keys_buf_dissector_keys, ARRAY_SIZE(flow_keys_buf_dissector_keys)); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 510cd62fcb99..5cdc62a8eb84 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -473,7 +473,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, } if (dev->netdev_ops->ndo_neigh_construct) { - error = dev->netdev_ops->ndo_neigh_construct(n); + error = dev->netdev_ops->ndo_neigh_construct(dev, n); if (error < 0) { rc = ERR_PTR(error); goto out_neigh_release; @@ -701,7 +701,7 @@ void neigh_destroy(struct neighbour *neigh) neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) - dev->netdev_ops->ndo_neigh_destroy(neigh); + dev->netdev_ops->ndo_neigh_destroy(dev, neigh); dev_put(dev); neigh_parms_put(neigh->parms); @@ -2047,6 +2047,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) case NDTPA_DELAY_PROBE_TIME: NEIGH_VAR_SET(p, DELAY_PROBE_TIME, nla_get_msecs(tbp[i])); + call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); break; case NDTPA_RETRANS_TIME: NEIGH_VAR_SET(p, RETRANS_TIME, @@ -2930,6 +2931,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write) return; set_bit(index, p->data_state); + call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); if (!dev) /* NULL dev means this is default value */ neigh_copy_dflt_parms(net, p, index); } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7a0b616557ab..6e4f34721080 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -322,7 +322,20 @@ NETDEVICE_SHOW_RW(flags, fmt_hex); static int change_tx_queue_len(struct net_device *dev, unsigned long new_len) { - dev->tx_queue_len = new_len; + int res, orig_len = dev->tx_queue_len; + + if (new_len != orig_len) { + dev->tx_queue_len = new_len; + res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); + res = notifier_to_errno(res); + if (res) { + netdev_err(dev, + "refused to change device tx_queue_len\n"); + dev->tx_queue_len = orig_len; + return -EFAULT; + } + } + return 0; } 
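The change_tx_queue_len() rework above lets any subsystem veto a queue length change: the core fires NETDEV_CHANGE_TX_QUEUE_LEN after updating dev->tx_queue_len and rolls the value back if a notifier objects (note that this sysfs path folds the notifier's error into -EFAULT, while the rtnetlink path further below propagates it). A minimal sketch of such a notifier, assuming a hypothetical example_netdev_event() handler and an arbitrary length limit:

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	/* Reject lengths this (hypothetical) subsystem cannot handle;
	 * the core then restores the previous dev->tx_queue_len.
	 */
	if (event == NETDEV_CHANGE_TX_QUEUE_LEN && dev->tx_queue_len > 10000)
		return notifier_from_errno(-ERANGE);

	return NOTIFY_DONE;
}

static struct notifier_block example_netdev_nb = {
	.notifier_call = example_netdev_event,
};

/* hooked up once at init time via register_netdevice_notifier(&example_netdev_nb) */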
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f74ab9c3b38f..bbd118b19aef 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -213,6 +213,7 @@ /* Xmit modes */ #define M_START_XMIT 0 /* Default normal TX */ #define M_NETIF_RECEIVE 1 /* Inject packets into stack */ +#define M_QUEUE_XMIT 2 /* Inject packet into qdisc */ /* If lock -- protects updating of if_list */ #define if_lock(t) spin_lock(&(t->if_lock)); @@ -626,6 +627,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) seq_puts(seq, " xmit_mode: netif_receive\n"); + else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) + seq_puts(seq, " xmit_mode: xmit_queue\n"); seq_puts(seq, " Flags: "); @@ -1142,8 +1145,10 @@ static ssize_t pktgen_if_write(struct file *file, return len; i += len; - if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) && - (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + if ((value > 1) && + ((pkt_dev->xmit_mode == M_QUEUE_XMIT) || + ((pkt_dev->xmit_mode == M_START_XMIT) && + (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))))) return -ENOTSUPP; pkt_dev->burst = value < 1 ? 1 : value; sprintf(pg_result, "OK: burst=%d", pkt_dev->burst); @@ -1198,6 +1203,9 @@ static ssize_t pktgen_if_write(struct file *file, * at module loading time */ pkt_dev->clone_skb = 0; + } else if (strcmp(f, "queue_xmit") == 0) { + pkt_dev->xmit_mode = M_QUEUE_XMIT; + pkt_dev->last_ok = 1; } else { sprintf(pg_result, "xmit_mode -:%s:- unknown\nAvailable modes: %s", @@ -3434,6 +3442,36 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) #endif } while (--burst > 0); goto out; /* Skips xmit_mode M_START_XMIT */ + } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) { + local_bh_disable(); + atomic_inc(&pkt_dev->skb->users); + + ret = dev_queue_xmit(pkt_dev->skb); + switch (ret) { + case NET_XMIT_SUCCESS: + pkt_dev->sofar++; + pkt_dev->seq_num++; + pkt_dev->tx_bytes += pkt_dev->last_pkt_size; + break; + case NET_XMIT_DROP: + case NET_XMIT_CN: + /* These are all valid return codes for a qdisc but + * indicate packets are being dropped or will likely + * be dropped soon. + */ + case NETDEV_TX_BUSY: + /* qdisc may call dev_hard_start_xmit directly in cases + * where no queues exist e.g. loopback device, virtual + * devices, etc. In this case we need to handle + * NETDEV_TX_ codes. 
+ */ + default: + pkt_dev->errors++; + net_info_ratelimited("%s xmit error: %d\n", + pkt_dev->odevname, ret); + break; + } + goto out; } txq = skb_get_tx_queue(odev, pkt_dev->skb); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eb49ca24274a..a9e3805af739 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1927,11 +1927,19 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_TXQLEN]) { unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]); - - if (dev->tx_queue_len ^ value) + unsigned long orig_len = dev->tx_queue_len; + + if (dev->tx_queue_len ^ value) { + dev->tx_queue_len = value; + err = call_netdevice_notifiers( + NETDEV_CHANGE_TX_QUEUE_LEN, dev); + err = notifier_to_errno(err); + if (err) { + dev->tx_queue_len = orig_len; + goto errout; + } status |= DO_SETLINK_NOTIFY; - - dev->tx_queue_len = value; + } } if (tb[IFLA_OPERSTATE]) @@ -3519,7 +3527,32 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (!attr) goto nla_put_failure; - err = ops->fill_linkxstats(skb, dev, prividx); + err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); + nla_nest_end(skb, attr); + if (err) + goto nla_put_failure; + *idxattr = 0; + } + } + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, + *idxattr)) { + const struct rtnl_link_ops *ops = NULL; + const struct net_device *master; + + master = netdev_master_upper_dev_get(dev); + if (master) + ops = master->rtnl_link_ops; + if (ops && ops->fill_linkxstats) { + int err; + + *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; + attr = nla_nest_start(skb, + IFLA_STATS_LINK_XSTATS_SLAVE); + if (!attr) + goto nla_put_failure; + + err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); nla_nest_end(skb, attr); if (err) goto nla_put_failure; @@ -3555,14 +3588,35 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + int attr = IFLA_STATS_LINK_XSTATS; if (ops && ops->get_linkxstats_size) { - size += nla_total_size(ops->get_linkxstats_size(dev)); + size += nla_total_size(ops->get_linkxstats_size(dev, + attr)); /* for IFLA_STATS_LINK_XSTATS */ size += nla_total_size(0); } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, 0)) { + struct net_device *_dev = (struct net_device *)dev; + const struct rtnl_link_ops *ops = NULL; + const struct net_device *master; + + /* netdev_master_upper_dev_get can't take const */ + master = netdev_master_upper_dev_get(_dev); + if (master) + ops = master->rtnl_link_ops; + if (ops && ops->get_linkxstats_size) { + int attr = IFLA_STATS_LINK_XSTATS_SLAVE; + + size += nla_total_size(ops->get_linkxstats_size(dev, + attr)); + /* for IFLA_STATS_LINK_XSTATS_SLAVE */ + size += nla_total_size(0); + } + } + return size; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e7ec6d3ad5f0..3864b4b68fa1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3017,24 +3017,6 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page, EXPORT_SYMBOL_GPL(skb_append_pagefrags); /** - * skb_push_rcsum - push skb and update receive checksum - * @skb: buffer to update - * @len: length of data pulled - * - * This function performs an skb_push on the packet and updates - * the CHECKSUM_COMPLETE checksum. It should be used on - * receive path processing instead of skb_push unless you know - * that the checksum difference is zero (e.g., a valid IP header) - * or you are setting ip_summed to CHECKSUM_NONE. 
- */ -static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len) -{ - skb_push(skb, len); - skb_postpush_rcsum(skb, skb->data, len); - return skb->data; -} - -/** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update * @len: length of data pulled diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index df4803437888..a796fc7cbc35 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -41,6 +41,7 @@ #include <net/dn_fib.h> #include <net/dn_neigh.h> #include <net/dn_dev.h> +#include <net/nexthop.h> #define RT_MIN_TABLE 1 @@ -150,14 +151,13 @@ static int dn_fib_count_nhs(const struct nlattr *attr) struct rtnexthop *nhp = nla_data(attr); int nhs = 0, nhlen = nla_len(attr); - while(nhlen >= (int)sizeof(struct rtnexthop)) { - if ((nhlen -= nhp->rtnh_len) < 0) - return 0; + while (rtnh_ok(nhp, nhlen)) { nhs++; - nhp = RTNH_NEXT(nhp); + nhp = rtnh_next(nhp, &nhlen); } - return nhs; + /* leftover implies invalid nexthop configuration, discard it */ + return nhlen > 0 ? 0 : nhs; } static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, @@ -167,21 +167,24 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, int nhlen = nla_len(attr); change_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + int attrlen; + + if (!rtnh_ok(nhp, nhlen)) return -EINVAL; nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; nh->nh_oif = nhp->rtnh_ifindex; nh->nh_weight = nhp->rtnh_hops + 1; - if (attrlen) { + attrlen = rtnh_attrlen(nhp); + if (attrlen > 0) { struct nlattr *gw_attr; gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; } - nhp = RTNH_NEXT(nhp); + + nhp = rtnh_next(nhp, &nhlen); } endfor_nexthops(fi); return 0; diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 8c004a0c8d64..935ab932e841 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -81,7 +81,7 @@ static int lowpan_stop(struct net_device *dev) return 0; } -static int lowpan_neigh_construct(struct neighbour *n) +static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n) { struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index cbac493c913a..e23f141c9ba5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -271,7 +271,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk return dst_output(net, sk, skb); } #endif - mtu = ip_skb_dst_mtu(skb); + mtu = ip_skb_dst_mtu(sk, skb); if (skb_is_gso(skb)) return ip_finish_output_gso(net, sk, skb, mtu); @@ -541,7 +541,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, iph = ip_hdr(skb); - mtu = ip_skb_dst_mtu(skb); + mtu = ip_skb_dst_mtu(sk, skb); if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu) mtu = IPCB(skb)->frag_max_size; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 2033f929aa66..c8dd9e26b185 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -89,22 +89,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr, __be32 src_ipaddr, tgt_ipaddr; long ret; -#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) - - if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop, - ARPT_INV_ARPOP)) + if (NF_INVF(arpinfo, ARPT_INV_ARPOP, + 
(arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop)) return 0; - if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd, - ARPT_INV_ARPHRD)) + if (NF_INVF(arpinfo, ARPT_INV_ARPHRD, + (arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd)) return 0; - if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro, - ARPT_INV_ARPPRO)) + if (NF_INVF(arpinfo, ARPT_INV_ARPPRO, + (arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro)) return 0; - if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln, - ARPT_INV_ARPHLN)) + if (NF_INVF(arpinfo, ARPT_INV_ARPHLN, + (arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln)) return 0; src_devaddr = arpptr; @@ -115,31 +113,32 @@ static inline int arp_packet_match(const struct arphdr *arphdr, arpptr += dev->addr_len; memcpy(&tgt_ipaddr, arpptr, sizeof(u32)); - if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len), - ARPT_INV_SRCDEVADDR) || - FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len), - ARPT_INV_TGTDEVADDR)) + if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR, + arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, + dev->addr_len)) || + NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR, + arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, + dev->addr_len))) return 0; - if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr, - ARPT_INV_SRCIP) || - FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr), - ARPT_INV_TGTIP)) + if (NF_INVF(arpinfo, ARPT_INV_SRCIP, + (src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr) || + NF_INVF(arpinfo, ARPT_INV_TGTIP, + (tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr)) return 0; /* Look for ifname matches. */ ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); - if (FWINV(ret != 0, ARPT_INV_VIA_IN)) + if (NF_INVF(arpinfo, ARPT_INV_VIA_IN, ret != 0)) return 0; ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); - if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) + if (NF_INVF(arpinfo, ARPT_INV_VIA_OUT, ret != 0)) return 0; return 1; -#undef FWINV } static inline int arp_checkentry(const struct arpt_arp *arp) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 54906e0e8e0c..f0df66f54ce6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -58,32 +58,31 @@ ip_packet_match(const struct iphdr *ip, { unsigned long ret; -#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) - - if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, - IPT_INV_SRCIP) || - FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, - IPT_INV_DSTIP)) + if (NF_INVF(ipinfo, IPT_INV_SRCIP, + (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) || + NF_INVF(ipinfo, IPT_INV_DSTIP, + (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr)) return false; ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); - if (FWINV(ret != 0, IPT_INV_VIA_IN)) + if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); - if (FWINV(ret != 0, IPT_INV_VIA_OUT)) + if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0)) return false; /* Check specific protocol */ if (ipinfo->proto && - FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) + NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto)) return false; /* If we have a fragment rule but the packet is not a fragment * then we return zero */ - if 
(FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) + if (NF_INVF(ipinfo, IPT_INV_FRAG, + (ipinfo->flags & IPT_F_FRAG) && !isfrag)) return false; return true; @@ -122,7 +121,6 @@ static inline bool unconditional(const struct ipt_entry *e) return e->target_offset == sizeof(struct ipt_entry) && memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; -#undef FWINV } /* for const-correctness */ diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 57fc97cdac70..aebdb337fd7e 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -87,10 +87,6 @@ iptable_mangle_hook(void *priv, { if (state->hook == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, state); - if (state->hook == NF_INET_POST_ROUTING) - return ipt_do_table(skb, state, - state->net->ipv4.iptable_mangle); - /* PREROUTING/INPUT/FORWARD: */ return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index b6ea57ec5e14..fd8220213afc 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -24,6 +24,9 @@ const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) return NULL; + if (ip_hdr(oldskb)->protocol != IPPROTO_TCP) + return NULL; + oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), sizeof(struct tcphdr), _oth); if (oth == NULL) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5c7ed147449c..032a96d78c99 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2277,6 +2277,38 @@ static inline bool tcp_can_repair_sock(const struct sock *sk) ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); } +static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len) +{ + struct tcp_repair_window opt; + + if (!tp->repair) + return -EPERM; + + if (len != sizeof(opt)) + return -EINVAL; + + if (copy_from_user(&opt, optbuf, sizeof(opt))) + return -EFAULT; + + if (opt.max_window < opt.snd_wnd) + return -EINVAL; + + if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd)) + return -EINVAL; + + if (after(opt.rcv_wup, tp->rcv_nxt)) + return -EINVAL; + + tp->snd_wl1 = opt.snd_wl1; + tp->snd_wnd = opt.snd_wnd; + tp->max_window = opt.max_window; + + tp->rcv_wnd = opt.rcv_wnd; + tp->rcv_wup = opt.rcv_wup; + + return 0; +} + static int tcp_repair_options_est(struct tcp_sock *tp, struct tcp_repair_opt __user *optbuf, unsigned int len) { @@ -2604,6 +2636,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else tp->tsoffset = val - tcp_time_stamp; break; + case TCP_REPAIR_WINDOW: + err = tcp_repair_set_window(tp, optval, optlen); + break; case TCP_NOTSENT_LOWAT: tp->notsent_lowat = val; sk->sk_write_space(sk); @@ -2860,6 +2895,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EINVAL; break; + case TCP_REPAIR_WINDOW: { + struct tcp_repair_window opt; + + if (get_user(len, optlen)) + return -EFAULT; + + if (len != sizeof(opt)) + return -EINVAL; + + if (!tp->repair) + return -EPERM; + + opt.snd_wl1 = tp->snd_wl1; + opt.snd_wnd = tp->snd_wnd; + opt.max_window = tp->max_window; + opt.rcv_wnd = tp->rcv_wnd; + opt.rcv_wup = tp->rcv_wup; + + if (copy_to_user(optval, &opt, len)) + return -EFAULT; + return 0; + } case TCP_QUEUE_SEQ: if (tp->repair_queue == TCP_SEND_QUEUE) val = tp->write_seq; @@ -2969,8 +3026,18 @@ static void __tcp_alloc_md5sig_pool(void) return; for_each_possible_cpu(cpu) { + void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch; struct 
ahash_request *req; + if (!scratch) { + scratch = kmalloc_node(sizeof(union tcp_md5sum_block) + + sizeof(struct tcphdr), + GFP_KERNEL, + cpu_to_node(cpu)); + if (!scratch) + return; + per_cpu(tcp_md5sig_pool, cpu).scratch = scratch; + } if (per_cpu(tcp_md5sig_pool, cpu).md5_req) continue; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3708de2a6683..32b048e524d6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1018,27 +1018,28 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, GFP_KERNEL); } -static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, - __be32 daddr, __be32 saddr, int nbytes) +static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, + __be32 daddr, __be32 saddr, + const struct tcphdr *th, int nbytes) { struct tcp4_pseudohdr *bp; struct scatterlist sg; + struct tcphdr *_th; - bp = &hp->md5_blk.ip4; - - /* - * 1. the TCP pseudo-header (in the order: source IP address, - * destination IP address, zero-padded protocol number, and - * segment length) - */ + bp = hp->scratch; bp->saddr = saddr; bp->daddr = daddr; bp->pad = 0; bp->protocol = IPPROTO_TCP; bp->len = cpu_to_be16(nbytes); - sg_init_one(&sg, bp, sizeof(*bp)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp)); + _th = (struct tcphdr *)(bp + 1); + memcpy(_th, th, sizeof(*th)); + _th->check = 0; + + sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, + sizeof(*bp) + sizeof(*th)); return crypto_ahash_update(hp->md5_req); } @@ -1055,9 +1056,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; @@ -1101,9 +1100,7 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len)) goto clear_hash; if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) goto clear_hash; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1bcef2369d64..771be1fa4176 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) } } + free_percpu(non_pcpu_rt->rt6i_pcpu); non_pcpu_rt->rt6i_pcpu = NULL; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 63e06c3dd319..61ed95054efa 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -73,22 +73,22 @@ ip6_packet_match(const struct sk_buff *skb, unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); -#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg))) - - if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, - &ip6info->src), IP6T_INV_SRCIP) || - FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, - &ip6info->dst), IP6T_INV_DSTIP)) + if (NF_INVF(ip6info, IP6T_INV_SRCIP, + ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, + &ip6info->src)) || + NF_INVF(ip6info, IP6T_INV_DSTIP, + ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, + &ip6info->dst))) return false; ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); - if 
(FWINV(ret != 0, IP6T_INV_VIA_IN)) + if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) + if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0)) return false; /* ... might want to do something with class and flowlabel here ... */ diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index cb2b28883252..2b1a9dcdbcb3 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -83,10 +83,6 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, { if (state->hook == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, state); - if (state->hook == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, state, - state->net->ipv6.ip6table_mangle); - /* INPUT/FORWARD */ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2255d2bf5f6b..37cf91323319 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -526,26 +526,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } -static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, - const struct in6_addr *daddr, - const struct in6_addr *saddr, int nbytes) +static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + const struct tcphdr *th, int nbytes) { struct tcp6_pseudohdr *bp; struct scatterlist sg; + struct tcphdr *_th; - bp = &hp->md5_blk.ip6; + bp = hp->scratch; /* 1. TCP pseudo-header (RFC2460) */ bp->saddr = *saddr; bp->daddr = *daddr; bp->protocol = cpu_to_be32(IPPROTO_TCP); bp->len = cpu_to_be32(nbytes); - sg_init_one(&sg, bp, sizeof(*bp)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp)); + _th = (struct tcphdr *)(bp + 1); + memcpy(_th, th, sizeof(*th)); + _th->check = 0; + + sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, + sizeof(*bp) + sizeof(*th)); return crypto_ahash_update(hp->md5_req); } -static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { @@ -559,9 +566,7 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; @@ -606,9 +611,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) goto clear_hash; if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) goto clear_hash; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 0b68ba730a06..cb39e05b166c 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1765,18 +1765,12 @@ static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) if (!csock) return -ENOENT; - prog = bpf_prog_get(info->bpf_fd); + prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER); 
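/* Editorial sketch: kcm_attach_ioctl() above now uses bpf_prog_get_type()
 * in place of the open-coded check deleted just below (prog->type !=
 * BPF_PROG_TYPE_SOCKET_FILTER).  The contract relied upon here is roughly
 * the following: fetch the program by fd, reject a type mismatch with
 * -EINVAL, and drop the reference on that error path.  Illustrative only,
 * not the kernel's exact implementation:
 */
#include <linux/bpf.h>
#include <linux/err.h>

static struct bpf_prog *bpf_prog_get_type_sketch(u32 ufd,
						 enum bpf_prog_type type)
{
	struct bpf_prog *prog = bpf_prog_get(ufd);

	if (IS_ERR(prog))
		return prog;

	if (prog->type != type) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}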
if (IS_ERR(prog)) { err = PTR_ERR(prog); goto out; } - if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { - bpf_prog_put(prog); - err = -EINVAL; - goto out; - } - err = kcm_attach(sock, csock, prog); if (err) { bpf_prog_put(prog); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 95e757c377f9..9266ceebd112 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -609,9 +609,8 @@ config NETFILTER_XT_MARK The target allows you to create rules in the "mangle" table which alter the netfilter mark (nfmark) field associated with the packet. - Prior to routing, the nfmark can influence the routing method (see - "Use netfilter MARK value as routing key") and can also be used by - other subsystems to change their behavior. + Prior to routing, the nfmark can influence the routing method and can + also be used by other subsystems to change their behavior. config NETFILTER_XT_CONNMARK tristate 'ctmark target and match support' @@ -753,9 +752,8 @@ config NETFILTER_XT_TARGET_HMARK The target allows you to create rules in the "raw" and "mangle" tables which set the skbuff mark by means of hash calculation within a given - range. The nfmark can influence the routing method (see "Use netfilter - MARK value as routing key") and can also be used by other subsystems to - change their behaviour. + range. The nfmark can influence the routing method and can also be used + by other subsystems to change their behaviour. To compile it as a module, choose M here. If unsure, say N. diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f204274a9b6b..153e33ffeeaa 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -327,16 +327,10 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); - - if (nf_ct_zone_add(tmpl, flags, zone) < 0) - goto out_free; - + nf_ct_zone_add(tmpl, zone); atomic_set(&tmpl->ct_general.use, 0); return tmpl; -out_free: - kfree(tmpl); - return NULL; } EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); @@ -929,16 +923,13 @@ __nf_conntrack_alloc(struct net *net, offsetof(struct nf_conn, proto) - offsetof(struct nf_conn, __nfct_init_offset[0])); - if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0) - goto out_free; + nf_ct_zone_add(ct, zone); /* Because we use RCU lookups, we set ct_general.use to zero before * this is inserted in any list. 
*/ atomic_set(&ct->ct_general.use, 0); return ct; -out_free: - kmem_cache_free(nf_conntrack_cachep, ct); out: atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); @@ -1342,14 +1333,6 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); -#ifdef CONFIG_NF_CONNTRACK_ZONES -static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { - .len = sizeof(struct nf_conntrack_zone), - .align = __alignof__(struct nf_conntrack_zone), - .id = NF_CT_EXT_ZONE, -}; -#endif - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> @@ -1532,9 +1515,6 @@ void nf_conntrack_cleanup_end(void) nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); -#ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_extend_unregister(&nf_ct_zone_extend); -#endif nf_conntrack_proto_fini(); nf_conntrack_seqadj_fini(); nf_conntrack_labels_fini(); @@ -1617,24 +1597,14 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) } EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); -int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) +int nf_conntrack_hash_resize(unsigned int hashsize) { - int i, bucket, rc; - unsigned int hashsize, old_size; + int i, bucket; + unsigned int old_size; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; - if (current->nsproxy->net_ns != &init_net) - return -EOPNOTSUPP; - - /* On boot, we can set this without any fancy locking. */ - if (!nf_conntrack_htable_size) - return param_set_uint(val, kp); - - rc = kstrtouint(val, 0, &hashsize); - if (rc) - return rc; if (!hashsize) return -EINVAL; @@ -1642,6 +1612,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; + old_size = nf_conntrack_htable_size; + if (old_size == hashsize) { + nf_ct_free_hashtable(hash, hashsize); + return 0; + } + local_bh_disable(); nf_conntrack_all_lock(); write_seqcount_begin(&nf_conntrack_generation); @@ -1677,6 +1653,25 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) nf_ct_free_hashtable(old_hash, old_size); return 0; } + +int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) +{ + unsigned int hashsize; + int rc; + + if (current->nsproxy->net_ns != &init_net) + return -EOPNOTSUPP; + + /* On boot, we can set this without any fancy locking. 
*/ + if (!nf_conntrack_htable_size) + return param_set_uint(val, kp); + + rc = kstrtouint(val, 0, &hashsize); + if (rc) + return rc; + + return nf_conntrack_hash_resize(hashsize); +} EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, @@ -1733,7 +1728,7 @@ int nf_conntrack_init_start(void) nf_conntrack_cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn), 0, - SLAB_DESTROY_BY_RCU, NULL); + SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); if (!nf_conntrack_cachep) goto err_cachep; @@ -1773,11 +1768,6 @@ int nf_conntrack_init_start(void) if (ret < 0) goto err_seqadj; -#ifdef CONFIG_NF_CONNTRACK_ZONES - ret = nf_ct_extend_register(&nf_ct_zone_extend); - if (ret < 0) - goto err_extend; -#endif ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -1793,10 +1783,6 @@ int nf_conntrack_init_start(void) return 0; err_proto: -#ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_extend_unregister(&nf_ct_zone_extend); -err_extend: -#endif nf_conntrack_seqadj_fini(); err_seqadj: nf_conntrack_labels_fini(); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 196cb39649e1..3a1a88b9bafa 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -389,11 +389,38 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, struct net *net) { struct nf_conntrack_tuple_hash *h; + const struct hlist_nulls_node *nn; + int cpu; + + /* Get rid of expecteds, set helpers to NULL. */ + for_each_possible_cpu(cpu) { + struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + + spin_lock_bh(&pcpu->lock); + hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) + unhelp(h, me); + spin_unlock_bh(&pcpu->lock); + } +} + +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +{ + struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; const struct hlist_node *next; const struct hlist_nulls_node *nn; + struct net *net; unsigned int i; - int cpu; + + mutex_lock(&nf_ct_helper_mutex); + hlist_del_rcu(&me->hnode); + nf_ct_helper_count--; + mutex_unlock(&nf_ct_helper_mutex); + + /* Make sure nothing is still using the helper unless it is a + * connection in the hash. + */ + synchronize_rcu(); /* Get rid of expectations */ spin_lock_bh(&nf_conntrack_expect_lock); @@ -413,15 +440,11 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } spin_unlock_bh(&nf_conntrack_expect_lock); - /* Get rid of expecteds, set helpers to NULL. */ - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + rtnl_lock(); + for_each_net(net) + __nf_conntrack_helper_unregister(me, net); + rtnl_unlock(); - spin_lock_bh(&pcpu->lock); - hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) - unhelp(h, me); - spin_unlock_bh(&pcpu->lock); - } local_bh_disable(); for (i = 0; i < nf_conntrack_htable_size; i++) { nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); @@ -433,26 +456,6 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } local_bh_enable(); } - -void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) -{ - struct net *net; - - mutex_lock(&nf_ct_helper_mutex); - hlist_del_rcu(&me->hnode); - nf_ct_helper_count--; - mutex_unlock(&nf_ct_helper_mutex); - - /* Make sure every nothing is still using the helper unless its a - * connection in the hash. 
- */ - synchronize_rcu(); - - rtnl_lock(); - for_each_net(net) - __nf_conntrack_helper_unregister(me, net); - rtnl_unlock(); -} EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); static struct nf_ct_ext_type helper_extend __read_mostly = { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index c026c472ea80..2aaa188ee961 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -434,8 +434,29 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) #ifdef CONFIG_SYSCTL /* Log invalid packets of a given protocol */ -static int log_invalid_proto_min = 0; -static int log_invalid_proto_max = 255; +static int log_invalid_proto_min __read_mostly; +static int log_invalid_proto_max __read_mostly = 255; + +/* size the user *wants to set */ +static unsigned int nf_conntrack_htable_size_user __read_mostly; + +static int +nf_conntrack_hash_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (ret < 0 || !write) + return ret; + + /* update ret, we might not be able to satisfy request */ + ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user); + + /* update it to the actual value used by conntrack */ + nf_conntrack_htable_size_user = nf_conntrack_htable_size; + return ret; +} static struct ctl_table_header *nf_ct_netfilter_header; @@ -456,10 +477,10 @@ static struct ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &nf_conntrack_htable_size_user, .maxlen = sizeof(unsigned int), - .mode = 0444, - .proc_handler = proc_dointvec, + .mode = 0644, + .proc_handler = nf_conntrack_hash_sysctl, }, { .procname = "nf_conntrack_checksum", @@ -515,6 +536,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) if (net->user_ns != &init_user_ns) table[0].procname = NULL; + if (!net_eq(&init_net, net)) + table[2].mode = 0444; + net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.sysctl_header) goto out_unregister_netfilter; @@ -604,6 +628,8 @@ static int __init nf_conntrack_standalone_init(void) ret = -ENOMEM; goto out_sysctl; } + + nf_conntrack_htable_size_user = nf_conntrack_htable_size; #endif ret = register_pernet_subsys(&nf_conntrack_net_ops); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index a5d41dfa9f05..aa5847a16713 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -159,6 +159,20 @@ int nf_logger_find_get(int pf, enum nf_log_type type) struct nf_logger *logger; int ret = -ENOENT; + if (pf == NFPROTO_INET) { + ret = nf_logger_find_get(NFPROTO_IPV4, type); + if (ret < 0) + return ret; + + ret = nf_logger_find_get(NFPROTO_IPV6, type); + if (ret < 0) { + nf_logger_put(NFPROTO_IPV4, type); + return ret; + } + + return 0; + } + if (rcu_access_pointer(loggers[pf][type]) == NULL) request_module("nf-logger-%u-%u", pf, type); @@ -167,7 +181,7 @@ int nf_logger_find_get(int pf, enum nf_log_type type) if (logger == NULL) goto out; - if (logger && try_module_get(logger->me)) + if (try_module_get(logger->me)) ret = 0; out: rcu_read_unlock(); @@ -179,6 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type) { struct nf_logger *logger; + if (pf == NFPROTO_INET) { + nf_logger_put(NFPROTO_IPV4, type); + nf_logger_put(NFPROTO_IPV6, type); + return; + } + BUG_ON(loggers[pf][type] == NULL); rcu_read_lock(); @@ -398,16 +418,17 @@ static int 
nf_log_proc_dostring(struct ctl_table *table, int write, { const struct nf_logger *logger; char buf[NFLOGGER_NAME_LEN]; - size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; struct net *net = current->nsproxy->net_ns; if (write) { - if (size > sizeof(buf)) - size = sizeof(buf); - if (copy_from_user(buf, buffer, size)) - return -EFAULT; + struct ctl_table tmp = *table; + + tmp.data = buf; + r = proc_dostring(&tmp, write, buffer, lenp, ppos); + if (r) + return r; if (!strcmp(buf, "NONE")) { nf_log_unbind_pf(net, tindex); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2c881871db38..18b7f8578ee0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -131,29 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -static int nft_register_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) -{ - struct net *net = read_pnet(&basechain->pnet); - - if (basechain->flags & NFT_BASECHAIN_DISABLED) - return 0; - - return nf_register_net_hooks(net, basechain->ops, hook_nops); -} - -static void nft_unregister_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) -{ - struct net *net = read_pnet(&basechain->pnet); - - if (basechain->flags & NFT_BASECHAIN_DISABLED) - return; - - nf_unregister_net_hooks(net, basechain->ops, hook_nops); -} - -static int nf_tables_register_hooks(const struct nft_table *table, +static int nf_tables_register_hooks(struct net *net, + const struct nft_table *table, struct nft_chain *chain, unsigned int hook_nops) { @@ -161,10 +140,12 @@ static int nf_tables_register_hooks(const struct nft_table *table, !(chain->flags & NFT_BASE_CHAIN)) return 0; - return nft_register_basechain(nft_base_chain(chain), hook_nops); + return nf_register_net_hooks(net, nft_base_chain(chain)->ops, + hook_nops); } -static void nf_tables_unregister_hooks(const struct nft_table *table, +static void nf_tables_unregister_hooks(struct net *net, + const struct nft_table *table, struct nft_chain *chain, unsigned int hook_nops) { @@ -172,12 +153,9 @@ static void nf_tables_unregister_hooks(const struct nft_table *table, !(chain->flags & NFT_BASE_CHAIN)) return; - nft_unregister_basechain(nft_base_chain(chain), hook_nops); + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops); } -/* Internal table flags */ -#define NFT_TABLE_INACTIVE (1 << 15) - static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) { struct nft_trans *trans; @@ -187,7 +165,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) return -ENOMEM; if (msg_type == NFT_MSG_NEWTABLE) - ctx->table->flags |= NFT_TABLE_INACTIVE; + nft_activate_next(ctx->net, ctx->table); list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -201,7 +179,7 @@ static int nft_deltable(struct nft_ctx *ctx) if (err < 0) return err; - list_del_rcu(&ctx->table->list); + nft_deactivate_next(ctx->net, ctx->table); return err; } @@ -214,7 +192,7 @@ static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) return -ENOMEM; if (msg_type == NFT_MSG_NEWCHAIN) - ctx->chain->flags |= NFT_CHAIN_INACTIVE; + nft_activate_next(ctx->net, ctx->chain); list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -229,47 +207,17 @@ static int nft_delchain(struct nft_ctx *ctx) return err; ctx->table->use--; - list_del_rcu(&ctx->chain->list); + nft_deactivate_next(ctx->net, ctx->chain); return err; } -static inline bool -nft_rule_is_active(struct net *net, const struct nft_rule *rule) -{ 
- return (rule->genmask & nft_genmask_cur(net)) == 0; -} - -static inline int -nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) -{ - return (rule->genmask & nft_genmask_next(net)) == 0; -} - -static inline void -nft_rule_activate_next(struct net *net, struct nft_rule *rule) -{ - /* Now inactive, will be active in the future */ - rule->genmask = nft_genmask_cur(net); -} - -static inline void -nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) -{ - rule->genmask = nft_genmask_next(net); -} - -static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) -{ - rule->genmask &= ~nft_genmask_next(net); -} - static int nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) { /* You cannot delete the same rule twice */ - if (nft_rule_is_active_next(ctx->net, rule)) { - nft_rule_deactivate_next(ctx->net, rule); + if (nft_is_active_next(ctx->net, rule)) { + nft_deactivate_next(ctx->net, rule); ctx->chain->use--; return 0; } @@ -322,9 +270,6 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) return 0; } -/* Internal set flag */ -#define NFT_SET_INACTIVE (1 << 15) - static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, struct nft_set *set) { @@ -337,7 +282,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); - set->flags |= NFT_SET_INACTIVE; + nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -353,7 +298,7 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set) if (err < 0) return err; - list_del_rcu(&set->list); + nft_deactivate_next(ctx->net, set); ctx->table->use--; return err; @@ -364,26 +309,29 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set) */ static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_table *table; list_for_each_entry(table, &afi->tables, list) { - if (!nla_strcmp(nla, table->name)) + if (!nla_strcmp(nla, table->name) && + nft_active_genmask(table, genmask)) return table; } return NULL; } static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); - table = nft_table_lookup(afi, nla); + table = nft_table_lookup(afi, nla, genmask); if (table != NULL) return table; @@ -524,6 +472,8 @@ static int nf_tables_dump_tables(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); + if (!nft_is_active(net, table)) + continue; if (nf_tables_fill_table_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -548,6 +498,7 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); const struct nft_af_info *afi; const struct nft_table *table; struct sk_buff *skb2; @@ -565,11 +516,9 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; skb2 = 
alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -588,17 +537,21 @@ err: return err; } -static int nf_tables_table_enable(const struct nft_af_info *afi, +static int nf_tables_table_enable(struct net *net, + const struct nft_af_info *afi, struct nft_table *table) { struct nft_chain *chain; int err, i = 0; list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; if (!(chain->flags & NFT_BASE_CHAIN)) continue; - err = nft_register_basechain(nft_base_chain(chain), afi->nops); + err = nf_register_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); if (err < 0) goto err; @@ -607,26 +560,34 @@ static int nf_tables_table_enable(const struct nft_af_info *afi, return 0; err: list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; if (!(chain->flags & NFT_BASE_CHAIN)) continue; if (i-- <= 0) break; - nft_unregister_basechain(nft_base_chain(chain), afi->nops); + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); } return err; } -static void nf_tables_table_disable(const struct nft_af_info *afi, +static void nf_tables_table_disable(struct net *net, + const struct nft_af_info *afi, struct nft_table *table) { struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) { - if (chain->flags & NFT_BASE_CHAIN) - nft_unregister_basechain(nft_base_chain(chain), - afi->nops); + if (!nft_is_active_next(net, chain)) + continue; + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); } } @@ -656,7 +617,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) nft_trans_table_enable(trans) = false; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(ctx->afi, ctx->table); + ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table); if (ret >= 0) { ctx->table->flags &= ~NFT_TABLE_F_DORMANT; nft_trans_table_enable(trans) = true; @@ -678,6 +639,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); const struct nlattr *name; struct nft_af_info *afi; struct nft_table *table; @@ -691,7 +653,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, return PTR_ERR(afi); name = nla[NFTA_TABLE_NAME]; - table = nf_tables_table_lookup(afi, name); + table = nf_tables_table_lookup(afi, name, genmask); if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); @@ -699,8 +661,6 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, } if (table != NULL) { - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -752,6 +712,9 @@ static int nft_flush_table(struct nft_ctx *ctx) struct nft_set *set, *ns; list_for_each_entry(chain, &ctx->table->chains, list) { + if (!nft_is_active_next(ctx->net, chain)) + continue; + ctx->chain = chain; err = nft_delrule_by_chain(ctx); @@ -760,6 +723,9 @@ static int nft_flush_table(struct nft_ctx *ctx) } list_for_each_entry_safe(set, ns, &ctx->table->sets, list) { + if (!nft_is_active_next(ctx->net, set)) + continue; + if (set->flags & NFT_SET_ANONYMOUS && !list_empty(&set->bindings)) continue; @@ -770,6 +736,9 @@ static int nft_flush_table(struct nft_ctx *ctx) } list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { + if (!nft_is_active_next(ctx->net, 
chain)) + continue; + ctx->chain = chain; err = nft_delchain(ctx); @@ -795,6 +764,9 @@ static int nft_flush(struct nft_ctx *ctx, int family) ctx->afi = afi; list_for_each_entry_safe(table, nt, &afi->tables, list) { + if (!nft_is_active_next(ctx->net, table)) + continue; + if (nla[NFTA_TABLE_NAME] && nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) continue; @@ -815,6 +787,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; int family = nfmsg->nfgen_family; @@ -828,7 +801,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -875,12 +848,14 @@ EXPORT_SYMBOL_GPL(nft_unregister_chain_type); */ static struct nft_chain * -nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle) +nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle, + u8 genmask) { struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) { - if (chain->handle == handle) + if (chain->handle == handle && + nft_active_genmask(chain, genmask)) return chain; } @@ -888,7 +863,8 @@ nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle) } static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_chain *chain; @@ -896,7 +872,8 @@ static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, return ERR_PTR(-EINVAL); list_for_each_entry(chain, &table->chains, list) { - if (!nla_strcmp(nla, chain->name)) + if (!nla_strcmp(nla, chain->name) && + nft_active_genmask(chain, genmask)) return chain; } @@ -1079,6 +1056,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); + if (!nft_is_active(net, chain)) + continue; if (nf_tables_fill_chain_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -1104,6 +1083,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; @@ -1122,17 +1102,13 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; - chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -1231,6 +1207,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_chain *chain; struct nft_base_chain *basechain = NULL; struct nlattr *ha[NFTA_HOOK_MAX + 1]; + u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; struct net_device *dev = NULL; u8 policy = 
NF_ACCEPT; @@ -1247,7 +1224,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -1256,11 +1233,11 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nla[NFTA_CHAIN_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); - chain = nf_tables_chain_lookup_byhandle(table, handle); + chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); if (IS_ERR(chain)) return PTR_ERR(chain); } else { - chain = nf_tables_chain_lookup(table, name); + chain = nf_tables_chain_lookup(table, name, genmask); if (IS_ERR(chain)) { if (PTR_ERR(chain) != -ENOENT) return PTR_ERR(chain); @@ -1291,16 +1268,20 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_stats *stats = NULL; struct nft_trans *trans; - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - if (nla[NFTA_CHAIN_HANDLE] && name && - !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) - return -EEXIST; + if (nla[NFTA_CHAIN_HANDLE] && name) { + struct nft_chain *chain2; + + chain2 = nf_tables_chain_lookup(table, + nla[NFTA_CHAIN_NAME], + genmask); + if (IS_ERR(chain2)) + return PTR_ERR(chain2); + } if (nla[NFTA_CHAIN_COUNTERS]) { if (!(chain->flags & NFT_BASE_CHAIN)) @@ -1455,7 +1436,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); - err = nf_tables_register_hooks(table, chain, afi->nops); + err = nf_tables_register_hooks(net, table, chain, afi->nops); if (err < 0) goto err1; @@ -1468,7 +1449,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: - nf_tables_unregister_hooks(table, chain, afi->nops); + nf_tables_unregister_hooks(net, table, chain, afi->nops); err1: nf_tables_chain_destroy(chain); return err; @@ -1479,6 +1460,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; @@ -1489,11 +1471,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); if (chain->use > 0) @@ -1898,7 +1880,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry_rcu(table, &afi->tables, list) { list_for_each_entry_rcu(chain, &table->chains, list) { list_for_each_entry_rcu(rule, &chain->rules, list) { - if (!nft_rule_is_active(net, rule)) + if (!nft_is_active(net, rule)) goto cont; if (idx < s_idx) goto cont; @@ -1931,6 +1913,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); const struct nft_af_info *afi; 
const struct nft_table *table; const struct nft_chain *chain; @@ -1950,17 +1933,13 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) @@ -2009,6 +1988,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; @@ -2029,11 +2009,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); @@ -2102,7 +2082,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, if (rule == NULL) goto err1; - nft_rule_activate_next(net, rule); + nft_activate_next(net, rule); rule->handle = handle; rule->dlen = size; @@ -2124,14 +2104,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (nft_rule_is_active_next(net, old_rule)) { + if (nft_is_active_next(net, old_rule)) { trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, old_rule); if (trans == NULL) { err = -ENOMEM; goto err2; } - nft_rule_deactivate_next(net, old_rule); + nft_deactivate_next(net, old_rule); chain->use--; list_add_tail_rcu(&rule->list, &old_rule->list); } else { @@ -2174,6 +2154,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain = NULL; @@ -2185,12 +2166,13 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); if (nla[NFTA_RULE_CHAIN]) { - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], + genmask); if (IS_ERR(chain)) return PTR_ERR(chain); } @@ -2210,6 +2192,9 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, } } else { list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; + ctx.chain = chain; err = nft_delrule_by_chain(&ctx); if (err < 0) @@ -2339,7 +2324,8 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const 
struct nlattr * const nla[], + u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi = NULL; @@ -2355,7 +2341,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, if (afi == NULL) return -EAFNOSUPPORT; - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], + genmask); if (IS_ERR(table)) return PTR_ERR(table); } @@ -2365,7 +2352,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, } struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla) + const struct nlattr *nla, u8 genmask) { struct nft_set *set; @@ -2373,22 +2360,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, return ERR_PTR(-EINVAL); list_for_each_entry(set, &table->sets, list) { - if (!nla_strcmp(nla, set->name)) + if (!nla_strcmp(nla, set->name) && + nft_active_genmask(set, genmask)) return set; } return ERR_PTR(-ENOENT); } struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); list_for_each_entry(trans, &net->nft.commit_list, list) { + struct nft_set *set = nft_trans_set(trans); + if (trans->msg_type == NFT_MSG_NEWSET && - id == nft_trans_set_id(trans)) - return nft_trans_set(trans); + id == nft_trans_set_id(trans) && + nft_active_genmask(set, genmask)) + return set; } return ERR_PTR(-ENOENT); } @@ -2413,6 +2405,8 @@ cont: list_for_each_entry(i, &ctx->table->sets, list) { int tmp; + if (!nft_is_active_next(ctx->net, set)) + continue; if (!sscanf(i->name, name, &tmp)) continue; if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE) @@ -2432,6 +2426,8 @@ cont: snprintf(set->name, sizeof(set->name), name, min + n); list_for_each_entry(i, &ctx->table->sets, list) { + if (!nft_is_active_next(ctx->net, i)) + continue; if (!strcmp(set->name, i->name)) return -ENFILE; } @@ -2580,6 +2576,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry_rcu(set, &table->sets, list) { if (idx < s_idx) goto cont; + if (!nft_is_active(net, set)) + goto cont; ctx_set = *ctx; ctx_set.table = table; @@ -2616,6 +2614,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_cur(net); const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; @@ -2623,7 +2622,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, int err; /* Verify existence before starting dump */ - err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; @@ -2650,11 +2649,9 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, if (!nla[NFTA_SET_TABLE]) return -EINVAL; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb2 == NULL) @@ -2693,6 +2690,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); const struct nft_set_ops *ops; struct nft_af_info *afi; struct nft_table *table; 
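
A note on the genmask parameter this series threads through nf_tables_table_lookup(), nf_tables_chain_lookup(), nf_tables_set_lookup() and friends: it replaces the old NFT_TABLE_INACTIVE/NFT_CHAIN_INACTIVE/NFT_SET_INACTIVE flag checks with a two-generation activity bitmap. GET and dump requests pass nft_genmask_cur() and see only committed objects, while transaction updates (the new*/del* paths and expression init) pass nft_genmask_next() and see the staged ruleset. Below is a minimal standalone model of the mechanism; the toy_* names are invented for illustration, and the inverted encoding (a set bit marks the object inactive in that generation) is the convention these hunks appear to rely on, not code copied from the kernel headers.

	#include <stdbool.h>
	#include <stdint.h>

	struct toy_net { unsigned int gencursor; };	/* flips 0 <-> 1 on commit */
	struct toy_obj { uint8_t genmask; };		/* set bit = inactive in that gen */

	static uint8_t genmask_cur(const struct toy_net *net)
	{
		return (uint8_t)(1u << net->gencursor);
	}

	static uint8_t genmask_next(const struct toy_net *net)
	{
		return (uint8_t)(1u << (net->gencursor ^ 1u));
	}

	/* lookups take the caller's genmask: genmask_cur() for GET/dump,
	 * genmask_next() for transaction updates */
	static bool is_active(const struct toy_obj *obj, uint8_t genmask)
	{
		return (obj->genmask & genmask) == 0;
	}

	/* NEWxxx: the object becomes visible only once the transaction commits */
	static void activate_next(const struct toy_net *net, struct toy_obj *obj)
	{
		obj->genmask = genmask_cur(net);	/* inactive now, active next gen */
	}

	/* DELxxx: the object stays visible until the transaction commits */
	static void deactivate_next(const struct toy_net *net, struct toy_obj *obj)
	{
		obj->genmask = genmask_next(net);	/* active now, gone next gen */
	}

	/* Commit: flip the cursor so the staged generation becomes current.
	 * The nft_clear() calls in the commit path below then amount to
	 * wiping the stale bit so the object also survives future cycles. */
	static void commit(struct toy_net *net)
	{
		net->gencursor ^= 1u;
	}

Under this model, aborting a transaction is simply a matter of discarding the staged bits, which is what the nft_clear() calls in the abort path do for DELTABLE/DELCHAIN/DELSET.
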
@@ -2790,13 +2788,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); - set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); + set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) { if (PTR_ERR(set) != -ENOENT) return PTR_ERR(set); @@ -2895,6 +2893,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_set *set; struct nft_ctx ctx; int err; @@ -2904,11 +2903,11 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_TABLE] == NULL) return -EINVAL; - err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set); if (!list_empty(&set->bindings)) @@ -2973,7 +2972,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, list_del_rcu(&binding->list); if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS && - !(set->flags & NFT_SET_INACTIVE)) + nft_is_active(ctx->net, set)) nf_tables_set_destroy(ctx, set); } @@ -3029,7 +3028,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; @@ -3039,7 +3039,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -3136,6 +3137,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + u8 genmask = nft_genmask_cur(net); const struct nft_set *set; struct nft_set_dump_args args; struct nft_ctx ctx; @@ -3152,17 +3154,14 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) return err; err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh, - (void *)nla); + (void *)nla, genmask); if (err < 0) return err; - if (ctx.table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return -ENOENT; event = NFT_MSG_NEWSETELEM; event |= NFNL_SUBSYS_NFTABLES << 8; @@ -3216,21 +3215,19 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_cur(net); const struct nft_set *set; struct nft_ctx ctx; int err; - err = 
nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; - if (ctx.table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return -ENOENT; if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -3548,6 +3545,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; @@ -3556,15 +3554,17 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) { if (nla[NFTA_SET_ELEM_LIST_SET_ID]) { set = nf_tables_set_lookup_byid(net, - nla[NFTA_SET_ELEM_LIST_SET_ID]); + nla[NFTA_SET_ELEM_LIST_SET_ID], + genmask); } if (IS_ERR(set)) return PTR_ERR(set); @@ -3670,6 +3670,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; @@ -3678,11 +3679,12 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) return PTR_ERR(set); if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) @@ -3950,36 +3952,40 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { if (!nft_trans_table_enable(trans)) { - nf_tables_table_disable(trans->ctx.afi, + nf_tables_table_disable(net, + trans->ctx.afi, trans->ctx.table); trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; } } else { - trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE; + nft_clear(net, trans->ctx.table); } nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: + list_del_rcu(&trans->ctx.table->list); nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) nft_chain_commit_update(trans); else - trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE; + nft_clear(net, trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); nft_trans_destroy(trans); break; case NFT_MSG_DELCHAIN: + list_del_rcu(&trans->ctx.chain->list); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); - nf_tables_unregister_hooks(trans->ctx.table, + nf_tables_unregister_hooks(trans->ctx.net, + trans->ctx.table, trans->ctx.chain, trans->ctx.afi->nops); break; case 
NFT_MSG_NEWRULE: - nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_clear(trans->ctx.net, nft_trans_rule(trans)); nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); @@ -3992,7 +3998,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) NFT_MSG_DELRULE); break; case NFT_MSG_NEWSET: - nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE; + nft_clear(net, nft_trans_set(trans)); /* This avoids hitting -EBUSY when deleting the table * from the transaction. */ @@ -4005,6 +4011,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELSET: + list_del_rcu(&nft_trans_set(trans)->list); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_DELSET, GFP_KERNEL); break; @@ -4076,7 +4083,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { if (nft_trans_table_enable(trans)) { - nf_tables_table_disable(trans->ctx.afi, + nf_tables_table_disable(net, + trans->ctx.afi, trans->ctx.table); trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; } @@ -4086,8 +4094,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) } break; case NFT_MSG_DELTABLE: - list_add_tail_rcu(&trans->ctx.table->list, - &trans->ctx.afi->tables); + nft_clear(trans->ctx.net, trans->ctx.table); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: @@ -4098,15 +4105,15 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) } else { trans->ctx.table->use--; list_del_rcu(&trans->ctx.chain->list); - nf_tables_unregister_hooks(trans->ctx.table, + nf_tables_unregister_hooks(trans->ctx.net, + trans->ctx.table, trans->ctx.chain, trans->ctx.afi->nops); } break; case NFT_MSG_DELCHAIN: trans->ctx.table->use++; - list_add_tail_rcu(&trans->ctx.chain->list, - &trans->ctx.table->chains); + nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: @@ -4115,7 +4122,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; - nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: @@ -4124,8 +4131,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELSET: trans->ctx.table->use++; - list_add_tail_rcu(&nft_trans_set(trans)->list, - &trans->ctx.table->sets); + nft_clear(trans->ctx.net, nft_trans_set(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -4272,6 +4278,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, } list_for_each_entry(set, &ctx->table->sets, list) { + if (!nft_is_active_next(ctx->net, set)) + continue; if (!(set->flags & NFT_SET_MAP) || set->dtype != NFT_DATA_VERDICT) continue; @@ -4430,6 +4438,7 @@ static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla) { + u8 genmask = nft_genmask_next(ctx->net); struct nlattr *tb[NFTA_VERDICT_MAX + 1]; struct nft_chain *chain; int err; @@ -4462,7 +4471,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CHAIN]) return -EINVAL; chain = nf_tables_chain_lookup(ctx->table, - tb[NFTA_VERDICT_CHAIN]); + tb[NFTA_VERDICT_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); if (chain->flags & NFT_BASE_CHAIN) @@ 
-4640,7 +4649,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN)); - nf_tables_unregister_hooks(ctx->chain->table, ctx->chain, + nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain, ctx->afi->nops); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); @@ -4669,7 +4678,8 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) list_for_each_entry_safe(table, nt, &afi->tables, list) { list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hooks(table, chain, afi->nops); + nf_tables_unregister_hooks(net, table, chain, + afi->nops); /* No packets are walking on these chains anymore. */ ctx.table = table; list_for_each_entry(chain, &table->chains, list) { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 11f81c8385fc..cbcfdfb586a6 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -700,10 +700,13 @@ nfulnl_log_packet(struct net *net, break; case NFULNL_COPY_PACKET: - if (inst->copy_range > skb->len) + data_len = inst->copy_range; + if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) && + (li->u.ulog.copy_len < data_len)) + data_len = li->u.ulog.copy_len; + + if (data_len > skb->len) data_len = skb->len; - else - data_len = inst->copy_range; size += nla_total_size(data_len); break; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 78d4914fb39c..0af26699bf04 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -103,6 +103,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_dynset *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; u64 timeout; int err; @@ -112,11 +113,13 @@ static int nft_dynset_init(const struct nft_ctx *ctx, tb[NFTA_DYNSET_SREG_KEY] == NULL) return -EINVAL; - set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]); + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME], + genmask); if (IS_ERR(set)) { if (tb[NFTA_DYNSET_SET_ID]) set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_DYNSET_SET_ID]); + tb[NFTA_DYNSET_SET_ID], + genmask); if (IS_ERR(set)) return PTR_ERR(set); } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index f39c53a159eb..ea924816b7b8 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -153,9 +153,10 @@ static void *nft_hash_deactivate(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->pnet); struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, }; @@ -163,7 +164,8 @@ static void *nft_hash_deactivate(const struct nft_set *set, rcu_read_lock(); he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); if (he != NULL) { - if (!nft_set_elem_mark_busy(&he->ext)) + if (!nft_set_elem_mark_busy(&he->ext) || + !nft_is_active(net, &he->ext)) nft_set_elem_change_active(set, &he->ext); else he = NULL; diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 319c22b4bca2..713d66837705 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -52,7 +52,6 @@ static int nft_log_init(const struct nft_ctx *ctx, struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; - int ret; nla = 
tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -97,19 +96,6 @@ static int nft_log_init(const struct nft_ctx *ctx, break; } - if (ctx->afi->family == NFPROTO_INET) { - ret = nf_logger_find_get(NFPROTO_IPV4, li->type); - if (ret < 0) - return ret; - - ret = nf_logger_find_get(NFPROTO_IPV6, li->type); - if (ret < 0) { - nf_logger_put(NFPROTO_IPV4, li->type); - return ret; - } - return 0; - } - return nf_logger_find_get(ctx->afi->family, li->type); } @@ -122,12 +108,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx, if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); - if (ctx->afi->family == NFPROTO_INET) { - nf_logger_put(NFPROTO_IPV4, li->type); - nf_logger_put(NFPROTO_IPV6, li->type); - } else { - nf_logger_put(ctx->afi->family, li->type); - } + nf_logger_put(ctx->afi->family, li->type); } static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index b3c31ef8015d..b8d18f598569 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -22,6 +22,7 @@ struct nft_lookup { struct nft_set *set; enum nft_registers sreg:8; enum nft_registers dreg:8; + bool invert; struct nft_set_binding binding; }; @@ -32,14 +33,20 @@ static void nft_lookup_eval(const struct nft_expr *expr, const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; const struct nft_set_ext *ext; + bool found; - if (set->ops->lookup(set, &regs->data[priv->sreg], &ext)) { - if (set->flags & NFT_SET_MAP) - nft_data_copy(&regs->data[priv->dreg], - nft_set_ext_data(ext), set->dlen); + found = set->ops->lookup(set, &regs->data[priv->sreg], &ext) ^ + priv->invert; + + if (!found) { + regs->verdict.code = NFT_BREAK; return; } - regs->verdict.code = NFT_BREAK; + + if (found && set->flags & NFT_SET_MAP) + nft_data_copy(&regs->data[priv->dreg], + nft_set_ext_data(ext), set->dlen); + } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { @@ -47,6 +54,7 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 }, [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, + [NFTA_LOOKUP_FLAGS] = { .type = NLA_U32 }, }; static int nft_lookup_init(const struct nft_ctx *ctx, @@ -54,18 +62,21 @@ static int nft_lookup_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_lookup *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; + u32 flags; int err; if (tb[NFTA_LOOKUP_SET] == NULL || tb[NFTA_LOOKUP_SREG] == NULL) return -EINVAL; - set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); + set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask); if (IS_ERR(set)) { if (tb[NFTA_LOOKUP_SET_ID]) { set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_LOOKUP_SET_ID]); + tb[NFTA_LOOKUP_SET_ID], + genmask); } if (IS_ERR(set)) return PTR_ERR(set); @@ -79,7 +90,22 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (tb[NFTA_LOOKUP_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS])); + + if (flags & ~NFT_LOOKUP_F_INV) + return -EINVAL; + + if (flags & NFT_LOOKUP_F_INV) { + if (set->flags & NFT_SET_MAP) + return -EINVAL; + priv->invert = true; + } + } + if (tb[NFTA_LOOKUP_DREG] != NULL) { + if (priv->invert) + return -EINVAL; if (!(set->flags & NFT_SET_MAP)) return -EINVAL; @@ -112,6 +138,7 @@ static void nft_lookup_destroy(const struct nft_ctx *ctx, static int nft_lookup_dump(struct sk_buff
*skb, const struct nft_expr *expr) { const struct nft_lookup *priv = nft_expr_priv(expr); + u32 flags = priv->invert ? NFT_LOOKUP_F_INV : 0; if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) goto nla_put_failure; @@ -120,6 +147,8 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) if (priv->set->flags & NFT_SET_MAP) if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 16c50b0dd426..03e5e33b5c39 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -199,13 +199,6 @@ err: } EXPORT_SYMBOL_GPL(nft_meta_get_eval); -/* don't change or set _LOOPBACK, _USER, etc. */ -static bool pkt_type_ok(u32 p) -{ - return p == PACKET_HOST || p == PACKET_BROADCAST || - p == PACKET_MULTICAST || p == PACKET_OTHERHOST; -} - void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -223,7 +216,7 @@ void nft_meta_set_eval(const struct nft_expr *expr, break; case NFT_META_PKTTYPE: if (skb->pkt_type != value && - pkt_type_ok(value) && pkt_type_ok(skb->pkt_type)) + skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type)) skb->pkt_type = value; break; case NFT_META_NFTRACE: diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 7201d57b5a93..c0f638745adc 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -170,7 +170,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe, *this = elem->priv; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); + u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); int d; while (parent != NULL) { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2675d580c490..fe0e2db632c7 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1460,6 +1460,9 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) uint8_t hooknum; struct nf_hook_ops *ops; + if (!num_hooks) + return ERR_PTR(-EINVAL); + ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index a1fa2c800cb9..018eed7e1ff1 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -33,6 +33,9 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; + if (info->flags & XT_NFLOG_F_COPY_LEN) + li.u.ulog.flags |= NF_LOG_F_COPY_LEN; + nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, &li, info->prefix); return XT_CONTINUE; diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index df48967af382..858d189a1303 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -4,12 +4,23 @@ #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_log.h> MODULE_DESCRIPTION("Xtables: packet flow tracing"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); +static int trace_tg_check(const struct xt_tgchk_param *par) +{ + return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); +} + +static void trace_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_logger_put(par->family, NF_LOG_TYPE_LOG); +} + static 
unsigned int trace_tg(struct sk_buff *skb, const struct xt_action_param *par) { @@ -18,12 +29,14 @@ trace_tg(struct sk_buff *skb, const struct xt_action_param *par) } static struct xt_target trace_tg_reg __read_mostly = { - .name = "TRACE", - .revision = 0, - .family = NFPROTO_UNSPEC, - .table = "raw", - .target = trace_tg, - .me = THIS_MODULE, + .name = "TRACE", + .revision = 0, + .family = NFPROTO_UNSPEC, + .table = "raw", + .target = trace_tg, + .checkentry = trace_tg_check, + .destroy = trace_tg_destroy, + .me = THIS_MODULE, }; static int __init trace_tg_init(void) diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 1302b475abcb..a20e731b5b6c 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -21,11 +21,39 @@ static int owner_check(const struct xt_mtchk_param *par) { struct xt_owner_match_info *info = par->matchinfo; + struct net *net = par->net; - /* For now only allow adding matches from the initial user namespace */ + /* Only allow the common case where the userns of the writer + * matches the userns of the network namespace. + */ if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) && - (current_user_ns() != &init_user_ns)) + (current_user_ns() != net->user_ns)) return -EINVAL; + + /* Ensure the uids are valid */ + if (info->match & XT_OWNER_UID) { + kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); + kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); + + if (!uid_valid(uid_min) || !uid_valid(uid_max) || + (info->uid_max < info->uid_min) || + uid_lt(uid_max, uid_min)) { + return -EINVAL; + } + } + + /* Ensure the gids are valid */ + if (info->match & XT_OWNER_GID) { + kgid_t gid_min = make_kgid(net->user_ns, info->gid_min); + kgid_t gid_max = make_kgid(net->user_ns, info->gid_max); + + if (!gid_valid(gid_min) || !gid_valid(gid_max) || + (info->gid_max < info->gid_min) || + gid_lt(gid_max, gid_min)) { + return -EINVAL; + } + } + return 0; } @@ -35,6 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; struct sock *sk = skb_to_full_sk(skb); + struct net *net = par->net; if (sk == NULL || sk->sk_socket == NULL) return (info->match ^ info->invert) == 0; @@ -51,8 +80,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) (XT_OWNER_UID | XT_OWNER_GID)) == 0; if (info->match & XT_OWNER_UID) { - kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min); - kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max); + kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); + kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ !(info->invert & XT_OWNER_UID)) @@ -60,8 +89,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } if (info->match & XT_OWNER_GID) { - kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min); - kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max); + kgid_t gid_min = make_kgid(net->user_ns, info->gid_min); + kgid_t gid_max = make_kgid(net->user_ns, info->gid_max); if ((gid_gte(filp->f_cred->fsgid, gid_min) && gid_lte(filp->f_cred->fsgid, gid_max)) ^ !(info->invert & XT_OWNER_GID)) diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index c14d4645daa3..ade024c90f4f 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -83,8 +83,6 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; } -#define FWINVTCP(bool, invflg) ((bool) ^ 
!!(tcpinfo->invflags & (invflg))) - th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) { /* We've been asked to examine this packet, and we @@ -102,9 +100,8 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) ntohs(th->dest), !!(tcpinfo->invflags & XT_TCP_INV_DSTPT))) return false; - if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask) - == tcpinfo->flg_cmp, - XT_TCP_INV_FLAGS)) + if (!NF_INVF(tcpinfo, XT_TCP_INV_FLAGS, + (((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp)) return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d1f3b9e977e5..9d92c4c46871 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_scale(skb_get_hash(skb), num); + return reciprocal_scale(__skb_get_hash_symmetric(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1588,13 +1588,9 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data, if (copy_from_user(&fd, data, len)) return -EFAULT; - new = bpf_prog_get(fd); + new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); if (IS_ERR(new)) return PTR_ERR(new); - if (new->type != BPF_PROG_TYPE_SOCKET_FILTER) { - bpf_prog_put(new); - return -EINVAL; - } __fanout_set_data_bpf(po->fanout, new); return 0; diff --git a/net/rds/connection.c b/net/rds/connection.c index a4b07c899d89..19a4fee5f4dd 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -253,9 +253,12 @@ static struct rds_connection *__rds_conn_create(struct net *net, for (i = 0; i < RDS_MPATH_WORKERS; i++) { cp = &conn->c_path[i]; - trans->conn_free(cp->cp_transport_data); - if (!trans->t_mp_capable) - break; + /* The ->conn_alloc invocation may have + * allocated resource for all paths, so all + * of them may have to be freed here. 
+ */ + if (cp->cp_transport_data) + trans->conn_free(cp->cp_transport_data); } kmem_cache_free(rds_conn_slab, conn); conn = found; @@ -326,10 +329,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp) wait_event(cp->cp_waitq, !test_bit(RDS_RECV_REFILL, &cp->cp_flags)); - if (!conn->c_trans->t_mp_capable) - conn->c_trans->conn_shutdown(conn); - else - conn->c_trans->conn_path_shutdown(cp); + conn->c_trans->conn_path_shutdown(cp); rds_conn_path_reset(cp); if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING, @@ -355,9 +355,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rcu_read_lock(); if (!hlist_unhashed(&conn->c_hash_node)) { rcu_read_unlock(); - if (conn->c_trans->t_type != RDS_TRANS_TCP || - cp->cp_outgoing == 1) - rds_queue_reconnect(cp); + rds_queue_reconnect(cp); } else { rcu_read_unlock(); } @@ -370,6 +368,9 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) { struct rds_message *rm, *rtmp; + if (!cp->cp_transport_data) + return; + rds_conn_path_drop(cp); flush_work(&cp->cp_down_w); @@ -401,6 +402,8 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) void rds_conn_destroy(struct rds_connection *conn) { unsigned long flags; + int i; + struct rds_conn_path *cp; rdsdebug("freeing conn %p for %pI4 -> " "%pI4\n", conn, &conn->c_laddr, @@ -413,18 +416,10 @@ void rds_conn_destroy(struct rds_connection *conn) synchronize_rcu(); /* shut the connection down */ - if (!conn->c_trans->t_mp_capable) { - rds_conn_path_destroy(&conn->c_path[0]); - BUG_ON(!list_empty(&conn->c_path[0].cp_retrans)); - } else { - int i; - struct rds_conn_path *cp; - - for (i = 0; i < RDS_MPATH_WORKERS; i++) { - cp = &conn->c_path[i]; - rds_conn_path_destroy(cp); - BUG_ON(!list_empty(&cp->cp_retrans)); - } + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + cp = &conn->c_path[i]; + rds_conn_path_destroy(cp); + BUG_ON(!list_empty(&cp->cp_retrans)); } /* diff --git a/net/rds/ib.c b/net/rds/ib.c index 44946a681a8c..7eaf887e46f8 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -381,15 +381,15 @@ void rds_ib_exit(void) struct rds_transport rds_ib_transport = { .laddr_check = rds_ib_laddr_check, - .xmit_complete = rds_ib_xmit_complete, + .xmit_path_complete = rds_ib_xmit_path_complete, .xmit = rds_ib_xmit, .xmit_rdma = rds_ib_xmit_rdma, .xmit_atomic = rds_ib_xmit_atomic, - .recv = rds_ib_recv, + .recv_path = rds_ib_recv_path, .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, - .conn_connect = rds_ib_conn_connect, - .conn_shutdown = rds_ib_conn_shutdown, + .conn_path_connect = rds_ib_conn_path_connect, + .conn_path_shutdown = rds_ib_conn_path_shutdown, .inc_copy_to_user = rds_ib_inc_copy_to_user, .inc_free = rds_ib_inc_free, .cm_initiate_connect = rds_ib_cm_initiate_connect, diff --git a/net/rds/ib.h b/net/rds/ib.h index 627fb79aee65..046f7508c06b 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -328,8 +328,8 @@ extern struct list_head ib_nodev_conns; /* ib_cm.c */ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp); void rds_ib_conn_free(void *arg); -int rds_ib_conn_connect(struct rds_connection *conn); -void rds_ib_conn_shutdown(struct rds_connection *conn); +int rds_ib_conn_path_connect(struct rds_conn_path *cp); +void rds_ib_conn_path_shutdown(struct rds_conn_path *cp); void rds_ib_state_change(struct sock *sk); int rds_ib_listen_init(void); void rds_ib_listen_stop(void); @@ -354,7 +354,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc); /* ib_recv.c */ int rds_ib_recv_init(void); void rds_ib_recv_exit(void); -int rds_ib_recv(struct 
rds_connection *conn); +int rds_ib_recv_path(struct rds_conn_path *conn); int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic); void rds_ib_recv_free_caches(struct rds_ib_connection *ic); void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp); @@ -384,7 +384,7 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest); extern wait_queue_head_t rds_ib_ring_empty_wait; /* ib_send.c */ -void rds_ib_xmit_complete(struct rds_connection *conn); +void rds_ib_xmit_path_complete(struct rds_conn_path *cp); int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index e48bb1ba3dfc..5b2ab95afa07 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -685,8 +685,9 @@ out: return ret; } -int rds_ib_conn_connect(struct rds_connection *conn) +int rds_ib_conn_path_connect(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; struct sockaddr_in src, dest; int ret; @@ -731,8 +732,9 @@ out: * so that it can be called at any point during startup. In fact it * can be called multiple times for a given connection. */ -void rds_ib_conn_shutdown(struct rds_connection *conn) +void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; int err = 0; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 4ea8cb17cc7a..606a11f681d2 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -1009,8 +1009,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, rds_ib_recv_refill(conn, 0, GFP_NOWAIT); } -int rds_ib_recv(struct rds_connection *conn) +int rds_ib_recv_path(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; int ret = 0; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 6e4110aa5135..84d90c97332f 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -980,8 +980,9 @@ out: return ret; } -void rds_ib_xmit_complete(struct rds_connection *conn) +void rds_ib_xmit_path_complete(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; /* We may have a pending ACK or window update we were unable diff --git a/net/rds/loop.c b/net/rds/loop.c index 15f83db78f0c..f2bf78de5688 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -102,7 +102,7 @@ static void rds_loop_inc_free(struct rds_incoming *inc) } /* we need to at least give the thread something to succeed */ -static int rds_loop_recv(struct rds_connection *conn) +static int rds_loop_recv_path(struct rds_conn_path *cp) { return 0; } @@ -150,13 +150,13 @@ static void rds_loop_conn_free(void *arg) kfree(lc); } -static int rds_loop_conn_connect(struct rds_connection *conn) +static int rds_loop_conn_path_connect(struct rds_conn_path *cp) { - rds_connect_complete(conn); + rds_connect_complete(cp->cp_conn); return 0; } -static void rds_loop_conn_shutdown(struct rds_connection *conn) +static void rds_loop_conn_path_shutdown(struct rds_conn_path *cp) { } @@ -185,11 +185,11 @@ void rds_loop_exit(void) */ struct rds_transport rds_loop_transport = { .xmit = rds_loop_xmit, - .recv = rds_loop_recv, + .recv_path = rds_loop_recv_path, .conn_alloc = rds_loop_conn_alloc, .conn_free = rds_loop_conn_free, - 
.conn_connect = rds_loop_conn_connect, - .conn_shutdown = rds_loop_conn_shutdown, + .conn_path_connect = rds_loop_conn_path_connect, + .conn_path_shutdown = rds_loop_conn_path_shutdown, .inc_copy_to_user = rds_message_inc_copy_to_user, .inc_free = rds_loop_inc_free, .t_name = "loopback", diff --git a/net/rds/rds.h b/net/rds/rds.h index 2e35b738176f..6ef07bd27227 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -454,18 +454,15 @@ struct rds_transport { int (*laddr_check)(struct net *net, __be32 addr); int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp); void (*conn_free)(void *data); - int (*conn_connect)(struct rds_connection *conn); - void (*conn_shutdown)(struct rds_connection *conn); + int (*conn_path_connect)(struct rds_conn_path *cp); void (*conn_path_shutdown)(struct rds_conn_path *conn); - void (*xmit_prepare)(struct rds_connection *conn); void (*xmit_path_prepare)(struct rds_conn_path *cp); - void (*xmit_complete)(struct rds_connection *conn); void (*xmit_path_complete)(struct rds_conn_path *cp); int (*xmit)(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op); int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op); - int (*recv)(struct rds_connection *conn); + int (*recv_path)(struct rds_conn_path *cp); int (*inc_copy_to_user)(struct rds_incoming *inc, struct iov_iter *to); void (*inc_free)(struct rds_incoming *inc); diff --git a/net/rds/recv.c b/net/rds/recv.c index b58f50571782..fed53a6c2890 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -226,6 +226,10 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1; if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { + if (inc->i_hdr.h_sport == 0) { + rdsdebug("ignore ping with 0 sport from 0x%x\n", saddr); + goto out; + } rds_stats_inc(s_recv_ping); rds_send_pong(cp, inc->i_hdr.h_sport); goto out; diff --git a/net/rds/send.c b/net/rds/send.c index ee43d6b2ea8f..5a9caf1da896 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -183,12 +183,8 @@ restart: goto out; } - if (conn->c_trans->t_mp_capable) { - if (conn->c_trans->xmit_path_prepare) - conn->c_trans->xmit_path_prepare(cp); - } else if (conn->c_trans->xmit_prepare) { - conn->c_trans->xmit_prepare(conn); - } + if (conn->c_trans->xmit_path_prepare) + conn->c_trans->xmit_path_prepare(cp); /* * spin trying to push headers and data down the connection until @@ -403,12 +399,8 @@ restart: } over_batch: - if (conn->c_trans->t_mp_capable) { - if (conn->c_trans->xmit_path_complete) - conn->c_trans->xmit_path_complete(cp); - } else if (conn->c_trans->xmit_complete) { - conn->c_trans->xmit_complete(conn); - } + if (conn->c_trans->xmit_path_complete) + conn->c_trans->xmit_path_complete(cp); release_in_xmit(cp); /* Nuke any messages we decided not to retransmit. */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5217d49ce6d6..d24f6c142d03 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -136,9 +136,9 @@ void rds_tcp_restore_callbacks(struct socket *sock, * from being called while it isn't set. 
*/ void rds_tcp_reset_callbacks(struct socket *sock, - struct rds_connection *conn) + struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *osock = tc->t_sock; if (!osock) @@ -148,8 +148,8 @@ void rds_tcp_reset_callbacks(struct socket *sock, * We have an outstanding SYN to this peer, which may * potentially have transitioned to the RDS_CONN_UP state, * so we must quiesce any send threads before resetting - * c_transport_data. We quiesce these threads by setting - * c_state to something other than RDS_CONN_UP, and then + * cp_transport_data. We quiesce these threads by setting + * cp_state to something other than RDS_CONN_UP, and then * waiting for any existing threads in rds_send_xmit to * complete release_in_xmit(). (Subsequent threads entering * rds_send_xmit() will bail on !rds_conn_up(). @@ -164,8 +164,8 @@ void rds_tcp_reset_callbacks(struct socket *sock, * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change * cannot mark rds_conn_path_up() in the window before lock_sock() */ - atomic_set(&conn->c_state, RDS_CONN_RESETTING); - wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); + atomic_set(&cp->cp_state, RDS_CONN_RESETTING); + wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags)); lock_sock(osock->sk); /* reset receive side state for rds_tcp_data_recv() for osock */ if (tc->t_tinc) { @@ -186,11 +186,12 @@ void rds_tcp_reset_callbacks(struct socket *sock, release_sock(osock->sk); sock_release(osock); newsock: - rds_send_path_reset(&conn->c_path[0]); + rds_send_path_reset(cp); lock_sock(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); tc->t_sock = sock; - sock->sk->sk_user_data = conn; + tc->t_cpath = cp; + sock->sk->sk_user_data = cp; sock->sk->sk_data_ready = rds_tcp_data_ready; sock->sk->sk_write_space = rds_tcp_write_space; sock->sk->sk_state_change = rds_tcp_state_change; @@ -203,9 +204,9 @@ newsock: * above rds_tcp_reset_callbacks for notes about synchronization * with data path */ -void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) +void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc); write_lock_bh(&sock->sk->sk_callback_lock); @@ -221,12 +222,12 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) sock->sk->sk_data_ready = sock->sk->sk_user_data; tc->t_sock = sock; - tc->conn = conn; + tc->t_cpath = cp; tc->t_orig_data_ready = sock->sk->sk_data_ready; tc->t_orig_write_space = sock->sk->sk_write_space; tc->t_orig_state_change = sock->sk->sk_state_change; - sock->sk->sk_user_data = conn; + sock->sk->sk_user_data = cp; sock->sk->sk_data_ready = rds_tcp_data_ready; sock->sk->sk_write_space = rds_tcp_write_space; sock->sk->sk_state_change = rds_tcp_state_change; @@ -284,24 +285,29 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) { struct rds_tcp_connection *tc; + int i; - tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); - if (!tc) - return -ENOMEM; + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); + if (!tc) + return -ENOMEM; - mutex_init(&tc->t_conn_lock); - tc->t_sock = NULL; - tc->t_tinc = NULL; - tc->t_tinc_hdr_rem = sizeof(struct rds_header); - tc->t_tinc_data_rem = 0; + 
mutex_init(&tc->t_conn_path_lock); + tc->t_sock = NULL; + tc->t_tinc = NULL; + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; - conn->c_transport_data = tc; + conn->c_path[i].cp_transport_data = tc; + tc->t_cpath = &conn->c_path[i]; - spin_lock_irq(&rds_tcp_conn_lock); - list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); - spin_unlock_irq(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); + list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); + spin_unlock_irq(&rds_tcp_conn_lock); + rdsdebug("rds_conn_path [%d] tc %p\n", i, + conn->c_path[i].cp_transport_data); + } - rdsdebug("alloced tc %p\n", conn->c_transport_data); return 0; } @@ -318,6 +324,17 @@ static void rds_tcp_conn_free(void *arg) kmem_cache_free(rds_tcp_conn_slab, tc); } +static bool list_has_conn(struct list_head *list, struct rds_connection *conn) +{ + struct rds_tcp_connection *tc, *_tc; + + list_for_each_entry_safe(tc, _tc, list, t_tcp_node) { + if (tc->t_cpath->cp_conn == conn) + return true; + } + return false; +} + static void rds_tcp_destroy_conns(void) { struct rds_tcp_connection *tc, *_tc; @@ -325,29 +342,28 @@ static void rds_tcp_destroy_conns(void) /* avoid calling conn_destroy with irqs off */ spin_lock_irq(&rds_tcp_conn_lock); - list_splice(&rds_tcp_conn_list, &tmp_list); - INIT_LIST_HEAD(&rds_tcp_conn_list); + list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + list_move_tail(&tc->t_tcp_node, &tmp_list); + } spin_unlock_irq(&rds_tcp_conn_lock); - list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - if (tc->conn->c_passive) - rds_conn_destroy(tc->conn->c_passive); - rds_conn_destroy(tc->conn); - } + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) + rds_conn_destroy(tc->t_cpath->cp_conn); } static void rds_tcp_exit(void); struct rds_transport rds_tcp_transport = { .laddr_check = rds_tcp_laddr_check, - .xmit_prepare = rds_tcp_xmit_prepare, - .xmit_complete = rds_tcp_xmit_complete, + .xmit_path_prepare = rds_tcp_xmit_path_prepare, + .xmit_path_complete = rds_tcp_xmit_path_complete, .xmit = rds_tcp_xmit, - .recv = rds_tcp_recv, + .recv_path = rds_tcp_recv_path, .conn_alloc = rds_tcp_conn_alloc, .conn_free = rds_tcp_conn_free, - .conn_connect = rds_tcp_conn_connect, - .conn_shutdown = rds_tcp_conn_shutdown, + .conn_path_connect = rds_tcp_conn_path_connect, + .conn_path_shutdown = rds_tcp_conn_path_shutdown, .inc_copy_to_user = rds_tcp_inc_copy_to_user, .inc_free = rds_tcp_inc_free, .stats_info_copy = rds_tcp_stats_info_copy, @@ -489,10 +505,30 @@ static struct pernet_operations rds_tcp_net_ops = { .size = sizeof(struct rds_tcp_net), }; +/* explicitly send a RST on each socket, thereby releasing any socket refcnts + * that may otherwise hold up netns deletion. 
+ */ +static void rds_tcp_conn_paths_destroy(struct rds_connection *conn) +{ + struct rds_conn_path *cp; + struct rds_tcp_connection *tc; + int i; + struct sock *sk; + + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + cp = &conn->c_path[i]; + tc = cp->cp_transport_data; + if (!tc->t_sock) + continue; + sk = tc->t_sock->sk; + sk->sk_prot->disconnect(sk, 0); + tcp_done(sk); + } +} + static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; - struct sock *sk; LIST_HEAD(tmp_list); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); @@ -501,20 +537,17 @@ static void rds_tcp_kill_sock(struct net *net) flush_work(&rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->conn->c_net); + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; - list_move_tail(&tc->t_tcp_node, &tmp_list); + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + list_move_tail(&tc->t_tcp_node, &tmp_list); } spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - sk = tc->t_sock->sk; - sk->sk_prot->disconnect(sk, 0); - tcp_done(sk); - if (tc->conn->c_passive) - rds_conn_destroy(tc->conn->c_passive); - rds_conn_destroy(tc->conn); + rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn); + rds_conn_destroy(tc->t_cpath->cp_conn); } } @@ -552,12 +585,13 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->conn->c_net); + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; - rds_conn_drop(tc->conn); /* reconnect with new parameters */ + /* reconnect with new parameters */ + rds_conn_path_drop(tc->t_cpath); } spin_unlock_irq(&rds_tcp_conn_lock); } @@ -617,7 +651,7 @@ static int rds_tcp_init(void) ret = rds_tcp_recv_init(); if (ret) - goto out_slab; + goto out_pernet; ret = rds_trans_register(&rds_tcp_transport); if (ret) @@ -629,8 +663,9 @@ static int rds_tcp_init(void) out_recv: rds_tcp_recv_exit(); -out_slab: +out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); +out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 7940babf6c71..1c3160faa963 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -11,11 +11,11 @@ struct rds_tcp_incoming { struct rds_tcp_connection { struct list_head t_tcp_node; - struct rds_connection *conn; - /* t_conn_lock synchronizes the connection establishment between - * rds_tcp_accept_one and rds_tcp_conn_connect + struct rds_conn_path *t_cpath; + /* t_conn_path_lock synchronizes the connection establishment between + * rds_tcp_accept_one and rds_tcp_conn_path_connect */ - struct mutex t_conn_lock; + struct mutex t_conn_path_lock; struct socket *t_sock; void *t_orig_write_space; void *t_orig_data_ready; @@ -49,8 +49,8 @@ struct rds_tcp_statistics { /* tcp.c */ void rds_tcp_tune(struct socket *sock); void rds_tcp_nonagle(struct socket *sock); -void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); -void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn); +void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); +void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); 
u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); @@ -60,8 +60,8 @@ extern struct rds_transport rds_tcp_transport; void rds_tcp_accept_work(struct sock *sk); /* tcp_connect.c */ -int rds_tcp_conn_connect(struct rds_connection *conn); -void rds_tcp_conn_shutdown(struct rds_connection *conn); +int rds_tcp_conn_path_connect(struct rds_conn_path *cp); +void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn); void rds_tcp_state_change(struct sock *sk); /* tcp_listen.c */ @@ -75,13 +75,13 @@ int rds_tcp_keepalive(struct socket *sock); int rds_tcp_recv_init(void); void rds_tcp_recv_exit(void); void rds_tcp_data_ready(struct sock *sk); -int rds_tcp_recv(struct rds_connection *conn); +int rds_tcp_recv_path(struct rds_conn_path *cp); void rds_tcp_inc_free(struct rds_incoming *inc); int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); /* tcp_send.c */ -void rds_tcp_xmit_prepare(struct rds_connection *conn); -void rds_tcp_xmit_complete(struct rds_connection *conn); +void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp); +void rds_tcp_xmit_path_complete(struct rds_conn_path *cp); int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_tcp_write_space(struct sock *sk); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 96c2c4d17909..c916715fbe61 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -41,16 +41,16 @@ void rds_tcp_state_change(struct sock *sk) { void (*state_change)(struct sock *sk); - struct rds_connection *conn; + struct rds_conn_path *cp; struct rds_tcp_connection *tc; read_lock_bh(&sk->sk_callback_lock); - conn = sk->sk_user_data; - if (!conn) { + cp = sk->sk_user_data; + if (!cp) { state_change = sk->sk_state_change; goto out; } - tc = conn->c_transport_data; + tc = cp->cp_transport_data; state_change = tc->t_orig_state_change; rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); @@ -61,12 +61,11 @@ void rds_tcp_state_change(struct sock *sk) case TCP_SYN_RECV: break; case TCP_ESTABLISHED: - rds_connect_path_complete(&conn->c_path[0], - RDS_CONN_CONNECTING); + rds_connect_path_complete(cp, RDS_CONN_CONNECTING); break; case TCP_CLOSE_WAIT: case TCP_CLOSE: - rds_conn_drop(conn); + rds_conn_path_drop(cp); default: break; } @@ -75,17 +74,18 @@ out: state_change(sk); } -int rds_tcp_conn_connect(struct rds_connection *conn) +int rds_tcp_conn_path_connect(struct rds_conn_path *cp) { struct socket *sock = NULL; struct sockaddr_in src, dest; int ret; - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_connection *conn = cp->cp_conn; + struct rds_tcp_connection *tc = cp->cp_transport_data; - mutex_lock(&tc->t_conn_lock); + mutex_lock(&tc->t_conn_path_lock); - if (rds_conn_up(conn)) { - mutex_unlock(&tc->t_conn_lock); + if (rds_conn_path_up(cp)) { + mutex_unlock(&tc->t_conn_path_lock); return 0; } ret = sock_create_kern(rds_conn_net(conn), PF_INET, @@ -114,10 +114,11 @@ int rds_tcp_conn_connect(struct rds_connection *conn) * once we call connect() we can start getting callbacks and they * own the socket */ - rds_tcp_set_callbacks(sock, conn); + rds_tcp_set_callbacks(sock, cp); ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), O_NONBLOCK); + cp->cp_outgoing = 1; rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) ret = 0; @@ -125,11 +126,11 @@ int rds_tcp_conn_connect(struct rds_connection *conn) rds_tcp_keepalive(sock); sock = NULL; } else { - 
-		rds_tcp_restore_callbacks(sock, conn->c_transport_data);
+		rds_tcp_restore_callbacks(sock, cp->cp_transport_data);
 	}
 out:
-	mutex_unlock(&tc->t_conn_lock);
+	mutex_unlock(&tc->t_conn_path_lock);
 	if (sock)
 		sock_release(sock);
 	return ret;
@@ -144,12 +145,13 @@ out:
  * callbacks to those set by TCP. Our callbacks won't execute again once we
  * hold the sock lock.
  */
-void rds_tcp_conn_shutdown(struct rds_connection *conn)
+void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 	struct socket *sock = tc->t_sock;
 
-	rdsdebug("shutting down conn %p tc %p sock %p\n", conn, tc, sock);
+	rdsdebug("shutting down conn %p tc %p sock %p\n",
+		 cp->cp_conn, tc, sock);
 
 	if (sock) {
 		sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index f9cc945a77b3..ca975a217a49 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -79,6 +79,7 @@ int rds_tcp_accept_one(struct socket *sock)
 	struct inet_sock *inet;
 	struct rds_tcp_connection *rs_tcp = NULL;
 	int conn_state;
+	struct rds_conn_path *cp;
 
 	if (!sock) /* module unload or netns delete in progress */
 		return -ENETUNREACH;
@@ -120,8 +121,9 @@ int rds_tcp_accept_one(struct socket *sock)
 	 * rds_tcp_state_change() will do that cleanup
 	 */
 	rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
+	cp = &conn->c_path[0];
 	rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
-	mutex_lock(&rs_tcp->t_conn_lock);
+	mutex_lock(&rs_tcp->t_conn_path_lock);
 	conn_state = rds_conn_state(conn);
 	if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP)
 		goto rst_nsk;
@@ -136,16 +138,14 @@ int rds_tcp_accept_one(struct socket *sock)
 		    !conn->c_path[0].cp_outgoing) {
 			goto rst_nsk;
 		} else {
-			rds_tcp_reset_callbacks(new_sock, conn);
+			rds_tcp_reset_callbacks(new_sock, cp);
 			conn->c_path[0].cp_outgoing = 0;
 			/* rds_connect_path_complete() marks RDS_CONN_UP */
-			rds_connect_path_complete(&conn->c_path[0],
-						  RDS_CONN_RESETTING);
+			rds_connect_path_complete(cp, RDS_CONN_RESETTING);
 		}
 	} else {
-		rds_tcp_set_callbacks(new_sock, conn);
-		rds_connect_path_complete(&conn->c_path[0],
-					  RDS_CONN_CONNECTING);
+		rds_tcp_set_callbacks(new_sock, cp);
+		rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
 	}
 	new_sock = NULL;
 	ret = 0;
@@ -156,7 +156,7 @@ rst_nsk:
 	ret = 0;
 out:
 	if (rs_tcp)
-		mutex_unlock(&rs_tcp->t_conn_lock);
+		mutex_unlock(&rs_tcp->t_conn_path_lock);
 	if (new_sock)
 		sock_release(new_sock);
 	return ret;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 4a87d9ef3084..ad4892e97f91 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -34,7 +34,6 @@
 #include <linux/slab.h>
 #include <net/tcp.h>
 
-#include "rds_single_path.h"
 #include "rds.h"
 #include "tcp.h"
 
@@ -148,7 +147,7 @@ static void rds_tcp_cong_recv(struct rds_connection *conn,
 }
 
 struct rds_tcp_desc_arg {
-	struct rds_connection *conn;
+	struct rds_conn_path *conn_path;
 	gfp_t gfp;
 };
 
@@ -156,8 +155,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 			     unsigned int offset, size_t len)
 {
 	struct rds_tcp_desc_arg *arg = desc->arg.data;
-	struct rds_connection *conn = arg->conn;
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_conn_path *cp = arg->conn_path;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 	struct rds_tcp_incoming *tinc = tc->t_tinc;
 	struct sk_buff *clone;
 	size_t left = len, to_copy;
@@ -179,7 +178,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 			}
 			tc->t_tinc = tinc;
 			rdsdebug("alloced tinc %p\n", tinc);
-			rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr);
+			rds_inc_path_init(&tinc->ti_inc, cp,
+					  cp->cp_conn->c_faddr);
 			/*
 			 * XXX * we might be able to use the __ variants when
 			 * we've already serialized at a higher level.
@@ -229,6 +229,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 		}
 
 		if (tc->t_tinc_hdr_rem == 0 && tc->t_tinc_data_rem == 0) {
+			struct rds_connection *conn = cp->cp_conn;
+
 			if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
 				rds_tcp_cong_recv(conn, tinc);
 			else
@@ -251,15 +253,15 @@ out:
 }
 
 /* the caller has to hold the sock lock */
-static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp)
+static int rds_tcp_read_sock(struct rds_conn_path *cp, gfp_t gfp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 	struct socket *sock = tc->t_sock;
 	read_descriptor_t desc;
 	struct rds_tcp_desc_arg arg;
 
 	/* It's like glib in the kernel! */
-	arg.conn = conn;
+	arg.conn_path = cp;
 	arg.gfp = gfp;
 	desc.arg.data = &arg;
 	desc.error = 0;
@@ -279,16 +281,17 @@ static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp)
 * if we fail to allocate we're in trouble.. blindly wait some time before
 * trying again to see if the VM can free up something for us.
 */
-int rds_tcp_recv(struct rds_connection *conn)
+int rds_tcp_recv_path(struct rds_conn_path *cp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 	struct socket *sock = tc->t_sock;
 	int ret = 0;
 
-	rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock);
+	rdsdebug("recv worker path [%d] tc %p sock %p\n",
+		 cp->cp_index, tc, sock);
 
 	lock_sock(sock->sk);
-	ret = rds_tcp_read_sock(conn, GFP_KERNEL);
+	ret = rds_tcp_read_sock(cp, GFP_KERNEL);
 	release_sock(sock->sk);
 
 	return ret;
@@ -297,24 +300,24 @@ int rds_tcp_recv(struct rds_connection *conn)
 void rds_tcp_data_ready(struct sock *sk)
 {
 	void (*ready)(struct sock *sk);
-	struct rds_connection *conn;
+	struct rds_conn_path *cp;
 	struct rds_tcp_connection *tc;
 
 	rdsdebug("data ready sk %p\n", sk);
 
 	read_lock_bh(&sk->sk_callback_lock);
-	conn = sk->sk_user_data;
-	if (!conn) { /* check for teardown race */
+	cp = sk->sk_user_data;
+	if (!cp) { /* check for teardown race */
 		ready = sk->sk_data_ready;
 		goto out;
 	}
 
-	tc = conn->c_transport_data;
+	tc = cp->cp_transport_data;
 	ready = tc->t_orig_data_ready;
 	rds_tcp_stats_inc(s_tcp_data_ready_calls);
 
-	if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM)
-		queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
+	if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM)
+		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 710f1aae97ad..57e0f5826406 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -49,16 +49,16 @@ static void rds_tcp_cork(struct socket *sock, int val)
 	set_fs(oldfs);
 }
 
-void rds_tcp_xmit_prepare(struct rds_connection *conn)
+void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 
 	rds_tcp_cork(tc->t_sock, 1);
 }
 
-void rds_tcp_xmit_complete(struct rds_connection *conn)
+void rds_tcp_xmit_path_complete(struct rds_conn_path *cp)
 {
-	struct rds_tcp_connection *tc = conn->c_transport_data;
+	struct rds_tcp_connection *tc = cp->cp_transport_data;
 
 	rds_tcp_cork(tc->t_sock, 0);
 }
@@ -178,27 +178,27 @@ static int rds_tcp_is_acked(struct rds_message *rm, uint64_t ack)
 void rds_tcp_write_space(struct sock *sk)
 {
 	void (*write_space)(struct sock *sk);
-	struct rds_connection *conn;
+	struct rds_conn_path *cp;
 	struct rds_tcp_connection *tc;
 
 	read_lock_bh(&sk->sk_callback_lock);
-	conn = sk->sk_user_data;
-	if (!conn) {
+	cp = sk->sk_user_data;
+	if (!cp) {
 		write_space = sk->sk_write_space;
 		goto out;
 	}
 
-	tc = conn->c_transport_data;
+	tc = cp->cp_transport_data;
 	rdsdebug("write_space for tc %p\n", tc);
 	write_space = tc->t_orig_write_space;
 	rds_tcp_stats_inc(s_tcp_write_space_calls);
 
 	rdsdebug("tcp una %u\n", rds_tcp_snd_una(tc));
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
-	rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked);
+	rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
-		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
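
Note: the RDS/TCP hunks above are one mechanical conversion, from struct rds_connection to the per-path struct rds_conn_path. Socket callbacks now find a path (not a connection) in sk->sk_user_data, transport state hangs off cp->cp_transport_data instead of conn->c_transport_data, and the delayed-work items move from the connection to the path. A minimal sketch of the resulting idiom; rds_tcp_path_sketch is a hypothetical name, the fields are the ones the hunks above touch:

/* Illustrative only: how per-path code reaches its state after this
 * conversion. The owning rds_connection stays one hop away via cp_conn.
 */
static void rds_tcp_path_sketch(struct rds_conn_path *cp)
{
	struct rds_tcp_connection *tc = cp->cp_transport_data;
	struct rds_connection *conn = cp->cp_conn;

	/* send/recv work is queued per path now, not per connection */
	queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
	queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);

	rdsdebug("path [%d] conn %p tc %p\n", cp->cp_index, conn, tc);
}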
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 9fbe95bb14a9..bc97d67f29cc 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -125,6 +125,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 		 conn, &conn->c_laddr, &conn->c_faddr,
 		 cp->cp_reconnect_jiffies);
 
+	/* let peer with smaller addr initiate reconnect, to avoid duels */
+	if (conn->c_trans->t_type == RDS_TRANS_TCP &&
+	    conn->c_laddr > conn->c_faddr)
+		return;
+
 	set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
 	if (cp->cp_reconnect_jiffies == 0) {
 		cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
@@ -152,8 +157,9 @@ void rds_connect_worker(struct work_struct *work)
 	int ret;
 
 	clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
-	if (rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
-		ret = conn->c_trans->conn_connect(conn);
+	ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
+	if (ret) {
+		ret = conn->c_trans->conn_path_connect(cp);
 		rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
 			 conn, &conn->c_laddr, &conn->c_faddr, ret);
 
@@ -203,7 +209,7 @@ void rds_recv_worker(struct work_struct *work)
 	int ret;
 
 	if (rds_conn_path_state(cp) == RDS_CONN_UP) {
-		ret = cp->cp_conn->c_trans->recv(cp->cp_conn);
+		ret = cp->cp_conn->c_trans->recv_path(cp);
 		rdsdebug("conn %p ret %d\n", cp->cp_conn, ret);
 		switch (ret) {
 		case -EAGAIN:
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index f7b6cf49ea6f..ef74bffa6101 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -223,15 +223,10 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
 
 	bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);
 
-	fp = bpf_prog_get(bpf_fd);
+	fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_ACT);
 	if (IS_ERR(fp))
 		return PTR_ERR(fp);
 
-	if (fp->type != BPF_PROG_TYPE_SCHED_ACT) {
-		bpf_prog_put(fp);
-		return -EINVAL;
-	}
-
 	if (tb[TCA_ACT_BPF_NAME]) {
 		name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
 			       nla_len(tb[TCA_ACT_BPF_NAME]),
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5b135d357e1e..70cfbbf96af2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -181,7 +181,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 
 	if (!(at & AT_EGRESS)) {
 		if (m->tcfm_ok_push)
-			skb_push(skb2, skb->mac_len);
+			skb_push_rcsum(skb2, skb->mac_len);
 	}
 
 	/* mirror is always swallowed */
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 53d1486cddf7..8e573c0f8742 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -47,6 +47,8 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 		skb_set_queue_mapping(skb, d->queue_mapping);
 	if (d->flags & SKBEDIT_F_MARK)
 		skb->mark = d->mark;
+	if (d->flags & SKBEDIT_F_PTYPE)
+		skb->pkt_type = d->ptype;
 
 	spin_unlock(&d->tcf_lock);
 	return d->tcf_action;
@@ -57,6 +59,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 	[TCA_SKBEDIT_PRIORITY]		= { .len = sizeof(u32) },
 	[TCA_SKBEDIT_QUEUE_MAPPING]	= { .len = sizeof(u16) },
 	[TCA_SKBEDIT_MARK]		= { .len = sizeof(u32) },
+	[TCA_SKBEDIT_PTYPE]		= { .len = sizeof(u16) },
 };
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -68,7 +71,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	struct tc_skbedit *parm;
 	struct tcf_skbedit *d;
 	u32 flags = 0, *priority = NULL, *mark = NULL;
-	u16 *queue_mapping = NULL;
+	u16 *queue_mapping = NULL, *ptype = NULL;
 	bool exists = false;
 	int ret = 0, err;
 
@@ -92,6 +95,13 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
 	}
 
+	if (tb[TCA_SKBEDIT_PTYPE] != NULL) {
+		ptype = nla_data(tb[TCA_SKBEDIT_PTYPE]);
+		if (!skb_pkt_type_ok(*ptype))
+			return -EINVAL;
+		flags |= SKBEDIT_F_PTYPE;
+	}
+
 	if (tb[TCA_SKBEDIT_MARK] != NULL) {
 		flags |= SKBEDIT_F_MARK;
 		mark = nla_data(tb[TCA_SKBEDIT_MARK]);
@@ -132,6 +142,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		d->queue_mapping = *queue_mapping;
 	if (flags & SKBEDIT_F_MARK)
 		d->mark = *mark;
+	if (flags & SKBEDIT_F_PTYPE)
+		d->ptype = *ptype;
 
 	d->tcf_action = parm->action;
 
@@ -158,16 +170,16 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 	if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 	if ((d->flags & SKBEDIT_F_PRIORITY) &&
-	    nla_put(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
-		    &d->priority))
+	    nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, d->priority))
 		goto nla_put_failure;
 	if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) &&
-	    nla_put(skb, TCA_SKBEDIT_QUEUE_MAPPING,
-		    sizeof(d->queue_mapping), &d->queue_mapping))
+	    nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, d->queue_mapping))
 		goto nla_put_failure;
 	if ((d->flags & SKBEDIT_F_MARK) &&
-	    nla_put(skb, TCA_SKBEDIT_MARK, sizeof(d->mark),
-		    &d->mark))
+	    nla_put_u32(skb, TCA_SKBEDIT_MARK, d->mark))
+		goto nla_put_failure;
+	if ((d->flags & SKBEDIT_F_PTYPE) &&
+	    nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
 		goto nla_put_failure;
 
 	tcf_tm_dump(&t, &d->tcf_tm);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 7b342c779da7..c3002c2c68bb 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -272,15 +272,10 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
 
 	bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
 
-	fp = bpf_prog_get(bpf_fd);
+	fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
 	if (IS_ERR(fp))
 		return PTR_ERR(fp);
 
-	if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
-		bpf_prog_put(fp);
-		return -EINVAL;
-	}
-
 	if (tb[TCA_BPF_NAME]) {
 		name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
 			       nla_len(tb[TCA_BPF_NAME]),
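
Note: act_bpf.c and cls_bpf.c above delete the same open-coded pattern, because bpf_prog_get_type() folds the fd lookup and the program-type check into one call. Roughly what the removed lines did, reconstructed as a sketch (not the kernel's actual implementation of bpf_prog_get_type()):

/* Sketch: fetch a prog by fd and reject it unless it has the expected
 * type, dropping the just-taken reference on mismatch.
 */
static struct bpf_prog *prog_get_typed_sketch(u32 ufd,
					      enum bpf_prog_type type)
{
	struct bpf_prog *fp = bpf_prog_get(ufd);

	if (IS_ERR(fp))
		return fp;
	if (fp->type != type) {
		bpf_prog_put(fp);	/* release the reference we took */
		return ERR_PTR(-EINVAL);
	}
	return fp;
}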
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 8cb5eff7b79c..dff92ea772fe 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -130,7 +130,6 @@ struct hfsc_class {
 	struct rb_node vt_node;		/* parent's vt_tree member */
 	struct rb_root cf_tree;		/* active children sorted by cl_f */
 	struct rb_node cf_node;		/* parent's cf_heap member */
-	struct list_head dlist;		/* drop list member */
 
 	u64	cl_total;		/* total work in bytes */
 	u64	cl_cumul;		/* cumulative work in bytes done by
@@ -177,8 +176,6 @@ struct hfsc_sched {
 	struct hfsc_class root;		/* root class */
 	struct Qdisc_class_hash clhash; /* class hash */
 	struct rb_root eligible;	/* eligible tree */
-	struct list_head droplist;	/* active leaf class list (for
-					   dropping) */
 	struct qdisc_watchdog watchdog; /* watchdog timer */
 };
 
@@ -781,6 +778,20 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
 		else
 			go_passive = 0;
 
+		/* update vt */
+		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+			    - cl->cl_vtoff + cl->cl_vtadj;
+
+		/*
+		 * if vt of the class is smaller than cvtmin,
+		 * the class was skipped in the past due to non-fit.
+		 * if so, we need to adjust vtadj.
+		 */
+		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+			cl->cl_vt = cl->cl_parent->cl_cvtmin;
+		}
+
 		if (go_passive) {
 			/* no more active child, going passive */
 
@@ -797,25 +808,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
 			continue;
 		}
 
-		/*
-		 * update vt and f
-		 */
-		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
-			    - cl->cl_vtoff + cl->cl_vtadj;
-
-		/*
-		 * if vt of the class is smaller than cvtmin,
-		 * the class was skipped in the past due to non-fit.
-		 * if so, we need to adjust vtadj.
-		 */
-		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
-			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
-			cl->cl_vt = cl->cl_parent->cl_cvtmin;
-		}
-
 		/* update the vt tree */
 		vttree_update(cl);
 
+		/* update f */
 		if (cl->cl_flags & HFSC_USC) {
 			cl->cl_myf = cl->cl_myfadj +
 				     rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
@@ -858,7 +854,6 @@ set_active(struct hfsc_class *cl, unsigned int len)
 	if (cl->cl_flags & HFSC_FSC)
 		init_vf(cl, len);
 
-	list_add_tail(&cl->dlist, &cl->sched->droplist);
 }
 
 static void
@@ -867,8 +862,6 @@ set_passive(struct hfsc_class *cl)
 	if (cl->cl_flags & HFSC_RSC)
 		eltree_remove(cl);
 
-	list_del(&cl->dlist);
-
 	/*
 	 * vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
 	 * needs to be called explicitly to remove a class from vttree.
@@ -882,7 +875,7 @@ qdisc_peek_len(struct Qdisc *sch)
 	unsigned int len;
 
 	skb = sch->ops->peek(sch);
-	if (skb == NULL) {
+	if (unlikely(skb == NULL)) {
 		qdisc_warn_nonwc("qdisc_peek_len", sch);
 		return 0;
 	}
@@ -947,7 +940,7 @@ static void
 hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
 {
 	sc2isc(fsc, &cl->cl_fsc);
-	rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
+	rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total);
 	cl->cl_flags |= HFSC_FSC;
 }
 
@@ -1443,7 +1436,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	if (err < 0)
 		return err;
 	q->eligible = RB_ROOT;
-	INIT_LIST_HEAD(&q->droplist);
 
 	q->root.cl_common.classid = sch->handle;
 	q->root.refcnt  = 1;
@@ -1527,7 +1519,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
 			hfsc_reset_class(cl);
 	}
 	q->eligible = RB_ROOT;
-	INIT_LIST_HEAD(&q->droplist);
 	qdisc_watchdog_cancel(&q->watchdog);
 	sch->qstats.backlog = 0;
 	sch->q.qlen = 0;
@@ -1594,8 +1585,17 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 		return err;
 	}
 
-	if (cl->qdisc->q.qlen == 1)
+	if (cl->qdisc->q.qlen == 1) {
 		set_active(cl, qdisc_pkt_len(skb));
+		/*
+		 * If this is the first packet, isolate the head so an eventual
+		 * head drop before the first dequeue operation has no chance
+		 * to invalidate the deadline.
+		 */
+		if (cl->cl_flags & HFSC_RSC)
+			cl->qdisc->ops->peek(cl->qdisc);
+
+	}
 
 	qdisc_qstats_backlog_inc(sch, skb);
 	sch->q.qlen++;
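
Note: the key sch_hfsc change is a relocation inside update_vf(). The virtual-time update used to sit after the go_passive early-continue, so a class going passive never had its vt recomputed or clamped against the parent's cvtmin. Hoisted before that branch, the clamp now runs on every pass; condensed, using the same lines as the hunk above:

	/* now executed before the go_passive test, on every iteration */
	cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
		    - cl->cl_vtoff + cl->cl_vtadj;
	if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
		/* class was skipped earlier due to non-fit; pull vt up */
		cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
		cl->cl_vt = cl->cl_parent->cl_cvtmin;
	}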
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 3ad9fab1985f..1fd464764765 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -604,7 +604,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
 	link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
 	link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
-	nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]),
+	nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME],
 		    TIPC_MAX_LINK_NAME);
 
 	return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO,
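
Note: the tipc change is easy to misread as a no-op. nla_strlcpy() takes the attribute pointer itself and calls nla_data()/nla_len() internally, so the old call handed raw string bytes to something expecting a struct nlattr header. For reference, a minimal sketch of correct usage (signature as I read it in include/net/netlink.h of this era):

/* int nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); */

	/* pass the attribute, not its payload */
	nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME],
		    TIPC_MAX_LINK_NAME);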