aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c2
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/batman-adv/Kconfig2
-rw-r--r--net/batman-adv/Makefile5
-rw-r--r--net/batman-adv/bat_algo.c140
-rw-r--r--net/batman-adv/bat_algo.h32
-rw-r--r--net/batman-adv/bat_iv_ogm.c106
-rw-r--r--net/batman-adv/bat_iv_ogm.h25
-rw-r--r--net/batman-adv/bat_v.c58
-rw-r--r--net/batman-adv/bat_v.h52
-rw-r--r--net/batman-adv/bat_v_elp.c7
-rw-r--r--net/batman-adv/bat_v_elp.h4
-rw-r--r--net/batman-adv/bat_v_ogm.c9
-rw-r--r--net/batman-adv/bat_v_ogm.h4
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c1
-rw-r--r--net/batman-adv/debugfs.c240
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/fragmentation.c53
-rw-r--r--net/batman-adv/fragmentation.h6
-rw-r--r--net/batman-adv/gateway_client.c16
-rw-r--r--net/batman-adv/gateway_common.c10
-rw-r--r--net/batman-adv/hard-interface.c25
-rw-r--r--net/batman-adv/icmp_socket.c1
-rw-r--r--net/batman-adv/log.c231
-rw-r--r--net/batman-adv/log.h111
-rw-r--r--net/batman-adv/main.c709
-rw-r--r--net/batman-adv/main.h121
-rw-r--r--net/batman-adv/multicast.c501
-rw-r--r--net/batman-adv/multicast.h3
-rw-r--r--net/batman-adv/netlink.c424
-rw-r--r--net/batman-adv/netlink.h32
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/originator.c91
-rw-r--r--net/batman-adv/originator.h6
-rw-r--r--net/batman-adv/packet.h61
-rw-r--r--net/batman-adv/routing.c45
-rw-r--r--net/batman-adv/send.c100
-rw-r--r--net/batman-adv/send.h4
-rw-r--r--net/batman-adv/soft-interface.c13
-rw-r--r--net/batman-adv/sysfs.c29
-rw-r--r--net/batman-adv/tp_meter.c1507
-rw-r--r--net/batman-adv/tp_meter.h34
-rw-r--r--net/batman-adv/translation-table.c10
-rw-r--r--net/batman-adv/tvlv.c632
-rw-r--r--net/batman-adv/tvlv.h61
-rw-r--r--net/batman-adv/types.h258
-rw-r--r--net/bridge/br_device.c12
-rw-r--r--net/bridge/br_forward.c13
-rw-r--r--net/bridge/br_if.c9
-rw-r--r--net/bridge/br_input.c3
-rw-r--r--net/bridge/br_multicast.c217
-rw-r--r--net/bridge/br_netfilter_hooks.c2
-rw-r--r--net/bridge/br_netlink.c148
-rw-r--r--net/bridge/br_private.h41
-rw-r--r--net/bridge/br_sysfs_br.c25
-rw-r--r--net/bridge/netfilter/ebt_802_3.c6
-rw-r--r--net/bridge/netfilter/ebt_arp.c43
-rw-r--r--net/bridge/netfilter/ebt_ip.c28
-rw-r--r--net/bridge/netfilter/ebt_ip6.c41
-rw-r--r--net/bridge/netfilter/ebt_stp.c97
-rw-r--r--net/bridge/netfilter/ebtables.c32
-rw-r--r--net/core/dev.c90
-rw-r--r--net/core/devlink.c87
-rw-r--r--net/core/fib_rules.c49
-rw-r--r--net/core/filter.c304
-rw-r--r--net/core/flow_dissector.c43
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/net-sysfs.c15
-rw-r--r--net/core/pktgen.c42
-rw-r--r--net/core/rtnetlink.c66
-rw-r--r--net/core/skbuff.c18
-rw-r--r--net/decnet/dn_fib.c21
-rw-r--r--net/ieee802154/6lowpan/core.c2
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c41
-rw-r--r--net/ipv4/netfilter/ip_tables.c20
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c4
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c3
-rw-r--r--net/ipv4/tcp.c67
-rw-r--r--net/ipv4/tcp_ipv4.c31
-rw-r--r--net/ipv6/ip6_fib.c1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c16
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c4
-rw-r--r--net/ipv6/tcp_ipv6.c29
-rw-r--r--net/kcm/kcmsock.c8
-rw-r--r--net/netfilter/Kconfig10
-rw-r--r--net/netfilter/nf_conntrack_core.c76
-rw-r--r--net/netfilter/nf_conntrack_helper.c61
-rw-r--r--net/netfilter/nf_conntrack_standalone.c36
-rw-r--r--net/netfilter/nf_log.c33
-rw-r--r--net/netfilter/nf_tables_api.c366
-rw-r--r--net/netfilter/nfnetlink_log.c9
-rw-r--r--net/netfilter/nft_dynset.c7
-rw-r--r--net/netfilter/nft_hash.c6
-rw-r--r--net/netfilter/nft_log.c21
-rw-r--r--net/netfilter/nft_lookup.c43
-rw-r--r--net/netfilter/nft_meta.c9
-rw-r--r--net/netfilter/nft_rbtree.c2
-rw-r--r--net/netfilter/x_tables.c3
-rw-r--r--net/netfilter/xt_NFLOG.c3
-rw-r--r--net/netfilter/xt_TRACE.c25
-rw-r--r--net/netfilter/xt_owner.c41
-rw-r--r--net/netfilter/xt_tcpudp.c7
-rw-r--r--net/packet/af_packet.c8
-rw-r--r--net/rds/connection.c39
-rw-r--r--net/rds/ib.c8
-rw-r--r--net/rds/ib.h8
-rw-r--r--net/rds/ib_cm.c6
-rw-r--r--net/rds/ib_recv.c3
-rw-r--r--net/rds/ib_send.c3
-rw-r--r--net/rds/loop.c14
-rw-r--r--net/rds/rds.h7
-rw-r--r--net/rds/recv.c4
-rw-r--r--net/rds/send.c16
-rw-r--r--net/rds/tcp.c135
-rw-r--r--net/rds/tcp.h22
-rw-r--r--net/rds/tcp_connect.c38
-rw-r--r--net/rds/tcp_listen.c16
-rw-r--r--net/rds/tcp_recv.c39
-rw-r--r--net/rds/tcp_send.c20
-rw-r--r--net/rds/threads.c12
-rw-r--r--net/sched/act_bpf.c7
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_skbedit.c26
-rw-r--r--net/sched/cls_bpf.c7
-rw-r--r--net/sched/sch_hfsc.c54
-rw-r--r--net/tipc/netlink_compat.c2
128 files changed, 6470 insertions, 2261 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 86ae75b77390..c8f422c90856 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -790,6 +790,8 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup,
#endif
.ndo_fix_features = vlan_dev_fix_features,
+ .ndo_neigh_construct = netdev_default_l2upper_neigh_construct,
+ .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy,
.ndo_fdb_add = switchdev_port_fdb_add,
.ndo_fdb_del = switchdev_port_fdb_del,
.ndo_fdb_dump = switchdev_port_fdb_dump,
diff --git a/net/atm/clip.c b/net/atm/clip.c
index e07f551a863c..53b4ac09e7b7 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -286,7 +286,7 @@ static const struct neigh_ops clip_neigh_ops = {
.connected_output = neigh_direct_output,
};
-static int clip_constructor(struct neighbour *neigh)
+static int clip_constructor(struct net_device *dev, struct neighbour *neigh)
{
struct atmarp_entry *entry = neighbour_priv(neigh);
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index f66930ee3c0b..833bb145ba3c 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -66,7 +66,7 @@ config BATMAN_ADV_NC
config BATMAN_ADV_MCAST
bool "Multicast optimisation"
- depends on BATMAN_ADV
+ depends on BATMAN_ADV && INET && !(BRIDGE=m && BATMAN_ADV=y)
default n
help
This option enables the multicast optimisation which aims to
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 797cf2fc88c1..a83fc6c58d19 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -17,6 +17,7 @@
#
obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
+batman-adv-y += bat_algo.o
batman-adv-y += bat_iv_ogm.o
batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v.o
batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o
@@ -31,12 +32,16 @@ batman-adv-y += gateway_common.o
batman-adv-y += hard-interface.o
batman-adv-y += hash.o
batman-adv-y += icmp_socket.o
+batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o
batman-adv-y += main.o
batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
+batman-adv-y += netlink.o
batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
batman-adv-y += originator.o
batman-adv-y += routing.o
batman-adv-y += send.o
batman-adv-y += soft-interface.o
batman-adv-y += sysfs.o
+batman-adv-y += tp_meter.o
batman-adv-y += translation-table.o
+batman-adv-y += tvlv.o
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
new file mode 100644
index 000000000000..81dbbf569bd4
--- /dev/null
+++ b/net/batman-adv/bat_algo.c
@@ -0,0 +1,140 @@
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/moduleparam.h>
+#include <linux/printk.h>
+#include <linux/seq_file.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+
+#include "bat_algo.h"
+
+char batadv_routing_algo[20] = "BATMAN_IV";
+static struct hlist_head batadv_algo_list;
+
+/**
+ * batadv_algo_init - Initialize batman-adv algorithm management data structures
+ */
+void batadv_algo_init(void)
+{
+ INIT_HLIST_HEAD(&batadv_algo_list);
+}
+
+static struct batadv_algo_ops *batadv_algo_get(char *name)
+{
+ struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp;
+
+ hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) {
+ if (strcmp(bat_algo_ops_tmp->name, name) != 0)
+ continue;
+
+ bat_algo_ops = bat_algo_ops_tmp;
+ break;
+ }
+
+ return bat_algo_ops;
+}
+
+int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
+{
+ struct batadv_algo_ops *bat_algo_ops_tmp;
+
+ bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name);
+ if (bat_algo_ops_tmp) {
+ pr_info("Trying to register already registered routing algorithm: %s\n",
+ bat_algo_ops->name);
+ return -EEXIST;
+ }
+
+ /* all algorithms must implement all ops (for now) */
+ if (!bat_algo_ops->iface.enable ||
+ !bat_algo_ops->iface.disable ||
+ !bat_algo_ops->iface.update_mac ||
+ !bat_algo_ops->iface.primary_set ||
+ !bat_algo_ops->neigh.cmp ||
+ !bat_algo_ops->neigh.is_similar_or_better) {
+ pr_info("Routing algo '%s' does not implement required ops\n",
+ bat_algo_ops->name);
+ return -EINVAL;
+ }
+
+ INIT_HLIST_NODE(&bat_algo_ops->list);
+ hlist_add_head(&bat_algo_ops->list, &batadv_algo_list);
+
+ return 0;
+}
+
+int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
+{
+ struct batadv_algo_ops *bat_algo_ops;
+
+ bat_algo_ops = batadv_algo_get(name);
+ if (!bat_algo_ops)
+ return -EINVAL;
+
+ bat_priv->algo_ops = bat_algo_ops;
+
+ return 0;
+}
+
+int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
+{
+ struct batadv_algo_ops *bat_algo_ops;
+
+ seq_puts(seq, "Available routing algorithms:\n");
+
+ hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
+ seq_printf(seq, " * %s\n", bat_algo_ops->name);
+ }
+
+ return 0;
+}
+
+static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
+{
+ struct batadv_algo_ops *bat_algo_ops;
+ char *algo_name = (char *)val;
+ size_t name_len = strlen(algo_name);
+
+ if (name_len > 0 && algo_name[name_len - 1] == '\n')
+ algo_name[name_len - 1] = '\0';
+
+ bat_algo_ops = batadv_algo_get(algo_name);
+ if (!bat_algo_ops) {
+ pr_err("Routing algorithm '%s' is not supported\n", algo_name);
+ return -EINVAL;
+ }
+
+ return param_set_copystring(algo_name, kp);
+}
+
+static const struct kernel_param_ops batadv_param_ops_ra = {
+ .set = batadv_param_set_ra,
+ .get = param_get_string,
+};
+
+static struct kparam_string batadv_param_string_ra = {
+ .maxlen = sizeof(batadv_routing_algo),
+ .string = batadv_routing_algo,
+};
+
+module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
+ 0644);
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 03dafd33d23b..860d773dd8fa 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -18,32 +18,18 @@
#ifndef _NET_BATMAN_ADV_BAT_ALGO_H_
#define _NET_BATMAN_ADV_BAT_ALGO_H_
-struct batadv_priv;
+#include "main.h"
-int batadv_iv_init(void);
+#include <linux/types.h>
-#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+struct seq_file;
-int batadv_v_init(void);
-int batadv_v_mesh_init(struct batadv_priv *bat_priv);
-void batadv_v_mesh_free(struct batadv_priv *bat_priv);
+extern char batadv_routing_algo[];
+extern struct list_head batadv_hardif_list;
-#else
-
-static inline int batadv_v_init(void)
-{
- return 0;
-}
-
-static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv)
-{
- return 0;
-}
-
-static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv)
-{
-}
-
-#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+void batadv_algo_init(void);
+int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
+int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
+int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
#endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index ce2f203048d3..19b0abd6c640 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -15,7 +15,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "bat_algo.h"
+#include "bat_iv_ogm.h"
#include "main.h"
#include <linux/atomic.h>
@@ -30,8 +30,9 @@
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/jiffies.h>
-#include <linux/list.h>
+#include <linux/kernel.h>
#include <linux/kref.h>
+#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/pkt_sched.h>
@@ -48,15 +49,20 @@
#include <linux/types.h>
#include <linux/workqueue.h>
+#include "bat_algo.h"
#include "bitarray.h"
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "network-coding.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
#include "send.h"
#include "translation-table.h"
+#include "tvlv.h"
+
+static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work);
/**
* enum batadv_dup_status - duplicate status
@@ -336,7 +342,8 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
{
struct batadv_neigh_node *neigh_node;
- neigh_node = batadv_neigh_node_new(orig_node, hard_iface, neigh_addr);
+ neigh_node = batadv_neigh_node_get_or_create(orig_node,
+ hard_iface, neigh_addr);
if (!neigh_node)
goto out;
@@ -730,7 +737,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
/* start timer for this packet */
INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
- batadv_send_outstanding_bat_ogm_packet);
+ batadv_iv_send_outstanding_bat_ogm_packet);
queue_delayed_work(batadv_event_workqueue,
&forw_packet_aggr->delayed_work,
send_time - jiffies);
@@ -937,6 +944,19 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
u16 tvlv_len = 0;
unsigned long send_time;
+ if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
+ (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+ return;
+
+ /* the interface gets activated here to avoid race conditions between
+ * the moment of activating the interface in
+ * hardif_activate_interface() where the originator mac is set and
+ * outdated packets (especially uninitialized mac addresses) in the
+ * packet queue
+ */
+ if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED)
+ hard_iface->if_status = BATADV_IF_ACTIVE;
+
primary_if = batadv_primary_if_get_selected(bat_priv);
if (hard_iface == primary_if) {
@@ -1778,6 +1798,45 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
batadv_orig_node_put(orig_node);
}
+static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work)
+{
+ struct delayed_work *delayed_work;
+ struct batadv_forw_packet *forw_packet;
+ struct batadv_priv *bat_priv;
+
+ delayed_work = to_delayed_work(work);
+ forw_packet = container_of(delayed_work, struct batadv_forw_packet,
+ delayed_work);
+ bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
+ spin_lock_bh(&bat_priv->forw_bat_list_lock);
+ hlist_del(&forw_packet->list);
+ spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
+ goto out;
+
+ batadv_iv_ogm_emit(forw_packet);
+
+ /* we have to have at least one packet in the queue to determine the
+ * queues wake up time unless we are shutting down.
+ *
+ * only re-schedule if this is the "original" copy, e.g. the OGM of the
+ * primary interface should only be rescheduled once per period, but
+ * this function will be called for the forw_packet instances of the
+ * other secondary interfaces as well.
+ */
+ if (forw_packet->own &&
+ forw_packet->if_incoming == forw_packet->if_outgoing)
+ batadv_iv_ogm_schedule(forw_packet->if_incoming);
+
+out:
+ /* don't count own packet */
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->batman_queue_left);
+
+ batadv_forw_packet_free(forw_packet);
+}
+
static int batadv_iv_ogm_receive(struct sk_buff *skb,
struct batadv_hard_iface *if_incoming)
{
@@ -1794,7 +1853,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
/* did we receive a B.A.T.M.A.N. IV OGM packet on an interface
* that does not have B.A.T.M.A.N. IV enabled ?
*/
- if (bat_priv->bat_algo_ops->bat_ogm_emit != batadv_iv_ogm_emit)
+ if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable)
return NET_RX_DROP;
batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
@@ -2052,21 +2111,32 @@ out:
return ret;
}
+static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface)
+{
+ /* begin scheduling originator messages on that interface */
+ batadv_iv_ogm_schedule(hard_iface);
+}
+
static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
.name = "BATMAN_IV",
- .bat_iface_enable = batadv_iv_ogm_iface_enable,
- .bat_iface_disable = batadv_iv_ogm_iface_disable,
- .bat_iface_update_mac = batadv_iv_ogm_iface_update_mac,
- .bat_primary_iface_set = batadv_iv_ogm_primary_iface_set,
- .bat_ogm_schedule = batadv_iv_ogm_schedule,
- .bat_ogm_emit = batadv_iv_ogm_emit,
- .bat_neigh_cmp = batadv_iv_ogm_neigh_cmp,
- .bat_neigh_is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
- .bat_neigh_print = batadv_iv_neigh_print,
- .bat_orig_print = batadv_iv_ogm_orig_print,
- .bat_orig_free = batadv_iv_ogm_orig_free,
- .bat_orig_add_if = batadv_iv_ogm_orig_add_if,
- .bat_orig_del_if = batadv_iv_ogm_orig_del_if,
+ .iface = {
+ .activate = batadv_iv_iface_activate,
+ .enable = batadv_iv_ogm_iface_enable,
+ .disable = batadv_iv_ogm_iface_disable,
+ .update_mac = batadv_iv_ogm_iface_update_mac,
+ .primary_set = batadv_iv_ogm_primary_iface_set,
+ },
+ .neigh = {
+ .cmp = batadv_iv_ogm_neigh_cmp,
+ .is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
+ .print = batadv_iv_neigh_print,
+ },
+ .orig = {
+ .print = batadv_iv_ogm_orig_print,
+ .free = batadv_iv_ogm_orig_free,
+ .add_if = batadv_iv_ogm_orig_add_if,
+ .del_if = batadv_iv_ogm_orig_del_if,
+ },
};
int __init batadv_iv_init(void)
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
new file mode 100644
index 000000000000..b9f3550faaf7
--- /dev/null
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -0,0 +1,25 @@
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _BATMAN_ADV_BATADV_IV_OGM_H_
+#define _BATMAN_ADV_BATADV_IV_OGM_H_
+
+#include "main.h"
+
+int batadv_iv_init(void);
+
+#endif /* _BATMAN_ADV_BATADV_IV_OGM_H_ */
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 0a12e5cdd65d..0366cbf5e444 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -15,7 +15,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "bat_algo.h"
+#include "bat_v.h"
#include "main.h"
#include <linux/atomic.h>
@@ -31,6 +31,7 @@
#include <linux/types.h>
#include <linux/workqueue.h>
+#include "bat_algo.h"
#include "bat_v_elp.h"
#include "bat_v_ogm.h"
#include "hard-interface.h"
@@ -70,11 +71,6 @@ static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface)
if (ret < 0)
batadv_v_elp_iface_disable(hard_iface);
- /* enable link throughput auto-detection by setting the throughput
- * override to zero
- */
- atomic_set(&hard_iface->bat_v.throughput_override, 0);
-
return ret;
}
@@ -119,14 +115,6 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
batadv_v_elp_throughput_metric_update);
}
-static void batadv_v_ogm_schedule(struct batadv_hard_iface *hard_iface)
-{
-}
-
-static void batadv_v_ogm_emit(struct batadv_forw_packet *forw_packet)
-{
-}
-
/**
* batadv_v_orig_print_neigh - print neighbors for the originator table
* @orig_node: the orig_node for which the neighbors are printed
@@ -334,21 +322,39 @@ err_ifinfo1:
static struct batadv_algo_ops batadv_batman_v __read_mostly = {
.name = "BATMAN_V",
- .bat_iface_activate = batadv_v_iface_activate,
- .bat_iface_enable = batadv_v_iface_enable,
- .bat_iface_disable = batadv_v_iface_disable,
- .bat_iface_update_mac = batadv_v_iface_update_mac,
- .bat_primary_iface_set = batadv_v_primary_iface_set,
- .bat_hardif_neigh_init = batadv_v_hardif_neigh_init,
- .bat_ogm_emit = batadv_v_ogm_emit,
- .bat_ogm_schedule = batadv_v_ogm_schedule,
- .bat_orig_print = batadv_v_orig_print,
- .bat_neigh_cmp = batadv_v_neigh_cmp,
- .bat_neigh_is_similar_or_better = batadv_v_neigh_is_sob,
- .bat_neigh_print = batadv_v_neigh_print,
+ .iface = {
+ .activate = batadv_v_iface_activate,
+ .enable = batadv_v_iface_enable,
+ .disable = batadv_v_iface_disable,
+ .update_mac = batadv_v_iface_update_mac,
+ .primary_set = batadv_v_primary_iface_set,
+ },
+ .neigh = {
+ .hardif_init = batadv_v_hardif_neigh_init,
+ .cmp = batadv_v_neigh_cmp,
+ .is_similar_or_better = batadv_v_neigh_is_sob,
+ .print = batadv_v_neigh_print,
+ },
+ .orig = {
+ .print = batadv_v_orig_print,
+ },
};
/**
+ * batadv_v_hardif_init - initialize the algorithm specific fields in the
+ * hard-interface object
+ * @hard_iface: the hard-interface to initialize
+ */
+void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface)
+{
+ /* enable link throughput auto-detection by setting the throughput
+ * override to zero
+ */
+ atomic_set(&hard_iface->bat_v.throughput_override, 0);
+ atomic_set(&hard_iface->bat_v.elp_interval, 500);
+}
+
+/**
* batadv_v_mesh_init - initialize the B.A.T.M.A.N. V private resources for a
* mesh
* @bat_priv: the object representing the mesh interface to initialise
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
new file mode 100644
index 000000000000..83b77639729e
--- /dev/null
+++ b/net/batman-adv/bat_v.h
@@ -0,0 +1,52 @@
+/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Linus Lüssing
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_BAT_V_H_
+#define _NET_BATMAN_ADV_BAT_V_H_
+
+#include "main.h"
+
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+
+int batadv_v_init(void);
+void batadv_v_hardif_init(struct batadv_hard_iface *hardif);
+int batadv_v_mesh_init(struct batadv_priv *bat_priv);
+void batadv_v_mesh_free(struct batadv_priv *bat_priv);
+
+#else
+
+static inline int batadv_v_init(void)
+{
+ return 0;
+}
+
+static inline void batadv_v_hardif_init(struct batadv_hard_iface *hardif)
+{
+}
+
+static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv)
+{
+}
+
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
+#endif /* _NET_BATMAN_ADV_BAT_V_H_ */
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index df42eb1365a0..7d170010beb9 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -43,6 +43,7 @@
#include "bat_algo.h"
#include "bat_v_ogm.h"
#include "hard-interface.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
@@ -344,7 +345,6 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
/* randomize initial seqno to avoid collision */
get_random_bytes(&random_seqno, sizeof(random_seqno));
atomic_set(&hard_iface->bat_v.elp_seqno, random_seqno);
- atomic_set(&hard_iface->bat_v.elp_interval, 500);
/* assume full-duplex by default */
hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX;
@@ -443,7 +443,8 @@ static void batadv_v_elp_neigh_update(struct batadv_priv *bat_priv,
if (!orig_neigh)
return;
- neigh = batadv_neigh_node_new(orig_neigh, if_incoming, neigh_addr);
+ neigh = batadv_neigh_node_get_or_create(orig_neigh,
+ if_incoming, neigh_addr);
if (!neigh)
goto orig_free;
@@ -503,7 +504,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
/* did we receive a B.A.T.M.A.N. V ELP packet on an interface
* that does not have B.A.T.M.A.N. V ELP enabled ?
*/
- if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0)
+ if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
return NET_RX_DROP;
elp_packet = (struct batadv_elp_packet *)skb->data;
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index cc130b2d05e5..be17c0b1369e 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -15,11 +15,11 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "main.h"
-
#ifndef _NET_BATMAN_ADV_BAT_V_ELP_H_
#define _NET_BATMAN_ADV_BAT_V_ELP_H_
+#include "main.h"
+
struct sk_buff;
struct work_struct;
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 473ebb9a0e73..6fbba4eb0617 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -39,13 +39,16 @@
#include <linux/types.h>
#include <linux/workqueue.h>
+#include "bat_algo.h"
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
#include "send.h"
#include "translation-table.h"
+#include "tvlv.h"
/**
* batadv_v_ogm_orig_get - retrieve and possibly create an originator node
@@ -683,8 +686,8 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
if (!orig_node)
return;
- neigh_node = batadv_neigh_node_new(orig_node, if_incoming,
- ethhdr->h_source);
+ neigh_node = batadv_neigh_node_get_or_create(orig_node, if_incoming,
+ ethhdr->h_source);
if (!neigh_node)
goto out;
@@ -751,7 +754,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
/* did we receive a OGM2 packet on an interface that does not have
* B.A.T.M.A.N. V enabled ?
*/
- if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0)
+ if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
return NET_RX_DROP;
if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index d849c75ada0e..4c4d45caa422 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -18,10 +18,10 @@
#ifndef _BATMAN_ADV_BATADV_V_OGM_H_
#define _BATMAN_ADV_BATADV_V_OGM_H_
+#include "main.h"
+
#include <linux/types.h>
-struct batadv_hard_iface;
-struct batadv_priv;
struct sk_buff;
int batadv_v_ogm_init(struct batadv_priv *bat_priv);
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index a0c7913837a5..032271421a20 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -20,6 +20,8 @@
#include <linux/bitmap.h>
+#include "log.h"
+
/* shift the packet array by n places. */
static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
{
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 748a9ead7ce5..e4f7494fb974 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -48,6 +48,7 @@
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "sysfs.h"
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 952900466d88..1d68b6e63b96 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -18,245 +18,33 @@
#include "debugfs.h"
#include "main.h"
-#include <linux/compiler.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/export.h>
-#include <linux/fcntl.h>
#include <linux/fs.h>
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/netdevice.h>
-#include <linux/poll.h>
#include <linux/printk.h>
#include <linux/sched.h> /* for linux/wait.h */
#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
#include <linux/sysfs.h>
-#include <linux/types.h>
-#include <linux/uaccess.h>
-#include <linux/wait.h>
-#include <stdarg.h>
+#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "icmp_socket.h"
+#include "log.h"
+#include "multicast.h"
#include "network-coding.h"
#include "originator.h"
#include "translation-table.h"
static struct dentry *batadv_debugfs;
-#ifdef CONFIG_BATMAN_ADV_DEBUG
-#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1)
-
-static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
-
-static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log,
- size_t idx)
-{
- return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK];
-}
-
-static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log,
- char c)
-{
- char *char_addr;
-
- char_addr = batadv_log_char_addr(debug_log, debug_log->log_end);
- *char_addr = c;
- debug_log->log_end++;
-
- if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len)
- debug_log->log_start = debug_log->log_end - batadv_log_buff_len;
-}
-
-__printf(2, 3)
-static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
- const char *fmt, ...)
-{
- va_list args;
- static char debug_log_buf[256];
- char *p;
-
- if (!debug_log)
- return 0;
-
- spin_lock_bh(&debug_log->lock);
- va_start(args, fmt);
- vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
- va_end(args);
-
- for (p = debug_log_buf; *p != 0; p++)
- batadv_emit_log_char(debug_log, *p);
-
- spin_unlock_bh(&debug_log->lock);
-
- wake_up(&debug_log->queue_wait);
-
- return 0;
-}
-
-int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
-{
- va_list args;
- char tmp_log_buf[256];
-
- va_start(args, fmt);
- vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
- batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s",
- jiffies_to_msecs(jiffies), tmp_log_buf);
- va_end(args);
-
- return 0;
-}
-
-static int batadv_log_open(struct inode *inode, struct file *file)
-{
- if (!try_module_get(THIS_MODULE))
- return -EBUSY;
-
- nonseekable_open(inode, file);
- file->private_data = inode->i_private;
- return 0;
-}
-
-static int batadv_log_release(struct inode *inode, struct file *file)
-{
- module_put(THIS_MODULE);
- return 0;
-}
-
-static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log)
-{
- return !(debug_log->log_start - debug_log->log_end);
-}
-
-static ssize_t batadv_log_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct batadv_priv *bat_priv = file->private_data;
- struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
- int error, i = 0;
- char *char_addr;
- char c;
-
- if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log))
- return -EAGAIN;
-
- if (!buf)
- return -EINVAL;
-
- if (count == 0)
- return 0;
-
- if (!access_ok(VERIFY_WRITE, buf, count))
- return -EFAULT;
-
- error = wait_event_interruptible(debug_log->queue_wait,
- (!batadv_log_empty(debug_log)));
-
- if (error)
- return error;
-
- spin_lock_bh(&debug_log->lock);
-
- while ((!error) && (i < count) &&
- (debug_log->log_start != debug_log->log_end)) {
- char_addr = batadv_log_char_addr(debug_log,
- debug_log->log_start);
- c = *char_addr;
-
- debug_log->log_start++;
-
- spin_unlock_bh(&debug_log->lock);
-
- error = __put_user(c, buf);
-
- spin_lock_bh(&debug_log->lock);
-
- buf++;
- i++;
- }
-
- spin_unlock_bh(&debug_log->lock);
-
- if (!error)
- return i;
-
- return error;
-}
-
-static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
-{
- struct batadv_priv *bat_priv = file->private_data;
- struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
-
- poll_wait(file, &debug_log->queue_wait, wait);
-
- if (!batadv_log_empty(debug_log))
- return POLLIN | POLLRDNORM;
-
- return 0;
-}
-
-static const struct file_operations batadv_log_fops = {
- .open = batadv_log_open,
- .release = batadv_log_release,
- .read = batadv_log_read,
- .poll = batadv_log_poll,
- .llseek = no_llseek,
-};
-
-static int batadv_debug_log_setup(struct batadv_priv *bat_priv)
-{
- struct dentry *d;
-
- if (!bat_priv->debug_dir)
- goto err;
-
- bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC);
- if (!bat_priv->debug_log)
- goto err;
-
- spin_lock_init(&bat_priv->debug_log->lock);
- init_waitqueue_head(&bat_priv->debug_log->queue_wait);
-
- d = debugfs_create_file("log", S_IFREG | S_IRUSR,
- bat_priv->debug_dir, bat_priv,
- &batadv_log_fops);
- if (!d)
- goto err;
-
- return 0;
-
-err:
- return -ENOMEM;
-}
-
-static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
-{
- kfree(bat_priv->debug_log);
- bat_priv->debug_log = NULL;
-}
-#else /* CONFIG_BATMAN_ADV_DEBUG */
-static int batadv_debug_log_setup(struct batadv_priv *bat_priv)
-{
- return 0;
-}
-
-static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
-{
-}
-#endif
-
static int batadv_algorithms_open(struct inode *inode, struct file *file)
{
return single_open(file, batadv_algo_seq_print_text, NULL);
@@ -363,6 +151,22 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
}
#endif
+#ifdef CONFIG_BATMAN_ADV_MCAST
+/**
+ * batadv_mcast_flags_open - prepare file handler for reads from mcast_flags
+ * @inode: inode which was opened
+ * @file: file handle to be initialized
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_mcast_flags_open(struct inode *inode, struct file *file)
+{
+ struct net_device *net_dev = (struct net_device *)inode->i_private;
+
+ return single_open(file, batadv_mcast_flags_seq_print_text, net_dev);
+}
+#endif
+
#define BATADV_DEBUGINFO(_name, _mode, _open) \
struct batadv_debuginfo batadv_debuginfo_##_name = { \
.attr = { \
@@ -407,6 +211,9 @@ static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
#ifdef CONFIG_BATMAN_ADV_NC
static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open);
#endif
+#ifdef CONFIG_BATMAN_ADV_MCAST
+static BATADV_DEBUGINFO(mcast_flags, S_IRUGO, batadv_mcast_flags_open);
+#endif
static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
&batadv_debuginfo_neighbors,
@@ -424,6 +231,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
#ifdef CONFIG_BATMAN_ADV_NC
&batadv_debuginfo_nc_nodes,
#endif
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ &batadv_debuginfo_mcast_flags,
+#endif
NULL,
};
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 278800a99c69..fa7646532a13 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -45,9 +45,11 @@
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "originator.h"
#include "send.h"
#include "translation-table.h"
+#include "tvlv.h"
static void batadv_dat_purge(struct work_struct *work);
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 65536db1bff7..0934730fb7ff 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -27,7 +27,6 @@
#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
-#include <linux/pkt_sched.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -414,7 +413,7 @@ static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
if (!skb_fragment)
goto err;
- skb->priority = TC_PRIO_CONTROL;
+ skb_fragment->priority = skb->priority;
/* Eat the last mtu-bytes of the skb */
skb_reserve(skb_fragment, header_size + ETH_HLEN);
@@ -434,11 +433,12 @@ err:
* @orig_node: final destination of the created fragments
* @neigh_node: next-hop of the created fragments
*
- * Return: true on success, false otherwise.
+ * Return: the netdev tx status or -1 in case of error.
+ * When -1 is returned the skb is not consumed.
*/
-bool batadv_frag_send_packet(struct sk_buff *skb,
- struct batadv_orig_node *orig_node,
- struct batadv_neigh_node *neigh_node)
+int batadv_frag_send_packet(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ struct batadv_neigh_node *neigh_node)
{
struct batadv_priv *bat_priv;
struct batadv_hard_iface *primary_if = NULL;
@@ -447,7 +447,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
unsigned int header_size = sizeof(frag_header);
unsigned int max_fragment_size, max_packet_size;
- bool ret = false;
+ int ret = -1;
/* To avoid merge and refragmentation at next-hops we never send
* fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
@@ -458,12 +458,12 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
/* Don't even try to fragment, if we need more than 16 fragments */
if (skb->len > max_packet_size)
- goto out_err;
+ goto out;
bat_priv = orig_node->bat_priv;
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
- goto out_err;
+ goto out;
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
@@ -473,6 +473,15 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
frag_header.reserved = 0;
frag_header.no = 0;
frag_header.total_size = htons(skb->len);
+
+ /* skb->priority values from 256->263 are magic values to
+ * directly indicate a specific 802.1d priority. This is used
+ * to allow 802.1d priority to be passed directly in from VLAN
+ * tags, etc.
+ */
+ if (skb->priority >= 256 && skb->priority <= 263)
+ frag_header.priority = skb->priority - 256;
+
ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
ether_addr_copy(frag_header.dest, orig_node->orig);
@@ -480,23 +489,33 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
while (skb->len > max_fragment_size) {
skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
if (!skb_fragment)
- goto out_err;
+ goto out;
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
skb_fragment->len + ETH_HLEN);
- batadv_send_unicast_skb(skb_fragment, neigh_node);
+ ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
+ if (ret != NET_XMIT_SUCCESS) {
+ /* return -1 so that the caller can free the original
+ * skb
+ */
+ ret = -1;
+ goto out;
+ }
+
frag_header.no++;
/* The initial check in this function should cover this case */
- if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)
- goto out_err;
+ if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) {
+ ret = -1;
+ goto out;
+ }
}
/* Make room for the fragment header. */
if (batadv_skb_head_push(skb, header_size) < 0 ||
pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0)
- goto out_err;
+ goto out;
memcpy(skb->data, &frag_header, header_size);
@@ -504,11 +523,9 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
skb->len + ETH_HLEN);
- batadv_send_unicast_skb(skb, neigh_node);
-
- ret = true;
+ ret = batadv_send_unicast_skb(skb, neigh_node);
-out_err:
+out:
if (primary_if)
batadv_hardif_put(primary_if);
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 9ff77c7ef7c7..3202fe329e63 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -34,9 +34,9 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
struct batadv_orig_node *orig_node_src);
bool batadv_frag_skb_buffer(struct sk_buff **skb,
struct batadv_orig_node *orig_node);
-bool batadv_frag_send_packet(struct sk_buff *skb,
- struct batadv_orig_node *orig_node,
- struct batadv_neigh_node *neigh_node);
+int batadv_frag_send_packet(struct sk_buff *skb,
+ struct batadv_orig_node *orig_node,
+ struct batadv_neigh_node *neigh_node);
/**
* batadv_frag_check_entry - check if a list of fragments has timed out
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 5839c569f769..63a805d3f96e 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -42,6 +42,7 @@
#include "gateway_common.h"
#include "hard-interface.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
@@ -192,7 +193,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
tq_avg = router_ifinfo->bat_iv.tq_avg;
- switch (atomic_read(&bat_priv->gw_sel_class)) {
+ switch (atomic_read(&bat_priv->gw.sel_class)) {
case 1: /* fast connection */
tmp_gw_factor = tq_avg * tq_avg;
tmp_gw_factor *= gw_node->bandwidth_down;
@@ -255,7 +256,7 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
{
struct batadv_gw_node *curr_gw;
- if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
+ if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT)
return;
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
@@ -283,7 +284,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
char gw_addr[18] = { '\0' };
- if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
+ if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT)
goto out;
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
@@ -402,8 +403,8 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv,
/* if the routing class is greater than 3 the value tells us how much
* greater the TQ value of the new gateway must be
*/
- if ((atomic_read(&bat_priv->gw_sel_class) > 3) &&
- (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class)))
+ if ((atomic_read(&bat_priv->gw.sel_class) > 3) &&
+ (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class)))
goto out;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -638,8 +639,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
goto out;
seq_printf(seq,
- " %-12s (%s/%i) %17s [%10s]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
- "Gateway", "#", BATADV_TQ_MAX_VALUE, "Nexthop", "outgoingIF",
+ " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
BATADV_SOURCE_VERSION, primary_if->net_dev->name,
primary_if->net_dev->dev_addr, net_dev->name);
@@ -821,7 +821,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
if (!gw_node)
goto out;
- switch (atomic_read(&bat_priv->gw_mode)) {
+ switch (atomic_read(&bat_priv->gw.mode)) {
case BATADV_GW_MODE_SERVER:
/* If we are a GW then we are our best GW. We can artificially
* set the tq towards ourself as the maximum value
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 4423047889e1..d7bc6a87bcc9 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -19,8 +19,8 @@
#include "main.h"
#include <linux/atomic.h>
-#include <linux/errno.h>
#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/netdevice.h>
@@ -28,7 +28,9 @@
#include <linux/string.h>
#include "gateway_client.h"
+#include "log.h"
#include "packet.h"
+#include "tvlv.h"
/**
* batadv_parse_throughput - parse supplied string buffer to extract throughput
@@ -144,7 +146,7 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv)
u32 down, up;
char gw_mode;
- gw_mode = atomic_read(&bat_priv->gw_mode);
+ gw_mode = atomic_read(&bat_priv->gw.mode);
switch (gw_mode) {
case BATADV_GW_MODE_OFF:
@@ -241,8 +243,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
/* restart gateway selection if fast or late switching was enabled */
if ((gateway.bandwidth_down != 0) &&
- (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_CLIENT) &&
- (atomic_read(&bat_priv->gw_sel_class) > 2))
+ (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) &&
+ (atomic_read(&bat_priv->gw.sel_class) > 2))
batadv_gw_check_election(bat_priv, orig);
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 8c2f39962fa5..1f9080840566 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -23,9 +23,9 @@
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
-#include <linux/if.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -37,10 +37,12 @@
#include <linux/spinlock.h>
#include <linux/workqueue.h>
+#include "bat_v.h"
#include "bridge_loop_avoidance.h"
#include "debugfs.h"
#include "distributed-arp-table.h"
#include "gateway_client.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "send.h"
@@ -245,7 +247,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
if (!new_hard_iface)
goto out;
- bat_priv->bat_algo_ops->bat_primary_iface_set(new_hard_iface);
+ bat_priv->algo_ops->iface.primary_set(new_hard_iface);
batadv_primary_if_update_addr(bat_priv, curr_hard_iface);
out:
@@ -392,7 +394,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
bat_priv = netdev_priv(hard_iface->soft_iface);
- bat_priv->bat_algo_ops->bat_iface_update_mac(hard_iface);
+ bat_priv->algo_ops->iface.update_mac(hard_iface);
hard_iface->if_status = BATADV_IF_TO_BE_ACTIVATED;
/* the first active interface becomes our primary interface or
@@ -407,8 +409,8 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
batadv_update_min_mtu(hard_iface->soft_iface);
- if (bat_priv->bat_algo_ops->bat_iface_activate)
- bat_priv->bat_algo_ops->bat_iface_activate(hard_iface);
+ if (bat_priv->algo_ops->iface.activate)
+ bat_priv->algo_ops->iface.activate(hard_iface);
out:
if (primary_if)
@@ -506,7 +508,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
if (ret)
goto err_dev;
- ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface);
+ ret = bat_priv->algo_ops->iface.enable(hard_iface);
if (ret < 0)
goto err_upper;
@@ -515,7 +517,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->if_status = BATADV_IF_INACTIVE;
ret = batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces);
if (ret < 0) {
- bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
+ bat_priv->algo_ops->iface.disable(hard_iface);
bat_priv->num_ifaces--;
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
goto err_upper;
@@ -553,9 +555,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
batadv_hardif_recalc_extra_skbroom(soft_iface);
- /* begin scheduling originator messages on that interface */
- batadv_schedule_bat_ogm(hard_iface);
-
out:
return 0;
@@ -599,7 +598,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
batadv_hardif_put(new_if);
}
- bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
+ bat_priv->algo_ops->iface.disable(hard_iface);
hard_iface->if_status = BATADV_IF_NOT_IN_USE;
/* delete all references to this hard_iface */
@@ -686,6 +685,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
if (batadv_is_wifi_netdev(net_dev))
hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
+ batadv_v_hardif_init(hard_iface);
+
/* extra reference for return */
kref_init(&hard_iface->refcount);
kref_get(&hard_iface->refcount);
@@ -782,7 +783,7 @@ static int batadv_hard_if_event(struct notifier_block *this,
batadv_check_known_mac_addr(hard_iface->net_dev);
bat_priv = netdev_priv(hard_iface->soft_iface);
- bat_priv->bat_algo_ops->bat_iface_update_mac(hard_iface);
+ bat_priv->algo_ops->iface.update_mac(hard_iface);
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 777aea10cd8f..378cc1119d66 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -45,6 +45,7 @@
#include <linux/wait.h>
#include "hard-interface.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "send.h"
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
new file mode 100644
index 000000000000..56dc532f7a2c
--- /dev/null
+++ b/net/batman-adv/log.c
@@ -0,0 +1,231 @@
+/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "log.h"
+#include "main.h"
+
+#include <linux/compiler.h>
+#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/sched.h> /* for linux/wait.h */
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+#include <stdarg.h>
+
+#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1)
+
+static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
+
+static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log,
+ size_t idx)
+{
+ return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK];
+}
+
+static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log,
+ char c)
+{
+ char *char_addr;
+
+ char_addr = batadv_log_char_addr(debug_log, debug_log->log_end);
+ *char_addr = c;
+ debug_log->log_end++;
+
+ if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len)
+ debug_log->log_start = debug_log->log_end - batadv_log_buff_len;
+}
+
+__printf(2, 3)
+static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
+ const char *fmt, ...)
+{
+ va_list args;
+ static char debug_log_buf[256];
+ char *p;
+
+ if (!debug_log)
+ return 0;
+
+ spin_lock_bh(&debug_log->lock);
+ va_start(args, fmt);
+ vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
+ va_end(args);
+
+ for (p = debug_log_buf; *p != 0; p++)
+ batadv_emit_log_char(debug_log, *p);
+
+ spin_unlock_bh(&debug_log->lock);
+
+ wake_up(&debug_log->queue_wait);
+
+ return 0;
+}
+
+int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
+{
+ va_list args;
+ char tmp_log_buf[256];
+
+ va_start(args, fmt);
+ vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
+ batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s",
+ jiffies_to_msecs(jiffies), tmp_log_buf);
+ va_end(args);
+
+ return 0;
+}
+
+static int batadv_log_open(struct inode *inode, struct file *file)
+{
+ if (!try_module_get(THIS_MODULE))
+ return -EBUSY;
+
+ nonseekable_open(inode, file);
+ file->private_data = inode->i_private;
+ return 0;
+}
+
+static int batadv_log_release(struct inode *inode, struct file *file)
+{
+ module_put(THIS_MODULE);
+ return 0;
+}
+
+static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log)
+{
+ return !(debug_log->log_start - debug_log->log_end);
+}
+
+static ssize_t batadv_log_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct batadv_priv *bat_priv = file->private_data;
+ struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
+ int error, i = 0;
+ char *char_addr;
+ char c;
+
+ if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log))
+ return -EAGAIN;
+
+ if (!buf)
+ return -EINVAL;
+
+ if (count == 0)
+ return 0;
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+
+ error = wait_event_interruptible(debug_log->queue_wait,
+ (!batadv_log_empty(debug_log)));
+
+ if (error)
+ return error;
+
+ spin_lock_bh(&debug_log->lock);
+
+ while ((!error) && (i < count) &&
+ (debug_log->log_start != debug_log->log_end)) {
+ char_addr = batadv_log_char_addr(debug_log,
+ debug_log->log_start);
+ c = *char_addr;
+
+ debug_log->log_start++;
+
+ spin_unlock_bh(&debug_log->lock);
+
+ error = __put_user(c, buf);
+
+ spin_lock_bh(&debug_log->lock);
+
+ buf++;
+ i++;
+ }
+
+ spin_unlock_bh(&debug_log->lock);
+
+ if (!error)
+ return i;
+
+ return error;
+}
+
+static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
+{
+ struct batadv_priv *bat_priv = file->private_data;
+ struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
+
+ poll_wait(file, &debug_log->queue_wait, wait);
+
+ if (!batadv_log_empty(debug_log))
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+static const struct file_operations batadv_log_fops = {
+ .open = batadv_log_open,
+ .release = batadv_log_release,
+ .read = batadv_log_read,
+ .poll = batadv_log_poll,
+ .llseek = no_llseek,
+};
+
+int batadv_debug_log_setup(struct batadv_priv *bat_priv)
+{
+ struct dentry *d;
+
+ if (!bat_priv->debug_dir)
+ goto err;
+
+ bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC);
+ if (!bat_priv->debug_log)
+ goto err;
+
+ spin_lock_init(&bat_priv->debug_log->lock);
+ init_waitqueue_head(&bat_priv->debug_log->queue_wait);
+
+ d = debugfs_create_file("log", S_IFREG | S_IRUSR,
+ bat_priv->debug_dir, bat_priv,
+ &batadv_log_fops);
+ if (!d)
+ goto err;
+
+ return 0;
+
+err:
+ return -ENOMEM;
+}
+
+void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
+{
+ kfree(bat_priv->debug_log);
+ bat_priv->debug_log = NULL;
+}
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
new file mode 100644
index 000000000000..e0e1a88c3e58
--- /dev/null
+++ b/net/batman-adv/log.h
@@ -0,0 +1,111 @@
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_LOG_H_
+#define _NET_BATMAN_ADV_LOG_H_
+
+#include "main.h"
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/printk.h>
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+
+int batadv_debug_log_setup(struct batadv_priv *bat_priv);
+void batadv_debug_log_cleanup(struct batadv_priv *bat_priv);
+
+#else
+
+static inline int batadv_debug_log_setup(struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
+{
+}
+
+#endif
+
+/**
+ * enum batadv_dbg_level - available log levels
+ * @BATADV_DBG_BATMAN: OGM and TQ computations related messages
+ * @BATADV_DBG_ROUTES: route added / changed / deleted
+ * @BATADV_DBG_TT: translation table messages
+ * @BATADV_DBG_BLA: bridge loop avoidance messages
+ * @BATADV_DBG_DAT: ARP snooping and DAT related messages
+ * @BATADV_DBG_NC: network coding related messages
+ * @BATADV_DBG_MCAST: multicast related messages
+ * @BATADV_DBG_TP_METER: throughput meter messages
+ * @BATADV_DBG_ALL: the union of all the above log levels
+ */
+enum batadv_dbg_level {
+ BATADV_DBG_BATMAN = BIT(0),
+ BATADV_DBG_ROUTES = BIT(1),
+ BATADV_DBG_TT = BIT(2),
+ BATADV_DBG_BLA = BIT(3),
+ BATADV_DBG_DAT = BIT(4),
+ BATADV_DBG_NC = BIT(5),
+ BATADV_DBG_MCAST = BIT(6),
+ BATADV_DBG_TP_METER = BIT(7),
+ BATADV_DBG_ALL = 127,
+};
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
+__printf(2, 3);
+
+/* possibly ratelimited debug output */
+#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
+ do { \
+ if (atomic_read(&bat_priv->log_level) & type && \
+ (!ratelimited || net_ratelimit())) \
+ batadv_debug_log(bat_priv, fmt, ## arg);\
+ } \
+ while (0)
+#else /* !CONFIG_BATMAN_ADV_DEBUG */
+__printf(4, 5)
+static inline void _batadv_dbg(int type __always_unused,
+ struct batadv_priv *bat_priv __always_unused,
+ int ratelimited __always_unused,
+ const char *fmt __always_unused, ...)
+{
+}
+#endif
+
+#define batadv_dbg(type, bat_priv, arg...) \
+ _batadv_dbg(type, bat_priv, 0, ## arg)
+#define batadv_dbg_ratelimited(type, bat_priv, arg...) \
+ _batadv_dbg(type, bat_priv, 1, ## arg)
+
+#define batadv_info(net_dev, fmt, arg...) \
+ do { \
+ struct net_device *_netdev = (net_dev); \
+ struct batadv_priv *_batpriv = netdev_priv(_netdev); \
+ batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \
+ pr_info("%s: " fmt, _netdev->name, ## arg); \
+ } while (0)
+#define batadv_err(net_dev, fmt, arg...) \
+ do { \
+ struct net_device *_netdev = (net_dev); \
+ struct batadv_priv *_batpriv = netdev_priv(_netdev); \
+ batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \
+ pr_err("%s: " fmt, _netdev->name, ## arg); \
+ } while (0)
+
+#endif /* _NET_BATMAN_ADV_LOG_H_ */
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5f2974bd1227..fe4c5e29f96b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -31,16 +31,13 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
-#include <linux/lockdep.h>
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/netdevice.h>
-#include <linux/pkt_sched.h>
+#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
#include <linux/skbuff.h>
-#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/string.h>
@@ -49,6 +46,8 @@
#include <net/rtnetlink.h>
#include "bat_algo.h"
+#include "bat_iv_ogm.h"
+#include "bat_v.h"
#include "bridge_loop_avoidance.h"
#include "debugfs.h"
#include "distributed-arp-table.h"
@@ -56,13 +55,16 @@
#include "gateway_common.h"
#include "hard-interface.h"
#include "icmp_socket.h"
+#include "log.h"
#include "multicast.h"
+#include "netlink.h"
#include "network-coding.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
#include "send.h"
#include "soft-interface.h"
+#include "tp_meter.h"
#include "translation-table.h"
/* List manipulations on hardif_list have to be rtnl_lock()'ed,
@@ -71,8 +73,6 @@
struct list_head batadv_hardif_list;
static int (*batadv_rx_handler[256])(struct sk_buff *,
struct batadv_hard_iface *);
-char batadv_routing_algo[20] = "BATMAN_IV";
-static struct hlist_head batadv_algo_list;
unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
@@ -83,13 +83,14 @@ static void batadv_recv_handler_init(void);
static int __init batadv_init(void)
{
INIT_LIST_HEAD(&batadv_hardif_list);
- INIT_HLIST_HEAD(&batadv_algo_list);
+ batadv_algo_init();
batadv_recv_handler_init();
batadv_v_init();
batadv_iv_init();
batadv_nc_init();
+ batadv_tp_meter_init();
batadv_event_workqueue = create_singlethread_workqueue("bat_events");
@@ -101,6 +102,7 @@ static int __init batadv_init(void)
register_netdevice_notifier(&batadv_hard_if_notifier);
rtnl_link_register(&batadv_link_ops);
+ batadv_netlink_register();
pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n",
BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
@@ -111,6 +113,7 @@ static int __init batadv_init(void)
static void __exit batadv_exit(void)
{
batadv_debugfs_destroy();
+ batadv_netlink_unregister();
rtnl_link_unregister(&batadv_link_ops);
unregister_netdevice_notifier(&batadv_hard_if_notifier);
batadv_hardif_remove_interfaces();
@@ -141,6 +144,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
spin_lock_init(&bat_priv->tvlv.container_list_lock);
spin_lock_init(&bat_priv->tvlv.handler_list_lock);
spin_lock_init(&bat_priv->softif_vlan_list_lock);
+ spin_lock_init(&bat_priv->tp_list_lock);
INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
@@ -159,6 +163,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->tvlv.container_list);
INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list);
INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
+ INIT_HLIST_HEAD(&bat_priv->tp_list);
ret = batadv_v_mesh_init(bat_priv);
if (ret < 0)
@@ -538,78 +543,6 @@ void batadv_recv_handler_unregister(u8 packet_type)
batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet;
}
-static struct batadv_algo_ops *batadv_algo_get(char *name)
-{
- struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp;
-
- hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) {
- if (strcmp(bat_algo_ops_tmp->name, name) != 0)
- continue;
-
- bat_algo_ops = bat_algo_ops_tmp;
- break;
- }
-
- return bat_algo_ops;
-}
-
-int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
-{
- struct batadv_algo_ops *bat_algo_ops_tmp;
-
- bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name);
- if (bat_algo_ops_tmp) {
- pr_info("Trying to register already registered routing algorithm: %s\n",
- bat_algo_ops->name);
- return -EEXIST;
- }
-
- /* all algorithms must implement all ops (for now) */
- if (!bat_algo_ops->bat_iface_enable ||
- !bat_algo_ops->bat_iface_disable ||
- !bat_algo_ops->bat_iface_update_mac ||
- !bat_algo_ops->bat_primary_iface_set ||
- !bat_algo_ops->bat_ogm_schedule ||
- !bat_algo_ops->bat_ogm_emit ||
- !bat_algo_ops->bat_neigh_cmp ||
- !bat_algo_ops->bat_neigh_is_similar_or_better) {
- pr_info("Routing algo '%s' does not implement required ops\n",
- bat_algo_ops->name);
- return -EINVAL;
- }
-
- INIT_HLIST_NODE(&bat_algo_ops->list);
- hlist_add_head(&bat_algo_ops->list, &batadv_algo_list);
-
- return 0;
-}
-
-int batadv_algo_select(struct batadv_priv *bat_priv, char *name)
-{
- struct batadv_algo_ops *bat_algo_ops;
-
- bat_algo_ops = batadv_algo_get(name);
- if (!bat_algo_ops)
- return -EINVAL;
-
- bat_priv->bat_algo_ops = bat_algo_ops;
-
- return 0;
-}
-
-int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
-{
- struct batadv_algo_ops *bat_algo_ops;
-
- seq_puts(seq, "Available routing algorithms:\n");
-
- hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
- seq_printf(seq, " * %s\n", bat_algo_ops->name);
- }
-
- return 0;
-}
-
/**
* batadv_skb_crc32 - calculate CRC32 of the whole packet and skip bytes in
* the header
@@ -644,594 +577,6 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
}
/**
- * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
- * free after rcu grace period
- * @ref: kref pointer of the tvlv
- */
-static void batadv_tvlv_handler_release(struct kref *ref)
-{
- struct batadv_tvlv_handler *tvlv_handler;
-
- tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount);
- kfree_rcu(tvlv_handler, rcu);
-}
-
-/**
- * batadv_tvlv_handler_put - decrement the tvlv container refcounter and
- * possibly release it
- * @tvlv_handler: the tvlv handler to free
- */
-static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
-{
- kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
-}
-
-/**
- * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list
- * based on the provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv handler type to look for
- * @version: tvlv handler version to look for
- *
- * Return: tvlv handler if found or NULL otherwise.
- */
-static struct batadv_tvlv_handler *
-batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
-{
- struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tvlv_handler_tmp,
- &bat_priv->tvlv.handler_list, list) {
- if (tvlv_handler_tmp->type != type)
- continue;
-
- if (tvlv_handler_tmp->version != version)
- continue;
-
- if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount))
- continue;
-
- tvlv_handler = tvlv_handler_tmp;
- break;
- }
- rcu_read_unlock();
-
- return tvlv_handler;
-}
-
-/**
- * batadv_tvlv_container_release - release tvlv from lists and free
- * @ref: kref pointer of the tvlv
- */
-static void batadv_tvlv_container_release(struct kref *ref)
-{
- struct batadv_tvlv_container *tvlv;
-
- tvlv = container_of(ref, struct batadv_tvlv_container, refcount);
- kfree(tvlv);
-}
-
-/**
- * batadv_tvlv_container_put - decrement the tvlv container refcounter and
- * possibly release it
- * @tvlv: the tvlv container to free
- */
-static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
-{
- kref_put(&tvlv->refcount, batadv_tvlv_container_release);
-}
-
-/**
- * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container
- * list based on the provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv container type to look for
- * @version: tvlv container version to look for
- *
- * Has to be called with the appropriate locks being acquired
- * (tvlv.container_list_lock).
- *
- * Return: tvlv container if found or NULL otherwise.
- */
-static struct batadv_tvlv_container *
-batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
-{
- struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
-
- lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
-
- hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
- if (tvlv_tmp->tvlv_hdr.type != type)
- continue;
-
- if (tvlv_tmp->tvlv_hdr.version != version)
- continue;
-
- kref_get(&tvlv_tmp->refcount);
- tvlv = tvlv_tmp;
- break;
- }
-
- return tvlv;
-}
-
-/**
- * batadv_tvlv_container_list_size - calculate the size of the tvlv container
- * list entries
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Has to be called with the appropriate locks being acquired
- * (tvlv.container_list_lock).
- *
- * Return: size of all currently registered tvlv containers in bytes.
- */
-static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
-{
- struct batadv_tvlv_container *tvlv;
- u16 tvlv_len = 0;
-
- lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
-
- hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
- tvlv_len += sizeof(struct batadv_tvlv_hdr);
- tvlv_len += ntohs(tvlv->tvlv_hdr.len);
- }
-
- return tvlv_len;
-}
-
-/**
- * batadv_tvlv_container_remove - remove tvlv container from the tvlv container
- * list
- * @bat_priv: the bat priv with all the soft interface information
- * @tvlv: the to be removed tvlv container
- *
- * Has to be called with the appropriate locks being acquired
- * (tvlv.container_list_lock).
- */
-static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
- struct batadv_tvlv_container *tvlv)
-{
- lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
-
- if (!tvlv)
- return;
-
- hlist_del(&tvlv->list);
-
- /* first call to decrement the counter, second call to free */
- batadv_tvlv_container_put(tvlv);
- batadv_tvlv_container_put(tvlv);
-}
-
-/**
- * batadv_tvlv_container_unregister - unregister tvlv container based on the
- * provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv container type to unregister
- * @version: tvlv container type to unregister
- */
-void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
- u8 type, u8 version)
-{
- struct batadv_tvlv_container *tvlv;
-
- spin_lock_bh(&bat_priv->tvlv.container_list_lock);
- tvlv = batadv_tvlv_container_get(bat_priv, type, version);
- batadv_tvlv_container_remove(bat_priv, tvlv);
- spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
-}
-
-/**
- * batadv_tvlv_container_register - register tvlv type, version and content
- * to be propagated with each (primary interface) OGM
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv container type
- * @version: tvlv container version
- * @tvlv_value: tvlv container content
- * @tvlv_value_len: tvlv container content length
- *
- * If a container of the same type and version was already registered the new
- * content is going to replace the old one.
- */
-void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
- u8 type, u8 version,
- void *tvlv_value, u16 tvlv_value_len)
-{
- struct batadv_tvlv_container *tvlv_old, *tvlv_new;
-
- if (!tvlv_value)
- tvlv_value_len = 0;
-
- tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC);
- if (!tvlv_new)
- return;
-
- tvlv_new->tvlv_hdr.version = version;
- tvlv_new->tvlv_hdr.type = type;
- tvlv_new->tvlv_hdr.len = htons(tvlv_value_len);
-
- memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
- INIT_HLIST_NODE(&tvlv_new->list);
- kref_init(&tvlv_new->refcount);
-
- spin_lock_bh(&bat_priv->tvlv.container_list_lock);
- tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
- batadv_tvlv_container_remove(bat_priv, tvlv_old);
- hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
- spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
-}
-
-/**
- * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate
- * requested packet size
- * @packet_buff: packet buffer
- * @packet_buff_len: packet buffer size
- * @min_packet_len: requested packet minimum size
- * @additional_packet_len: requested additional packet size on top of minimum
- * size
- *
- * Return: true of the packet buffer could be changed to the requested size,
- * false otherwise.
- */
-static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
- int *packet_buff_len,
- int min_packet_len,
- int additional_packet_len)
-{
- unsigned char *new_buff;
-
- new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC);
-
- /* keep old buffer if kmalloc should fail */
- if (!new_buff)
- return false;
-
- memcpy(new_buff, *packet_buff, min_packet_len);
- kfree(*packet_buff);
- *packet_buff = new_buff;
- *packet_buff_len = min_packet_len + additional_packet_len;
-
- return true;
-}
-
-/**
- * batadv_tvlv_container_ogm_append - append tvlv container content to given
- * OGM packet buffer
- * @bat_priv: the bat priv with all the soft interface information
- * @packet_buff: ogm packet buffer
- * @packet_buff_len: ogm packet buffer size including ogm header and tvlv
- * content
- * @packet_min_len: ogm header size to be preserved for the OGM itself
- *
- * The ogm packet might be enlarged or shrunk depending on the current size
- * and the size of the to-be-appended tvlv containers.
- *
- * Return: size of all appended tvlv containers in bytes.
- */
-u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
- unsigned char **packet_buff,
- int *packet_buff_len, int packet_min_len)
-{
- struct batadv_tvlv_container *tvlv;
- struct batadv_tvlv_hdr *tvlv_hdr;
- u16 tvlv_value_len;
- void *tvlv_value;
- bool ret;
-
- spin_lock_bh(&bat_priv->tvlv.container_list_lock);
- tvlv_value_len = batadv_tvlv_container_list_size(bat_priv);
-
- ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len,
- packet_min_len, tvlv_value_len);
-
- if (!ret)
- goto end;
-
- if (!tvlv_value_len)
- goto end;
-
- tvlv_value = (*packet_buff) + packet_min_len;
-
- hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
- tvlv_hdr = tvlv_value;
- tvlv_hdr->type = tvlv->tvlv_hdr.type;
- tvlv_hdr->version = tvlv->tvlv_hdr.version;
- tvlv_hdr->len = tvlv->tvlv_hdr.len;
- tvlv_value = tvlv_hdr + 1;
- memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len));
- tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len);
- }
-
-end:
- spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
- return tvlv_value_len;
-}
-
-/**
- * batadv_tvlv_call_handler - parse the given tvlv buffer to call the
- * appropriate handlers
- * @bat_priv: the bat priv with all the soft interface information
- * @tvlv_handler: tvlv callback function handling the tvlv content
- * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
- * @orig_node: orig node emitting the ogm packet
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
- * @tvlv_value: tvlv content
- * @tvlv_value_len: tvlv content length
- *
- * Return: success if handler was not found or the return value of the handler
- * callback.
- */
-static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
- struct batadv_tvlv_handler *tvlv_handler,
- bool ogm_source,
- struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_value, u16 tvlv_value_len)
-{
- if (!tvlv_handler)
- return NET_RX_SUCCESS;
-
- if (ogm_source) {
- if (!tvlv_handler->ogm_handler)
- return NET_RX_SUCCESS;
-
- if (!orig_node)
- return NET_RX_SUCCESS;
-
- tvlv_handler->ogm_handler(bat_priv, orig_node,
- BATADV_NO_FLAGS,
- tvlv_value, tvlv_value_len);
- tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED;
- } else {
- if (!src)
- return NET_RX_SUCCESS;
-
- if (!dst)
- return NET_RX_SUCCESS;
-
- if (!tvlv_handler->unicast_handler)
- return NET_RX_SUCCESS;
-
- return tvlv_handler->unicast_handler(bat_priv, src,
- dst, tvlv_value,
- tvlv_value_len);
- }
-
- return NET_RX_SUCCESS;
-}
-
-/**
- * batadv_tvlv_containers_process - parse the given tvlv buffer to call the
- * appropriate handlers
- * @bat_priv: the bat priv with all the soft interface information
- * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
- * @orig_node: orig node emitting the ogm packet
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
- * @tvlv_value: tvlv content
- * @tvlv_value_len: tvlv content length
- *
- * Return: success when processing an OGM or the return value of all called
- * handler callbacks.
- */
-int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
- bool ogm_source,
- struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_value, u16 tvlv_value_len)
-{
- struct batadv_tvlv_handler *tvlv_handler;
- struct batadv_tvlv_hdr *tvlv_hdr;
- u16 tvlv_value_cont_len;
- u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND;
- int ret = NET_RX_SUCCESS;
-
- while (tvlv_value_len >= sizeof(*tvlv_hdr)) {
- tvlv_hdr = tvlv_value;
- tvlv_value_cont_len = ntohs(tvlv_hdr->len);
- tvlv_value = tvlv_hdr + 1;
- tvlv_value_len -= sizeof(*tvlv_hdr);
-
- if (tvlv_value_cont_len > tvlv_value_len)
- break;
-
- tvlv_handler = batadv_tvlv_handler_get(bat_priv,
- tvlv_hdr->type,
- tvlv_hdr->version);
-
- ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler,
- ogm_source, orig_node,
- src, dst, tvlv_value,
- tvlv_value_cont_len);
- if (tvlv_handler)
- batadv_tvlv_handler_put(tvlv_handler);
- tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
- tvlv_value_len -= tvlv_value_cont_len;
- }
-
- if (!ogm_source)
- return ret;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tvlv_handler,
- &bat_priv->tvlv.handler_list, list) {
- if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) &&
- !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED))
- tvlv_handler->ogm_handler(bat_priv, orig_node,
- cifnotfound, NULL, 0);
-
- tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED;
- }
- rcu_read_unlock();
-
- return NET_RX_SUCCESS;
-}
-
-/**
- * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate
- * handlers
- * @bat_priv: the bat priv with all the soft interface information
- * @batadv_ogm_packet: ogm packet containing the tvlv containers
- * @orig_node: orig node emitting the ogm packet
- */
-void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
- struct batadv_ogm_packet *batadv_ogm_packet,
- struct batadv_orig_node *orig_node)
-{
- void *tvlv_value;
- u16 tvlv_value_len;
-
- if (!batadv_ogm_packet)
- return;
-
- tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len);
- if (!tvlv_value_len)
- return;
-
- tvlv_value = batadv_ogm_packet + 1;
-
- batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL,
- tvlv_value, tvlv_value_len);
-}
-
-/**
- * batadv_tvlv_handler_register - register tvlv handler based on the provided
- * type and version (both need to match) for ogm tvlv payload and/or unicast
- * payload
- * @bat_priv: the bat priv with all the soft interface information
- * @optr: ogm tvlv handler callback function. This function receives the orig
- * node, flags and the tvlv content as argument to process.
- * @uptr: unicast tvlv handler callback function. This function receives the
- * source & destination of the unicast packet as well as the tvlv content
- * to process.
- * @type: tvlv handler type to be registered
- * @version: tvlv handler version to be registered
- * @flags: flags to enable or disable TVLV API behavior
- */
-void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
- void (*optr)(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig,
- u8 flags,
- void *tvlv_value,
- u16 tvlv_value_len),
- int (*uptr)(struct batadv_priv *bat_priv,
- u8 *src, u8 *dst,
- void *tvlv_value,
- u16 tvlv_value_len),
- u8 type, u8 version, u8 flags)
-{
- struct batadv_tvlv_handler *tvlv_handler;
-
- tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
- if (tvlv_handler) {
- batadv_tvlv_handler_put(tvlv_handler);
- return;
- }
-
- tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC);
- if (!tvlv_handler)
- return;
-
- tvlv_handler->ogm_handler = optr;
- tvlv_handler->unicast_handler = uptr;
- tvlv_handler->type = type;
- tvlv_handler->version = version;
- tvlv_handler->flags = flags;
- kref_init(&tvlv_handler->refcount);
- INIT_HLIST_NODE(&tvlv_handler->list);
-
- spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
- hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
- spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
-}
-
-/**
- * batadv_tvlv_handler_unregister - unregister tvlv handler based on the
- * provided type and version (both need to match)
- * @bat_priv: the bat priv with all the soft interface information
- * @type: tvlv handler type to be unregistered
- * @version: tvlv handler version to be unregistered
- */
-void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
- u8 type, u8 version)
-{
- struct batadv_tvlv_handler *tvlv_handler;
-
- tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
- if (!tvlv_handler)
- return;
-
- batadv_tvlv_handler_put(tvlv_handler);
- spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
- hlist_del_rcu(&tvlv_handler->list);
- spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
- batadv_tvlv_handler_put(tvlv_handler);
-}
-
-/**
- * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the
- * specified host
- * @bat_priv: the bat priv with all the soft interface information
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
- * @type: tvlv type
- * @version: tvlv version
- * @tvlv_value: tvlv content
- * @tvlv_value_len: tvlv content length
- */
-void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
- u8 *dst, u8 type, u8 version,
- void *tvlv_value, u16 tvlv_value_len)
-{
- struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
- struct batadv_tvlv_hdr *tvlv_hdr;
- struct batadv_orig_node *orig_node;
- struct sk_buff *skb;
- unsigned char *tvlv_buff;
- unsigned int tvlv_len;
- ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
-
- orig_node = batadv_orig_hash_find(bat_priv, dst);
- if (!orig_node)
- return;
-
- tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len;
-
- skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len);
- if (!skb)
- goto out;
-
- skb->priority = TC_PRIO_CONTROL;
- skb_reserve(skb, ETH_HLEN);
- tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len);
- unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff;
- unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV;
- unicast_tvlv_packet->version = BATADV_COMPAT_VERSION;
- unicast_tvlv_packet->ttl = BATADV_TTL;
- unicast_tvlv_packet->reserved = 0;
- unicast_tvlv_packet->tvlv_len = htons(tvlv_len);
- unicast_tvlv_packet->align = 0;
- ether_addr_copy(unicast_tvlv_packet->src, src);
- ether_addr_copy(unicast_tvlv_packet->dst, dst);
-
- tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1);
- tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff;
- tvlv_hdr->version = version;
- tvlv_hdr->type = type;
- tvlv_hdr->len = htons(tvlv_value_len);
- tvlv_buff += sizeof(*tvlv_hdr);
- memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
-
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP)
- kfree_skb(skb);
-out:
- batadv_orig_node_put(orig_node);
-}
-
-/**
* batadv_get_vid - extract the VLAN identifier from skb if any
* @skb: the buffer containing the packet
* @header_len: length of the batman header preceding the ethernet header
@@ -1284,36 +629,6 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
return ap_isolation_enabled;
}
-static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
-{
- struct batadv_algo_ops *bat_algo_ops;
- char *algo_name = (char *)val;
- size_t name_len = strlen(algo_name);
-
- if (name_len > 0 && algo_name[name_len - 1] == '\n')
- algo_name[name_len - 1] = '\0';
-
- bat_algo_ops = batadv_algo_get(algo_name);
- if (!bat_algo_ops) {
- pr_err("Routing algorithm '%s' is not supported\n", algo_name);
- return -EINVAL;
- }
-
- return param_set_copystring(algo_name, kp);
-}
-
-static const struct kernel_param_ops batadv_param_ops_ra = {
- .set = batadv_param_set_ra,
- .get = param_get_string,
-};
-
-static struct kparam_string batadv_param_string_ra = {
- .maxlen = sizeof(batadv_routing_algo),
- .string = batadv_routing_algo,
-};
-
-module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
- 0644);
module_init(batadv_init);
module_exit(batadv_exit);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 76925266deed..06a860845434 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2016.2"
+#define BATADV_SOURCE_VERSION "2016.3"
#endif
/* B.A.T.M.A.N. parameters */
@@ -100,6 +100,9 @@
#define BATADV_NUM_BCASTS_WIRELESS 3
#define BATADV_NUM_BCASTS_MAX 3
+/* length of the single packet used by the TP meter */
+#define BATADV_TP_PACKET_LEN ETH_DATA_LEN
+
/* msecs after which an ARP_REQUEST is sent in broadcast as fallback */
#define ARP_REQ_DELAY 250
/* numbers of originator to contact for any PUT/GET DHT operation */
@@ -131,6 +134,11 @@
#define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */
+/**
+ * BATADV_TP_MAX_NUM - maximum number of simultaneously active tp sessions
+ */
+#define BATADV_TP_MAX_NUM 5
+
enum batadv_mesh_state {
BATADV_MESH_INACTIVE,
BATADV_MESH_ACTIVE,
@@ -175,29 +183,26 @@ enum batadv_uev_type {
/* Kernel headers */
-#include <linux/atomic.h>
#include <linux/bitops.h> /* for packet.h */
#include <linux/compiler.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h> /* for packet.h */
-#include <linux/netdevice.h>
-#include <linux/printk.h>
-#include <linux/types.h>
-#include <linux/percpu.h>
-#include <linux/jiffies.h>
#include <linux/if_vlan.h>
+#include <linux/jiffies.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
#include "types.h"
-struct batadv_ogm_packet;
+struct net_device;
+struct packet_type;
struct seq_file;
struct sk_buff;
#define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \
(int)(vid & VLAN_VID_MASK) : -1)
-extern char batadv_routing_algo[];
extern struct list_head batadv_hardif_list;
extern unsigned char batadv_broadcast_addr[];
@@ -218,74 +223,9 @@ batadv_recv_handler_register(u8 packet_type,
int (*recv_handler)(struct sk_buff *,
struct batadv_hard_iface *));
void batadv_recv_handler_unregister(u8 packet_type);
-int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
-int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
-int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
__be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr);
/**
- * enum batadv_dbg_level - available log levels
- * @BATADV_DBG_BATMAN: OGM and TQ computations related messages
- * @BATADV_DBG_ROUTES: route added / changed / deleted
- * @BATADV_DBG_TT: translation table messages
- * @BATADV_DBG_BLA: bridge loop avoidance messages
- * @BATADV_DBG_DAT: ARP snooping and DAT related messages
- * @BATADV_DBG_NC: network coding related messages
- * @BATADV_DBG_ALL: the union of all the above log levels
- */
-enum batadv_dbg_level {
- BATADV_DBG_BATMAN = BIT(0),
- BATADV_DBG_ROUTES = BIT(1),
- BATADV_DBG_TT = BIT(2),
- BATADV_DBG_BLA = BIT(3),
- BATADV_DBG_DAT = BIT(4),
- BATADV_DBG_NC = BIT(5),
- BATADV_DBG_ALL = 63,
-};
-
-#ifdef CONFIG_BATMAN_ADV_DEBUG
-int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
-__printf(2, 3);
-
-/* possibly ratelimited debug output */
-#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
- do { \
- if (atomic_read(&bat_priv->log_level) & type && \
- (!ratelimited || net_ratelimit())) \
- batadv_debug_log(bat_priv, fmt, ## arg);\
- } \
- while (0)
-#else /* !CONFIG_BATMAN_ADV_DEBUG */
-__printf(4, 5)
-static inline void _batadv_dbg(int type __always_unused,
- struct batadv_priv *bat_priv __always_unused,
- int ratelimited __always_unused,
- const char *fmt __always_unused, ...)
-{
-}
-#endif
-
-#define batadv_dbg(type, bat_priv, arg...) \
- _batadv_dbg(type, bat_priv, 0, ## arg)
-#define batadv_dbg_ratelimited(type, bat_priv, arg...) \
- _batadv_dbg(type, bat_priv, 1, ## arg)
-
-#define batadv_info(net_dev, fmt, arg...) \
- do { \
- struct net_device *_netdev = (net_dev); \
- struct batadv_priv *_batpriv = netdev_priv(_netdev); \
- batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \
- pr_info("%s: " fmt, _netdev->name, ## arg); \
- } while (0)
-#define batadv_err(net_dev, fmt, arg...) \
- do { \
- struct net_device *_netdev = (net_dev); \
- struct batadv_priv *_batpriv = netdev_priv(_netdev); \
- batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \
- pr_err("%s: " fmt, _netdev->name, ## arg); \
- } while (0)
-
-/**
* batadv_compare_eth - Compare two not u16 aligned Ethernet addresses
* @data1: Pointer to a six-byte array containing the Ethernet address
* @data2: Pointer other six-byte array containing the Ethernet address
@@ -370,39 +310,6 @@ static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
*/
#define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0]))
-void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
- u8 type, u8 version,
- void *tvlv_value, u16 tvlv_value_len);
-u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
- unsigned char **packet_buff,
- int *packet_buff_len, int packet_min_len);
-void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
- struct batadv_ogm_packet *batadv_ogm_packet,
- struct batadv_orig_node *orig_node);
-void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
- u8 type, u8 version);
-
-void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
- void (*optr)(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig,
- u8 flags,
- void *tvlv_value,
- u16 tvlv_value_len),
- int (*uptr)(struct batadv_priv *bat_priv,
- u8 *src, u8 *dst,
- void *tvlv_value,
- u16 tvlv_value_len),
- u8 type, u8 version, u8 flags);
-void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
- u8 type, u8 version);
-int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
- bool ogm_source,
- struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_buff, u16 tvlv_buff_len);
-void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
- u8 *dst, u8 type, u8 version,
- void *tvlv_value, u16 tvlv_value_len);
unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len);
bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid);
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index c32f24fafe67..cc915073a753 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -25,17 +25,23 @@
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/fs.h>
+#include <linux/icmpv6.h>
+#include <linux/if_bridge.h>
#include <linux/if_ether.h>
-#include <linux/in6.h>
+#include <linux/igmp.h>
#include <linux/in.h>
+#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
+#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -43,18 +49,57 @@
#include <linux/string.h>
#include <linux/types.h>
#include <net/addrconf.h>
+#include <net/if_inet6.h>
+#include <net/ip.h>
#include <net/ipv6.h>
+#include "hard-interface.h"
+#include "hash.h"
+#include "log.h"
#include "packet.h"
#include "translation-table.h"
+#include "tvlv.h"
+
+/**
+ * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists
+ * @soft_iface: netdev struct of the mesh interface
+ *
+ * If the given soft interface has a bridge on top then the refcount
+ * of the according net device is increased.
+ *
+ * Return: NULL if no such bridge exists. Otherwise the net device of the
+ * bridge.
+ */
+static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
+{
+ struct net_device *upper = soft_iface;
+
+ rcu_read_lock();
+ do {
+ upper = netdev_master_upper_dev_get_rcu(upper);
+ } while (upper && !(upper->priv_flags & IFF_EBRIDGE));
+
+ if (upper)
+ dev_hold(upper);
+ rcu_read_unlock();
+
+ return upper;
+}
/**
* batadv_mcast_mla_softif_get - get softif multicast listeners
* @dev: the device to collect multicast addresses from
* @mcast_list: a list to put found addresses into
*
- * Collect multicast addresses of the local multicast listeners
- * on the given soft interface, dev, in the given mcast_list.
+ * Collects multicast addresses of multicast listeners residing
+ * on this kernel on the given soft interface, dev, in
+ * the given mcast_list. In general, multicast listeners provided by
+ * your multicast receiving applications run directly on this node.
+ *
+ * If there is a bridge interface on top of dev, collects from that one
+ * instead. Just like with IP addresses and routes, multicast listeners
+ * will(/should) register to the bridge interface instead of an
+ * enslaved bat0.
*
* Return: -ENOMEM on memory allocation error or the number of
* items added to the mcast_list otherwise.
@@ -62,12 +107,13 @@
static int batadv_mcast_mla_softif_get(struct net_device *dev,
struct hlist_head *mcast_list)
{
+ struct net_device *bridge = batadv_mcast_get_bridge(dev);
struct netdev_hw_addr *mc_list_entry;
struct batadv_hw_addr *new;
int ret = 0;
- netif_addr_lock_bh(dev);
- netdev_for_each_mc_addr(mc_list_entry, dev) {
+ netif_addr_lock_bh(bridge ? bridge : dev);
+ netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) {
new = kmalloc(sizeof(*new), GFP_ATOMIC);
if (!new) {
ret = -ENOMEM;
@@ -78,7 +124,10 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
hlist_add_head(&new->list, mcast_list);
ret++;
}
- netif_addr_unlock_bh(dev);
+ netif_addr_unlock_bh(bridge ? bridge : dev);
+
+ if (bridge)
+ dev_put(bridge);
return ret;
}
@@ -104,6 +153,83 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr,
}
/**
+ * batadv_mcast_mla_br_addr_cpy - copy a bridge multicast address
+ * @dst: destination to write to - a multicast MAC address
+ * @src: source to read from - a multicast IP address
+ *
+ * Converts a given multicast IPv4/IPv6 address from a bridge
+ * to its matching multicast MAC address and copies it into the given
+ * destination buffer.
+ *
+ * Caller needs to make sure the destination buffer can hold
+ * at least ETH_ALEN bytes.
+ */
+static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
+{
+ if (src->proto == htons(ETH_P_IP))
+ ip_eth_mc_map(src->u.ip4, dst);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (src->proto == htons(ETH_P_IPV6))
+ ipv6_eth_mc_map(&src->u.ip6, dst);
+#endif
+ else
+ eth_zero_addr(dst);
+}
+
+/**
+ * batadv_mcast_mla_bridge_get - get bridged-in multicast listeners
+ * @dev: a bridge slave whose bridge to collect multicast addresses from
+ * @mcast_list: a list to put found addresses into
+ *
+ * Collects multicast addresses of multicast listeners residing
+ * on foreign, non-mesh devices which we gave access to our mesh via
+ * a bridge on top of the given soft interface, dev, in the given
+ * mcast_list.
+ *
+ * Return: -ENOMEM on memory allocation error or the number of
+ * items added to the mcast_list otherwise.
+ */
+static int batadv_mcast_mla_bridge_get(struct net_device *dev,
+ struct hlist_head *mcast_list)
+{
+ struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
+ struct br_ip_list *br_ip_entry, *tmp;
+ struct batadv_hw_addr *new;
+ u8 mcast_addr[ETH_ALEN];
+ int ret;
+
+ /* we don't need to detect these devices/listeners, the IGMP/MLD
+ * snooping code of the Linux bridge already does that for us
+ */
+ ret = br_multicast_list_adjacent(dev, &bridge_mcast_list);
+ if (ret < 0)
+ goto out;
+
+ list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) {
+ batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr);
+ if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list))
+ continue;
+
+ new = kmalloc(sizeof(*new), GFP_ATOMIC);
+ if (!new) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ether_addr_copy(new->addr, mcast_addr);
+ hlist_add_head(&new->list, mcast_list);
+ }
+
+out:
+ list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) {
+ list_del(&br_ip_entry->list);
+ kfree(br_ip_entry);
+ }
+
+ return ret;
+}
+
+/**
* batadv_mcast_mla_list_free - free a list of multicast addresses
* @bat_priv: the bat priv with all the soft interface information
* @mcast_list: the list to free
@@ -214,44 +340,195 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
}
/**
+ * batadv_mcast_querier_log - debug output regarding the querier status on link
+ * @bat_priv: the bat priv with all the soft interface information
+ * @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD")
+ * @old_state: the previous querier state on our link
+ * @new_state: the new querier state on our link
+ *
+ * Outputs debug messages to the logging facility with log level 'mcast'
+ * regarding changes to the querier status on the link which are relevant
+ * to our multicast optimizations.
+ *
+ * Usually this is about whether a querier appeared or vanished in
+ * our mesh or whether the querier is in the suboptimal position of being
+ * behind our local bridge segment: Snooping switches will directly
+ * forward listener reports to the querier, therefore batman-adv and
+ * the bridge will potentially not see these listeners - the querier is
+ * potentially shadowing listeners from us then.
+ *
+ * This is only interesting for nodes with a bridge on top of their
+ * soft interface.
+ */
+static void
+batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto,
+ struct batadv_mcast_querier_state *old_state,
+ struct batadv_mcast_querier_state *new_state)
+{
+ if (!old_state->exists && new_state->exists)
+ batadv_info(bat_priv->soft_iface, "%s Querier appeared\n",
+ str_proto);
+ else if (old_state->exists && !new_state->exists)
+ batadv_info(bat_priv->soft_iface,
+ "%s Querier disappeared - multicast optimizations disabled\n",
+ str_proto);
+ else if (!bat_priv->mcast.bridged && !new_state->exists)
+ batadv_info(bat_priv->soft_iface,
+ "No %s Querier present - multicast optimizations disabled\n",
+ str_proto);
+
+ if (new_state->exists) {
+ if ((!old_state->shadowing && new_state->shadowing) ||
+ (!old_state->exists && new_state->shadowing))
+ batadv_dbg(BATADV_DBG_MCAST, bat_priv,
+ "%s Querier is behind our bridged segment: Might shadow listeners\n",
+ str_proto);
+ else if (old_state->shadowing && !new_state->shadowing)
+ batadv_dbg(BATADV_DBG_MCAST, bat_priv,
+ "%s Querier is not behind our bridged segment\n",
+ str_proto);
+ }
+}
+
+/**
+ * batadv_mcast_bridge_log - debug output for topology changes in bridged setups
+ * @bat_priv: the bat priv with all the soft interface information
+ * @bridged: a flag about whether the soft interface is currently bridged or not
+ * @querier_ipv4: (maybe) new status of a potential, selected IGMP querier
+ * @querier_ipv6: (maybe) new status of a potential, selected MLD querier
+ *
+ * If no bridges are ever used on this node, then this function does nothing.
+ *
+ * Otherwise this function outputs debug information to the 'mcast' log level
+ * which might be relevant to our multicast optimizations.
+ *
+ * More precisely, it outputs information when a bridge interface is added or
+ * removed from a soft interface. And when a bridge is present, it further
+ * outputs information about the querier state which is relevant for the
+ * multicast flags this node is going to set.
+ */
+static void
+batadv_mcast_bridge_log(struct batadv_priv *bat_priv, bool bridged,
+ struct batadv_mcast_querier_state *querier_ipv4,
+ struct batadv_mcast_querier_state *querier_ipv6)
+{
+ if (!bat_priv->mcast.bridged && bridged)
+ batadv_dbg(BATADV_DBG_MCAST, bat_priv,
+ "Bridge added: Setting Unsnoopables(U)-flag\n");
+ else if (bat_priv->mcast.bridged && !bridged)
+ batadv_dbg(BATADV_DBG_MCAST, bat_priv,
+ "Bridge removed: Unsetting Unsnoopables(U)-flag\n");
+
+ if (bridged) {
+ batadv_mcast_querier_log(bat_priv, "IGMP",
+ &bat_priv->mcast.querier_ipv4,
+ querier_ipv4);
+ batadv_mcast_querier_log(bat_priv, "MLD",
+ &bat_priv->mcast.querier_ipv6,
+ querier_ipv6);
+ }
+}
+
+/**
+ * batadv_mcast_flags_logs - output debug information about mcast flag changes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @flags: flags indicating the new multicast state
+ *
+ * Whenever the multicast flags this nodes announces changes (@mcast_flags vs.
+ * bat_priv->mcast.flags), this notifies userspace via the 'mcast' log level.
+ */
+static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
+{
+ u8 old_flags = bat_priv->mcast.flags;
+ char str_old_flags[] = "[...]";
+
+ sprintf(str_old_flags, "[%c%c%c]",
+ (old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
+ (old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
+ (old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+
+ batadv_dbg(BATADV_DBG_MCAST, bat_priv,
+ "Changing multicast flags from '%s' to '[%c%c%c]'\n",
+ bat_priv->mcast.enabled ? str_old_flags : "<undefined>",
+ (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
+ (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
+ (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+}
+
+/**
* batadv_mcast_mla_tvlv_update - update multicast tvlv
* @bat_priv: the bat priv with all the soft interface information
*
* Updates the own multicast tvlv with our current multicast related settings,
* capabilities and inabilities.
*
- * Return: true if the tvlv container is registered afterwards. Otherwise
- * returns false.
+ * Return: false if we want all IPv4 && IPv6 multicast traffic and true
+ * otherwise.
*/
static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
{
struct batadv_tvlv_mcast_data mcast_data;
+ struct batadv_mcast_querier_state querier4 = {false, false};
+ struct batadv_mcast_querier_state querier6 = {false, false};
+ struct net_device *dev = bat_priv->soft_iface;
+ bool bridged;
mcast_data.flags = BATADV_NO_FLAGS;
memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved));
- /* Avoid attaching MLAs, if there is a bridge on top of our soft
- * interface, we don't support that yet (TODO)
+ bridged = batadv_mcast_has_bridge(bat_priv);
+ if (!bridged)
+ goto update;
+
+#if !IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
+ pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
+#endif
+
+ querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP);
+ querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP);
+
+ querier6.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IPV6);
+ querier6.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IPV6);
+
+ mcast_data.flags |= BATADV_MCAST_WANT_ALL_UNSNOOPABLES;
+
+ /* 1) If no querier exists at all, then multicast listeners on
+ * our local TT clients behind the bridge will keep silent.
+ * 2) If the selected querier is on one of our local TT clients,
+ * behind the bridge, then this querier might shadow multicast
+ * listeners on our local TT clients, behind this bridge.
+ *
+ * In both cases, we will signalize other batman nodes that
+ * we need all multicast traffic of the according protocol.
*/
- if (batadv_mcast_has_bridge(bat_priv)) {
- if (bat_priv->mcast.enabled) {
- batadv_tvlv_container_unregister(bat_priv,
- BATADV_TVLV_MCAST, 1);
- bat_priv->mcast.enabled = false;
- }
+ if (!querier4.exists || querier4.shadowing)
+ mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV4;
- return false;
- }
+ if (!querier6.exists || querier6.shadowing)
+ mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV6;
+
+update:
+ batadv_mcast_bridge_log(bat_priv, bridged, &querier4, &querier6);
+
+ bat_priv->mcast.querier_ipv4.exists = querier4.exists;
+ bat_priv->mcast.querier_ipv4.shadowing = querier4.shadowing;
+
+ bat_priv->mcast.querier_ipv6.exists = querier6.exists;
+ bat_priv->mcast.querier_ipv6.shadowing = querier6.shadowing;
+
+ bat_priv->mcast.bridged = bridged;
if (!bat_priv->mcast.enabled ||
mcast_data.flags != bat_priv->mcast.flags) {
- batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 1,
+ batadv_mcast_flags_log(bat_priv, mcast_data.flags);
+ batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 2,
&mcast_data, sizeof(mcast_data));
bat_priv->mcast.flags = mcast_data.flags;
bat_priv->mcast.enabled = true;
}
- return true;
+ return !(mcast_data.flags &
+ (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6));
}
/**
@@ -274,6 +551,10 @@ void batadv_mcast_mla_update(struct batadv_priv *bat_priv)
if (ret < 0)
goto out;
+ ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list);
+ if (ret < 0)
+ goto out;
+
update:
batadv_mcast_mla_tt_retract(bat_priv, &mcast_list);
batadv_mcast_mla_tt_add(bat_priv, &mcast_list);
@@ -283,6 +564,31 @@ out:
}
/**
+ * batadv_mcast_is_report_ipv4 - check for IGMP reports
+ * @skb: the ethernet frame destined for the mesh
+ *
+ * This call might reallocate skb data.
+ *
+ * Checks whether the given frame is a valid IGMP report.
+ *
+ * Return: If so then true, otherwise false.
+ */
+static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb)
+{
+ if (ip_mc_check_igmp(skb, NULL) < 0)
+ return false;
+
+ switch (igmp_hdr(skb)->type) {
+ case IGMP_HOST_MEMBERSHIP_REPORT:
+ case IGMPV2_HOST_MEMBERSHIP_REPORT:
+ case IGMPV3_HOST_MEMBERSHIP_REPORT:
+ return true;
+ }
+
+ return false;
+}
+
+/**
* batadv_mcast_forw_mode_check_ipv4 - check for optimized forwarding potential
* @bat_priv: the bat priv with all the soft interface information
* @skb: the IPv4 packet to check
@@ -304,6 +610,9 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*iphdr)))
return -ENOMEM;
+ if (batadv_mcast_is_report_ipv4(skb))
+ return -EINVAL;
+
iphdr = ip_hdr(skb);
/* TODO: Implement Multicast Router Discovery (RFC4286),
@@ -320,6 +629,31 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
return 0;
}
+#if IS_ENABLED(CONFIG_IPV6)
+/**
+ * batadv_mcast_is_report_ipv6 - check for MLD reports
+ * @skb: the ethernet frame destined for the mesh
+ *
+ * This call might reallocate skb data.
+ *
+ * Checks whether the given frame is a valid MLD report.
+ *
+ * Return: If so then true, otherwise false.
+ */
+static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb)
+{
+ if (ipv6_mc_check_mld(skb, NULL) < 0)
+ return false;
+
+ switch (icmp6_hdr(skb)->icmp6_type) {
+ case ICMPV6_MGM_REPORT:
+ case ICMPV6_MLD2_REPORT:
+ return true;
+ }
+
+ return false;
+}
+
/**
* batadv_mcast_forw_mode_check_ipv6 - check for optimized forwarding potential
* @bat_priv: the bat priv with all the soft interface information
@@ -341,6 +675,9 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*ip6hdr)))
return -ENOMEM;
+ if (batadv_mcast_is_report_ipv6(skb))
+ return -EINVAL;
+
ip6hdr = ipv6_hdr(skb);
/* TODO: Implement Multicast Router Discovery (RFC4286),
@@ -357,6 +694,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
return 0;
}
+#endif
/**
* batadv_mcast_forw_mode_check - check for optimized forwarding potential
@@ -385,9 +723,11 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
case ETH_P_IP:
return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb,
is_unsnoopable);
+#if IS_ENABLED(CONFIG_IPV6)
case ETH_P_IPV6:
return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb,
is_unsnoopable);
+#endif
default:
return -EINVAL;
}
@@ -728,18 +1068,18 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_tvlv_ogm_handler_v1 - process incoming multicast tvlv container
+ * batadv_mcast_tvlv_ogm_handler - process incoming multicast tvlv container
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node of the ogm
* @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags)
* @tvlv_value: tvlv buffer containing the multicast data
* @tvlv_value_len: tvlv buffer length
*/
-static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
- struct batadv_orig_node *orig,
- u8 flags,
- void *tvlv_value,
- u16 tvlv_value_len)
+static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ u8 flags,
+ void *tvlv_value,
+ u16 tvlv_value_len)
{
bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
u8 mcast_flags = BATADV_NO_FLAGS;
@@ -789,19 +1129,120 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
*/
void batadv_mcast_init(struct batadv_priv *bat_priv)
{
- batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler_v1,
- NULL, BATADV_TVLV_MCAST, 1,
+ batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
+ NULL, BATADV_TVLV_MCAST, 2,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
}
/**
+ * batadv_mcast_flags_print_header - print own mcast flags to debugfs table
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq: debugfs table seq_file struct
+ *
+ * Prints our own multicast flags including a more specific reason why
+ * they are set, that is prints the bridge and querier state too, to
+ * the debugfs table specified via @seq.
+ */
+static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv,
+ struct seq_file *seq)
+{
+ u8 flags = bat_priv->mcast.flags;
+ char querier4, querier6, shadowing4, shadowing6;
+ bool bridged = bat_priv->mcast.bridged;
+
+ if (bridged) {
+ querier4 = bat_priv->mcast.querier_ipv4.exists ? '.' : '4';
+ querier6 = bat_priv->mcast.querier_ipv6.exists ? '.' : '6';
+ shadowing4 = bat_priv->mcast.querier_ipv4.shadowing ? '4' : '.';
+ shadowing6 = bat_priv->mcast.querier_ipv6.shadowing ? '6' : '.';
+ } else {
+ querier4 = '?';
+ querier6 = '?';
+ shadowing4 = '?';
+ shadowing6 = '?';
+ }
+
+ seq_printf(seq, "Multicast flags (own flags: [%c%c%c])\n",
+ (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
+ (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
+ (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.');
+ seq_printf(seq, "* Bridged [U]\t\t\t\t%c\n", bridged ? 'U' : '.');
+ seq_printf(seq, "* No IGMP/MLD Querier [4/6]:\t\t%c/%c\n",
+ querier4, querier6);
+ seq_printf(seq, "* Shadowing IGMP/MLD Querier [4/6]:\t%c/%c\n",
+ shadowing4, shadowing6);
+ seq_puts(seq, "-------------------------------------------\n");
+ seq_printf(seq, " %-10s %s\n", "Originator", "Flags");
+}
+
+/**
+ * batadv_mcast_flags_seq_print_text - print the mcast flags of other nodes
+ * @seq: seq file to print on
+ * @offset: not used
+ *
+ * This prints a table of (primary) originators and their according
+ * multicast flags, including (in the header) our own.
+ *
+ * Return: always 0
+ */
+int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
+{
+ struct net_device *net_dev = (struct net_device *)seq->private;
+ struct batadv_priv *bat_priv = netdev_priv(net_dev);
+ struct batadv_hard_iface *primary_if;
+ struct batadv_hashtable *hash = bat_priv->orig_hash;
+ struct batadv_orig_node *orig_node;
+ struct hlist_head *head;
+ u8 flags;
+ u32 i;
+
+ primary_if = batadv_seq_print_text_primary_if_get(seq);
+ if (!primary_if)
+ return 0;
+
+ batadv_mcast_flags_print_header(bat_priv, seq);
+
+ for (i = 0; i < hash->size; i++) {
+ head = &hash->table[i];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capa_initialized))
+ continue;
+
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig_node->capabilities)) {
+ seq_printf(seq, "%pM -\n", orig_node->orig);
+ continue;
+ }
+
+ flags = orig_node->mcast_flags;
+
+ seq_printf(seq, "%pM [%c%c%c]\n", orig_node->orig,
+ (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)
+ ? 'U' : '.',
+ (flags & BATADV_MCAST_WANT_ALL_IPV4)
+ ? '4' : '.',
+ (flags & BATADV_MCAST_WANT_ALL_IPV6)
+ ? '6' : '.');
+ }
+ rcu_read_unlock();
+ }
+
+ batadv_hardif_put(primary_if);
+
+ return 0;
+}
+
+/**
* batadv_mcast_free - free the multicast optimizations structures
* @bat_priv: the bat priv with all the soft interface information
*/
void batadv_mcast_free(struct batadv_priv *bat_priv)
{
- batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 1);
- batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 1);
+ batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
+ batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
spin_lock_bh(&bat_priv->tt.commit_lock);
batadv_mcast_mla_tt_retract(bat_priv, NULL);
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 80bceec55592..1fb00ba84907 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -20,6 +20,7 @@
#include "main.h"
+struct seq_file;
struct sk_buff;
/**
@@ -46,6 +47,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
void batadv_mcast_init(struct batadv_priv *bat_priv);
+int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
+
void batadv_mcast_free(struct batadv_priv *bat_priv);
void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
new file mode 100644
index 000000000000..231f8eaf075b
--- /dev/null
+++ b/net/batman-adv/netlink.c
@@ -0,0 +1,424 @@
+/* Copyright (C) 2016 B.A.T.M.A.N. contributors:
+ *
+ * Matthias Schiffer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "netlink.h"
+#include "main.h"
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genetlink.h>
+#include <linux/if_ether.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/printk.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
+
+#include "hard-interface.h"
+#include "soft-interface.h"
+#include "tp_meter.h"
+
+struct sk_buff;
+
+static struct genl_family batadv_netlink_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = BATADV_NL_NAME,
+ .version = 1,
+ .maxattr = BATADV_ATTR_MAX,
+};
+
+/* multicast groups */
+enum batadv_netlink_multicast_groups {
+ BATADV_NL_MCGRP_TPMETER,
+};
+
+static struct genl_multicast_group batadv_netlink_mcgrps[] = {
+ [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
+};
+
+static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
+ [BATADV_ATTR_VERSION] = { .type = NLA_STRING },
+ [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 },
+ [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING },
+ [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN },
+ [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 },
+ [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 },
+ [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 },
+ [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 },
+};
+
+/**
+ * batadv_netlink_mesh_info_put - fill in generic information about mesh
+ * interface
+ * @msg: netlink message to be sent back
+ * @soft_iface: interface for which the data should be taken
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
+{
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ struct batadv_hard_iface *primary_if = NULL;
+ struct net_device *hard_iface;
+ int ret = -ENOBUFS;
+
+ if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) ||
+ nla_put_string(msg, BATADV_ATTR_ALGO_NAME,
+ bat_priv->algo_ops->name) ||
+ nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) ||
+ nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) ||
+ nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN,
+ soft_iface->dev_addr))
+ goto out;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
+ hard_iface = primary_if->net_dev;
+
+ if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+ hard_iface->ifindex) ||
+ nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+ hard_iface->name) ||
+ nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
+ hard_iface->dev_addr))
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ if (primary_if)
+ batadv_hardif_put(primary_if);
+
+ return ret;
+}
+
+/**
+ * batadv_netlink_get_mesh_info - handle incoming BATADV_CMD_GET_MESH_INFO
+ * netlink request
+ * @skb: received netlink message
+ * @info: receiver information
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct net_device *soft_iface;
+ struct sk_buff *msg = NULL;
+ void *msg_head;
+ int ifindex;
+ int ret;
+
+ if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+ if (!ifindex)
+ return -EINVAL;
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &batadv_netlink_family, 0,
+ BATADV_CMD_GET_MESH_INFO);
+ if (!msg_head) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ ret = batadv_netlink_mesh_info_put(msg, soft_iface);
+
+ out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ if (ret) {
+ if (msg)
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ genlmsg_end(msg, msg_head);
+ return genlmsg_reply(msg, info);
+}
+
+/**
+ * batadv_netlink_tp_meter_put - Fill information of started tp_meter session
+ * @msg: netlink message to be sent back
+ * @cookie: tp meter session cookie
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie)
+{
+ if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie))
+ return -ENOBUFS;
+
+ return 0;
+}
+
+/**
+ * batadv_netlink_tpmeter_notify - send tp_meter result via netlink to client
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: destination of tp_meter session
+ * @result: reason for tp meter session stop
+ * @test_time: total time ot the tp_meter session
+ * @total_bytes: bytes acked to the receiver
+ * @cookie: cookie of tp_meter session
+ *
+ * Return: 0 on success, < 0 on error
+ */
+int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
+ u8 result, u32 test_time, u64 total_bytes,
+ u32 cookie)
+{
+ struct sk_buff *msg;
+ void *hdr;
+ int ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, 0, 0, &batadv_netlink_family, 0,
+ BATADV_CMD_TP_METER);
+ if (!hdr) {
+ ret = -ENOBUFS;
+ goto err_genlmsg;
+ }
+
+ if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, BATADV_ATTR_TPMETER_TEST_TIME, test_time))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, BATADV_ATTR_TPMETER_BYTES, total_bytes,
+ BATADV_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, BATADV_ATTR_TPMETER_RESULT, result))
+ goto nla_put_failure;
+
+ if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, dst))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&batadv_netlink_family,
+ dev_net(bat_priv->soft_iface), msg, 0,
+ BATADV_NL_MCGRP_TPMETER, GFP_KERNEL);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ ret = -EMSGSIZE;
+
+err_genlmsg:
+ nlmsg_free(msg);
+ return ret;
+}
+
+/**
+ * batadv_netlink_tp_meter_start - Start a new tp_meter session
+ * @skb: received netlink message
+ * @info: receiver information
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ struct sk_buff *msg = NULL;
+ u32 test_length;
+ void *msg_head;
+ int ifindex;
+ u32 cookie;
+ u8 *dst;
+ int ret;
+
+ if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
+ return -EINVAL;
+
+ if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME])
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+ if (!ifindex)
+ return -EINVAL;
+
+ dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
+
+ test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]);
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &batadv_netlink_family, 0,
+ BATADV_CMD_TP_METER);
+ if (!msg_head) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ batadv_tp_start(bat_priv, dst, test_length, &cookie);
+
+ ret = batadv_netlink_tp_meter_put(msg, cookie);
+
+ out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ if (ret) {
+ if (msg)
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ genlmsg_end(msg, msg_head);
+ return genlmsg_reply(msg, info);
+}
+
+/**
+ * batadv_netlink_tp_meter_start - Cancel a running tp_meter session
+ * @skb: received netlink message
+ * @info: receiver information
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int
+batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct net_device *soft_iface;
+ struct batadv_priv *bat_priv;
+ int ifindex;
+ u8 *dst;
+ int ret = 0;
+
+ if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS])
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
+ if (!ifindex)
+ return -EINVAL;
+
+ dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]);
+
+ soft_iface = dev_get_by_index(net, ifindex);
+ if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ bat_priv = netdev_priv(soft_iface);
+ batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL);
+
+out:
+ if (soft_iface)
+ dev_put(soft_iface);
+
+ return ret;
+}
+
+static struct genl_ops batadv_netlink_ops[] = {
+ {
+ .cmd = BATADV_CMD_GET_MESH_INFO,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .doit = batadv_netlink_get_mesh_info,
+ },
+ {
+ .cmd = BATADV_CMD_TP_METER,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .doit = batadv_netlink_tp_meter_start,
+ },
+ {
+ .cmd = BATADV_CMD_TP_METER_CANCEL,
+ .flags = GENL_ADMIN_PERM,
+ .policy = batadv_netlink_policy,
+ .doit = batadv_netlink_tp_meter_cancel,
+ },
+};
+
+/**
+ * batadv_netlink_register - register batadv genl netlink family
+ */
+void __init batadv_netlink_register(void)
+{
+ int ret;
+
+ ret = genl_register_family_with_ops_groups(&batadv_netlink_family,
+ batadv_netlink_ops,
+ batadv_netlink_mcgrps);
+ if (ret)
+ pr_warn("unable to register netlink family");
+}
+
+/**
+ * batadv_netlink_unregister - unregister batadv genl netlink family
+ */
+void batadv_netlink_unregister(void)
+{
+ genl_unregister_family(&batadv_netlink_family);
+}
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
new file mode 100644
index 000000000000..945653ab58c6
--- /dev/null
+++ b/net/batman-adv/netlink.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2016 B.A.T.M.A.N. contributors:
+ *
+ * Matthias Schiffer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_NETLINK_H_
+#define _NET_BATMAN_ADV_NETLINK_H_
+
+#include "main.h"
+
+#include <linux/types.h>
+
+void batadv_netlink_register(void);
+void batadv_netlink_unregister(void);
+
+int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
+ u8 result, u32 test_time, u64 total_bytes,
+ u32 cookie);
+
+#endif /* _NET_BATMAN_ADV_NETLINK_H_ */
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 678f06865312..293ef4ffd4e1 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -51,10 +51,12 @@
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "originator.h"
#include "packet.h"
#include "routing.h"
#include "send.h"
+#include "tvlv.h"
static struct lock_class_key batadv_nc_coding_hash_lock_class_key;
static struct lock_class_key batadv_nc_decoding_hash_lock_class_key;
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 7f51bc2c06eb..7d1e5421f6bc 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -34,11 +34,13 @@
#include <linux/spinlock.h>
#include <linux/workqueue.h>
+#include "bat_algo.h"
#include "distributed-arp-table.h"
#include "fragmentation.h"
#include "gateway_client.h"
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "multicast.h"
#include "network-coding.h"
#include "routing.h"
@@ -251,10 +253,8 @@ static void batadv_neigh_node_release(struct kref *ref)
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
struct batadv_neigh_ifinfo *neigh_ifinfo;
- struct batadv_algo_ops *bao;
neigh_node = container_of(ref, struct batadv_neigh_node, refcount);
- bao = neigh_node->orig_node->bat_priv->bat_algo_ops;
hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
&neigh_node->ifinfo_list, list) {
@@ -263,9 +263,6 @@ static void batadv_neigh_node_release(struct kref *ref)
batadv_hardif_neigh_put(neigh_node->hardif_neigh);
- if (bao->bat_neigh_free)
- bao->bat_neigh_free(neigh_node);
-
batadv_hardif_put(neigh_node->if_incoming);
kfree_rcu(neigh_node, rcu);
@@ -537,8 +534,8 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
kref_init(&hardif_neigh->refcount);
- if (bat_priv->bat_algo_ops->bat_hardif_neigh_init)
- bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh);
+ if (bat_priv->algo_ops->neigh.hardif_init)
+ bat_priv->algo_ops->neigh.hardif_init(hardif_neigh);
hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list);
@@ -602,19 +599,19 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
}
/**
- * batadv_neigh_node_new - create and init a new neigh_node object
+ * batadv_neigh_node_create - create a neigh node object
* @orig_node: originator object representing the neighbour
* @hard_iface: the interface where the neighbour is connected to
* @neigh_addr: the mac address of the neighbour interface
*
* Allocates a new neigh_node object and initialises all the generic fields.
*
- * Return: neighbor when found. Othwerwise NULL
+ * Return: the neighbour node if found or created or NULL otherwise.
*/
-struct batadv_neigh_node *
-batadv_neigh_node_new(struct batadv_orig_node *orig_node,
- struct batadv_hard_iface *hard_iface,
- const u8 *neigh_addr)
+static struct batadv_neigh_node *
+batadv_neigh_node_create(struct batadv_orig_node *orig_node,
+ struct batadv_hard_iface *hard_iface,
+ const u8 *neigh_addr)
{
struct batadv_neigh_node *neigh_node;
struct batadv_hardif_neigh_node *hardif_neigh = NULL;
@@ -667,6 +664,29 @@ out:
}
/**
+ * batadv_neigh_node_get_or_create - retrieve or create a neigh node object
+ * @orig_node: originator object representing the neighbour
+ * @hard_iface: the interface where the neighbour is connected to
+ * @neigh_addr: the mac address of the neighbour interface
+ *
+ * Return: the neighbour node if found or created or NULL otherwise.
+ */
+struct batadv_neigh_node *
+batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
+ struct batadv_hard_iface *hard_iface,
+ const u8 *neigh_addr)
+{
+ struct batadv_neigh_node *neigh_node = NULL;
+
+ /* first check without locking to avoid the overhead */
+ neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr);
+ if (neigh_node)
+ return neigh_node;
+
+ return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr);
+}
+
+/**
* batadv_hardif_neigh_seq_print_text - print the single hop neighbour list
* @seq: neighbour table seq_file struct
* @offset: not used
@@ -686,17 +706,17 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
BATADV_SOURCE_VERSION, primary_if->net_dev->name,
primary_if->net_dev->dev_addr, net_dev->name,
- bat_priv->bat_algo_ops->name);
+ bat_priv->algo_ops->name);
batadv_hardif_put(primary_if);
- if (!bat_priv->bat_algo_ops->bat_neigh_print) {
+ if (!bat_priv->algo_ops->neigh.print) {
seq_puts(seq,
"No printing function for this routing protocol\n");
return 0;
}
- bat_priv->bat_algo_ops->bat_neigh_print(bat_priv, seq);
+ bat_priv->algo_ops->neigh.print(bat_priv, seq);
return 0;
}
@@ -747,8 +767,8 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
batadv_frag_purge_orig(orig_node, NULL);
- if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
- orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
+ if (orig_node->bat_priv->algo_ops->orig.free)
+ orig_node->bat_priv->algo_ops->orig.free(orig_node);
kfree(orig_node->tt_buff);
kfree(orig_node);
@@ -1077,12 +1097,12 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
struct batadv_hard_iface *if_outgoing)
{
struct batadv_neigh_node *best = NULL, *neigh;
- struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_algo_ops *bao = bat_priv->algo_ops;
rcu_read_lock();
hlist_for_each_entry_rcu(neigh, &orig_node->neigh_list, list) {
- if (best && (bao->bat_neigh_cmp(neigh, if_outgoing,
- best, if_outgoing) <= 0))
+ if (best && (bao->neigh.cmp(neigh, if_outgoing, best,
+ if_outgoing) <= 0))
continue;
if (!kref_get_unless_zero(&neigh->refcount))
@@ -1234,18 +1254,17 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
BATADV_SOURCE_VERSION, primary_if->net_dev->name,
primary_if->net_dev->dev_addr, net_dev->name,
- bat_priv->bat_algo_ops->name);
+ bat_priv->algo_ops->name);
batadv_hardif_put(primary_if);
- if (!bat_priv->bat_algo_ops->bat_orig_print) {
+ if (!bat_priv->algo_ops->orig.print) {
seq_puts(seq,
"No printing function for this routing protocol\n");
return 0;
}
- bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq,
- BATADV_IF_DEFAULT);
+ bat_priv->algo_ops->orig.print(bat_priv, seq, BATADV_IF_DEFAULT);
return 0;
}
@@ -1272,7 +1291,7 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
}
bat_priv = netdev_priv(hard_iface->soft_iface);
- if (!bat_priv->bat_algo_ops->bat_orig_print) {
+ if (!bat_priv->algo_ops->orig.print) {
seq_puts(seq,
"No printing function for this routing protocol\n");
goto out;
@@ -1286,9 +1305,9 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, "[B.A.T.M.A.N. adv %s, IF/MAC: %s/%pM (%s %s)]\n",
BATADV_SOURCE_VERSION, hard_iface->net_dev->name,
hard_iface->net_dev->dev_addr,
- hard_iface->soft_iface->name, bat_priv->bat_algo_ops->name);
+ hard_iface->soft_iface->name, bat_priv->algo_ops->name);
- bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface);
+ bat_priv->algo_ops->orig.print(bat_priv, seq, hard_iface);
out:
if (hard_iface)
@@ -1300,7 +1319,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
int max_if_num)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_algo_ops *bao = bat_priv->algo_ops;
struct batadv_hashtable *hash = bat_priv->orig_hash;
struct hlist_head *head;
struct batadv_orig_node *orig_node;
@@ -1316,9 +1335,8 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
rcu_read_lock();
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
ret = 0;
- if (bao->bat_orig_add_if)
- ret = bao->bat_orig_add_if(orig_node,
- max_if_num);
+ if (bao->orig.add_if)
+ ret = bao->orig.add_if(orig_node, max_if_num);
if (ret == -ENOMEM)
goto err;
}
@@ -1340,7 +1358,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
struct hlist_head *head;
struct batadv_hard_iface *hard_iface_tmp;
struct batadv_orig_node *orig_node;
- struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_algo_ops *bao = bat_priv->algo_ops;
u32 i;
int ret;
@@ -1353,10 +1371,9 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
rcu_read_lock();
hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
ret = 0;
- if (bao->bat_orig_del_if)
- ret = bao->bat_orig_del_if(orig_node,
- max_if_num,
- hard_iface->if_num);
+ if (bao->orig.del_if)
+ ret = bao->orig.del_if(orig_node, max_if_num,
+ hard_iface->if_num);
if (ret == -ENOMEM)
goto err;
}
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 64a8951e5844..566306bf05dc 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -46,9 +46,9 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
void
batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh);
struct batadv_neigh_node *
-batadv_neigh_node_new(struct batadv_orig_node *orig_node,
- struct batadv_hard_iface *hard_iface,
- const u8 *neigh_addr);
+batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
+ struct batadv_hard_iface *hard_iface,
+ const u8 *neigh_addr);
void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node);
struct batadv_neigh_node *
batadv_orig_router_get(struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 372128ddb474..6b011ff64dd8 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -21,6 +21,8 @@
#include <asm/byteorder.h>
#include <linux/types.h>
+#define batadv_tp_is_error(n) ((u8)n > 127 ? 1 : 0)
+
/**
* enum batadv_packettype - types for batman-adv encapsulated packets
* @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV
@@ -93,6 +95,7 @@ enum batadv_icmp_packettype {
BATADV_ECHO_REQUEST = 8,
BATADV_TTL_EXCEEDED = 11,
BATADV_PARAMETER_PROBLEM = 12,
+ BATADV_TP = 15,
};
/**
@@ -285,6 +288,16 @@ struct batadv_elp_packet {
#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet)
/**
+ * enum batadv_icmp_user_cmd_type - types for batman-adv icmp cmd modes
+ * @BATADV_TP_START: start a throughput meter run
+ * @BATADV_TP_STOP: stop a throughput meter run
+ */
+enum batadv_icmp_user_cmd_type {
+ BATADV_TP_START = 0,
+ BATADV_TP_STOP = 2,
+};
+
+/**
* struct batadv_icmp_header - common members among all the ICMP packets
* @packet_type: batman-adv packet type, part of the general header
* @version: batman-adv protocol version, part of the genereal header
@@ -334,6 +347,47 @@ struct batadv_icmp_packet {
__be16 seqno;
};
+/**
+ * struct batadv_icmp_tp_packet - ICMP TP Meter packet
+ * @packet_type: batman-adv packet type, part of the general header
+ * @version: batman-adv protocol version, part of the genereal header
+ * @ttl: time to live for this packet, part of the genereal header
+ * @msg_type: ICMP packet type
+ * @dst: address of the destination node
+ * @orig: address of the source node
+ * @uid: local ICMP socket identifier
+ * @subtype: TP packet subtype (see batadv_icmp_tp_subtype)
+ * @session: TP session identifier
+ * @seqno: the TP sequence number
+ * @timestamp: time when the packet has been sent. This value is filled in a
+ * TP_MSG and echoed back in the next TP_ACK so that the sender can compute the
+ * RTT. Since it is read only by the host which wrote it, there is no need to
+ * store it using network order
+ */
+struct batadv_icmp_tp_packet {
+ u8 packet_type;
+ u8 version;
+ u8 ttl;
+ u8 msg_type; /* see ICMP message types above */
+ u8 dst[ETH_ALEN];
+ u8 orig[ETH_ALEN];
+ u8 uid;
+ u8 subtype;
+ u8 session[2];
+ __be32 seqno;
+ __be32 timestamp;
+};
+
+/**
+ * enum batadv_icmp_tp_subtype - ICMP TP Meter packet subtypes
+ * @BATADV_TP_MSG: Msg from sender to receiver
+ * @BATADV_TP_ACK: acknowledgment from receiver to sender
+ */
+enum batadv_icmp_tp_subtype {
+ BATADV_TP_MSG = 0,
+ BATADV_TP_ACK,
+};
+
#define BATADV_RR_LEN 16
/**
@@ -420,6 +474,7 @@ struct batadv_unicast_4addr_packet {
* @dest: final destination used when routing fragments
* @orig: originator of the fragment used when merging the packet
* @no: fragment number within this sequence
+ * @priority: priority of frame, from ToS IP precedence or 802.1p
* @reserved: reserved byte for alignment
* @seqno: sequence identification
* @total_size: size of the merged packet
@@ -430,9 +485,11 @@ struct batadv_frag_packet {
u8 ttl;
#if defined(__BIG_ENDIAN_BITFIELD)
u8 no:4;
- u8 reserved:4;
+ u8 priority:3;
+ u8 reserved:1;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
- u8 reserved:4;
+ u8 reserved:1;
+ u8 priority:3;
u8 no:4;
#else
#error "unknown bitfield endianness"
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 396c0134c5ab..af8e11933928 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -40,12 +40,15 @@
#include "fragmentation.h"
#include "hard-interface.h"
#include "icmp_socket.h"
+#include "log.h"
#include "network-coding.h"
#include "originator.h"
#include "packet.h"
#include "send.h"
#include "soft-interface.h"
+#include "tp_meter.h"
#include "translation-table.h"
+#include "tvlv.h"
static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
@@ -268,10 +271,19 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
icmph->ttl = BATADV_TTL;
res = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (res != NET_XMIT_DROP)
- ret = NET_RX_SUCCESS;
+ if (res == -1)
+ goto out;
+
+ ret = NET_RX_SUCCESS;
break;
+ case BATADV_TP:
+ if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet)))
+ goto out;
+
+ batadv_tp_meter_recv(bat_priv, skb);
+ ret = NET_RX_SUCCESS;
+ goto out;
default:
/* drop unknown type */
goto out;
@@ -290,7 +302,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
struct batadv_hard_iface *primary_if = NULL;
struct batadv_orig_node *orig_node = NULL;
struct batadv_icmp_packet *icmp_packet;
- int ret = NET_RX_DROP;
+ int res, ret = NET_RX_DROP;
icmp_packet = (struct batadv_icmp_packet *)skb->data;
@@ -321,7 +333,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
icmp_packet->msg_type = BATADV_TTL_EXCEEDED;
icmp_packet->ttl = BATADV_TTL;
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
+ res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (res != -1)
ret = NET_RX_SUCCESS;
out:
@@ -341,7 +354,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
struct ethhdr *ethhdr;
struct batadv_orig_node *orig_node = NULL;
int hdr_size = sizeof(struct batadv_icmp_header);
- int ret = NET_RX_DROP;
+ int res, ret = NET_RX_DROP;
/* drop packet if it has not necessary minimum size */
if (unlikely(!pskb_may_pull(skb, hdr_size)))
@@ -408,7 +421,8 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
icmph->ttl--;
/* route it */
- if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP)
+ res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
+ if (res != -1)
ret = NET_RX_SUCCESS;
out:
@@ -469,7 +483,7 @@ batadv_find_router(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_hard_iface *recv_if)
{
- struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_algo_ops *bao = bat_priv->algo_ops;
struct batadv_neigh_node *first_candidate_router = NULL;
struct batadv_neigh_node *next_candidate_router = NULL;
struct batadv_neigh_node *router, *cand_router = NULL;
@@ -523,9 +537,9 @@ batadv_find_router(struct batadv_priv *bat_priv,
/* alternative candidate should be good enough to be
* considered
*/
- if (!bao->bat_neigh_is_similar_or_better(cand_router,
- cand->if_outgoing,
- router, recv_if))
+ if (!bao->neigh.is_similar_or_better(cand_router,
+ cand->if_outgoing, router,
+ recv_if))
goto next;
/* don't use the same router twice */
@@ -645,6 +659,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
len = skb->len;
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
+ if (res == -1)
+ goto out;
/* translate transmit result into receive result */
if (res == NET_XMIT_SUCCESS) {
@@ -652,13 +668,10 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
len + ETH_HLEN);
-
- ret = NET_RX_SUCCESS;
- } else if (res == -EINPROGRESS) {
- /* skb was buffered and consumed */
- ret = NET_RX_SUCCESS;
}
+ ret = NET_RX_SUCCESS;
+
out:
if (orig_node)
batadv_orig_node_put(orig_node);
@@ -1007,6 +1020,8 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
if (!orig_node_src)
goto out;
+ skb->priority = frag_packet->priority + 256;
+
/* Route the fragment if it is not for us and too big to be merged. */
if (!batadv_is_my_mac(bat_priv, frag_packet->dest) &&
batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) {
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index b1a4e8a811c8..3a10d87b4b76 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -20,10 +20,11 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
+#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/fs.h>
-#include <linux/if_ether.h>
#include <linux/if.h>
+#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/kref.h>
@@ -42,6 +43,7 @@
#include "fragmentation.h"
#include "gateway_client.h"
#include "hard-interface.h"
+#include "log.h"
#include "network-coding.h"
#include "originator.h"
#include "routing.h"
@@ -71,6 +73,7 @@ int batadv_send_skb_packet(struct sk_buff *skb,
{
struct batadv_priv *bat_priv;
struct ethhdr *ethhdr;
+ int ret;
bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -108,8 +111,15 @@ int batadv_send_skb_packet(struct sk_buff *skb,
/* dev_queue_xmit() returns a negative result on error. However on
* congestion and traffic shaping, it drops and returns NET_XMIT_DROP
* (which is > 0). This will not be treated as an error.
+ *
+ * a negative value cannot be returned because it could be interepreted
+ * as not consumed skb by callers of batadv_send_skb_to_orig.
*/
- return dev_queue_xmit(skb);
+ ret = dev_queue_xmit(skb);
+ if (ret < 0)
+ ret = NET_XMIT_DROP;
+
+ return ret;
send_skb_err:
kfree_skb(skb);
return NET_XMIT_DROP;
@@ -155,8 +165,11 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
* host, NULL can be passed as recv_if and no interface alternating is
* attempted.
*
- * Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or
- * -EINPROGRESS if the skb is buffered for later transmit.
+ * Return: -1 on failure (and the skb is not consumed), -EINPROGRESS if the
+ * skb is buffered for later transmit or the NET_XMIT status returned by the
+ * lower routine if the packet has been passed down.
+ *
+ * If the returning value is not -1 the skb has been consumed.
*/
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
@@ -164,7 +177,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = orig_node->bat_priv;
struct batadv_neigh_node *neigh_node;
- int ret = NET_XMIT_DROP;
+ int ret = -1;
/* batadv_find_router() increases neigh_nodes refcount if found. */
neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
@@ -177,8 +190,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
if (atomic_read(&bat_priv->fragmentation) &&
skb->len > neigh_node->if_incoming->net_dev->mtu) {
/* Fragment and send packet. */
- if (batadv_frag_send_packet(skb, orig_node, neigh_node))
- ret = NET_XMIT_SUCCESS;
+ ret = batadv_frag_send_packet(skb, orig_node, neigh_node);
goto out;
}
@@ -187,12 +199,10 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
* (i.e. being forwarded). If the packet originates from this node or if
* network coding fails, then send the packet as usual.
*/
- if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) {
+ if (recv_if && batadv_nc_skb_forward(skb, neigh_node))
ret = -EINPROGRESS;
- } else {
- batadv_send_unicast_skb(skb, neigh_node);
- ret = NET_XMIT_SUCCESS;
- }
+ else
+ ret = batadv_send_unicast_skb(skb, neigh_node);
out:
if (neigh_node)
@@ -318,7 +328,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
{
struct batadv_unicast_packet *unicast_packet;
struct ethhdr *ethhdr;
- int ret = NET_XMIT_DROP;
+ int res, ret = NET_XMIT_DROP;
if (!orig_node)
goto out;
@@ -355,7 +365,8 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid))
unicast_packet->ttvn = unicast_packet->ttvn - 1;
- if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
+ res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (res != -1)
ret = NET_XMIT_SUCCESS;
out:
@@ -428,27 +439,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
orig_node, vid);
}
-void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface)
-{
- struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
-
- if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
- (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
- return;
-
- /* the interface gets activated here to avoid race conditions between
- * the moment of activating the interface in
- * hardif_activate_interface() where the originator mac is set and
- * outdated packets (especially uninitialized mac addresses) in the
- * packet queue
- */
- if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED)
- hard_iface->if_status = BATADV_IF_ACTIVE;
-
- bat_priv->bat_algo_ops->bat_ogm_schedule(hard_iface);
-}
-
-static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
+void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
{
kfree_skb(forw_packet->skb);
if (forw_packet->if_incoming)
@@ -604,45 +595,6 @@ out:
atomic_inc(&bat_priv->bcast_queue_left);
}
-void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work)
-{
- struct delayed_work *delayed_work;
- struct batadv_forw_packet *forw_packet;
- struct batadv_priv *bat_priv;
-
- delayed_work = to_delayed_work(work);
- forw_packet = container_of(delayed_work, struct batadv_forw_packet,
- delayed_work);
- bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
- spin_lock_bh(&bat_priv->forw_bat_list_lock);
- hlist_del(&forw_packet->list);
- spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
- if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
- goto out;
-
- bat_priv->bat_algo_ops->bat_ogm_emit(forw_packet);
-
- /* we have to have at least one packet in the queue to determine the
- * queues wake up time unless we are shutting down.
- *
- * only re-schedule if this is the "original" copy, e.g. the OGM of the
- * primary interface should only be rescheduled once per period, but
- * this function will be called for the forw_packet instances of the
- * other secondary interfaces as well.
- */
- if (forw_packet->own &&
- forw_packet->if_incoming == forw_packet->if_outgoing)
- batadv_schedule_bat_ogm(forw_packet->if_incoming);
-
-out:
- /* don't count own packet */
- if (!forw_packet->own)
- atomic_inc(&bat_priv->batman_queue_left);
-
- batadv_forw_packet_free(forw_packet);
-}
-
void
batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
const struct batadv_hard_iface *hard_iface)
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 6fd7270d8ce6..7cecb7563b45 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -26,8 +26,8 @@
#include "packet.h"
struct sk_buff;
-struct work_struct;
+void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet);
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
struct batadv_hard_iface *recv_if);
@@ -38,11 +38,9 @@ int batadv_send_broadcast_skb(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface);
int batadv_send_unicast_skb(struct sk_buff *skb,
struct batadv_neigh_node *neigh_node);
-void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface);
int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
const struct sk_buff *skb,
unsigned long delay);
-void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work);
void
batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
const struct batadv_hard_iface *hard_iface);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 287a3879ed7e..7527c0652dd5 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -48,6 +48,7 @@
#include <linux/types.h>
#include <linux/workqueue.h>
+#include "bat_algo.h"
#include "bridge_loop_avoidance.h"
#include "debugfs.h"
#include "distributed-arp-table.h"
@@ -255,7 +256,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
if (batadv_compare_eth(ethhdr->h_dest, ectp_addr))
goto dropped;
- gw_mode = atomic_read(&bat_priv->gw_mode);
+ gw_mode = atomic_read(&bat_priv->gw.mode);
if (is_multicast_ether_addr(ethhdr->h_dest)) {
/* if gw mode is off, broadcast every packet */
if (gw_mode == BATADV_GW_MODE_OFF) {
@@ -808,6 +809,10 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->distributed_arp_table, 1);
#endif
#ifdef CONFIG_BATMAN_ADV_MCAST
+ bat_priv->mcast.querier_ipv4.exists = false;
+ bat_priv->mcast.querier_ipv4.shadowing = false;
+ bat_priv->mcast.querier_ipv6.exists = false;
+ bat_priv->mcast.querier_ipv6.shadowing = false;
bat_priv->mcast.flags = BATADV_NO_FLAGS;
atomic_set(&bat_priv->multicast_mode, 1);
atomic_set(&bat_priv->mcast.num_disabled, 0);
@@ -815,8 +820,8 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0);
atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
#endif
- atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF);
- atomic_set(&bat_priv->gw_sel_class, 20);
+ atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF);
+ atomic_set(&bat_priv->gw.sel_class, 20);
atomic_set(&bat_priv->gw.bandwidth_down, 100);
atomic_set(&bat_priv->gw.bandwidth_up, 20);
atomic_set(&bat_priv->orig_interval, 1000);
@@ -837,6 +842,8 @@ static int batadv_softif_init_late(struct net_device *dev)
#ifdef CONFIG_BATMAN_ADV_BLA
atomic_set(&bat_priv->bla.num_requests, 0);
#endif
+ atomic_set(&bat_priv->tp_num, 0);
+
bat_priv->tt.last_changeset = NULL;
bat_priv->tt.last_changeset_len = 0;
bat_priv->isolation_mark = 0;
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 414b2074165f..fe9ca94ddee2 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -25,8 +25,8 @@
#include <linux/fs.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
-#include <linux/kref.h>
#include <linux/kernel.h>
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
#include <linux/rculist.h>
@@ -38,11 +38,12 @@
#include <linux/string.h>
#include <linux/stringify.h>
+#include "bridge_loop_avoidance.h"
#include "distributed-arp-table.h"
#include "gateway_client.h"
#include "gateway_common.h"
-#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
+#include "log.h"
#include "network-coding.h"
#include "packet.h"
#include "soft-interface.h"
@@ -389,12 +390,12 @@ static int batadv_store_uint_attr(const char *buff, size_t count,
return count;
}
-static inline ssize_t
-__batadv_store_uint_attr(const char *buff, size_t count,
- int min, int max,
- void (*post_func)(struct net_device *),
- const struct attribute *attr,
- atomic_t *attr_store, struct net_device *net_dev)
+static ssize_t __batadv_store_uint_attr(const char *buff, size_t count,
+ int min, int max,
+ void (*post_func)(struct net_device *),
+ const struct attribute *attr,
+ atomic_t *attr_store,
+ struct net_device *net_dev)
{
int ret;
@@ -411,7 +412,7 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj,
{
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
- return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name);
+ return sprintf(buff, "%s\n", bat_priv->algo_ops->name);
}
static void batadv_post_gw_reselect(struct net_device *net_dev)
@@ -427,7 +428,7 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr,
struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
int bytes_written;
- switch (atomic_read(&bat_priv->gw_mode)) {
+ switch (atomic_read(&bat_priv->gw.mode)) {
case BATADV_GW_MODE_CLIENT:
bytes_written = sprintf(buff, "%s\n",
BATADV_GW_MODE_CLIENT_NAME);
@@ -476,10 +477,10 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
return -EINVAL;
}
- if (atomic_read(&bat_priv->gw_mode) == gw_mode_tmp)
+ if (atomic_read(&bat_priv->gw.mode) == gw_mode_tmp)
return count;
- switch (atomic_read(&bat_priv->gw_mode)) {
+ switch (atomic_read(&bat_priv->gw.mode)) {
case BATADV_GW_MODE_CLIENT:
curr_gw_mode_str = BATADV_GW_MODE_CLIENT_NAME;
break;
@@ -508,7 +509,7 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
* state
*/
batadv_gw_check_client_stop(bat_priv);
- atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp);
+ atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp);
batadv_gw_tvlv_container_update(bat_priv);
return count;
}
@@ -624,7 +625,7 @@ BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR,
2 * BATADV_JITTER, INT_MAX, NULL);
BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0,
BATADV_TQ_MAX_VALUE, NULL);
-BATADV_ATTR_SIF_UINT(gw_sel_class, gw_sel_class, S_IRUGO | S_IWUSR, 1,
+BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1,
BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect);
static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
batadv_store_gw_bwidth);
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
new file mode 100644
index 000000000000..2333777f919d
--- /dev/null
+++ b/net/batman-adv/tp_meter.c
@@ -0,0 +1,1507 @@
+/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
+ *
+ * Edo Monticelli, Antonio Quartulli
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tp_meter.h"
+#include "main.h"
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/param.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <uapi/linux/batman_adv.h>
+
+#include "hard-interface.h"
+#include "log.h"
+#include "netlink.h"
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+
+/**
+ * BATADV_TP_DEF_TEST_LENGTH - Default test length if not specified by the user
+ * in milliseconds
+ */
+#define BATADV_TP_DEF_TEST_LENGTH 10000
+
+/**
+ * BATADV_TP_AWND - Advertised window by the receiver (in bytes)
+ */
+#define BATADV_TP_AWND 0x20000000
+
+/**
+ * BATADV_TP_RECV_TIMEOUT - Receiver activity timeout. If the receiver does not
+ * get anything for such amount of milliseconds, the connection is killed
+ */
+#define BATADV_TP_RECV_TIMEOUT 1000
+
+/**
+ * BATADV_TP_MAX_RTO - Maximum sender timeout. If the sender RTO gets beyond
+ * such amound of milliseconds, the receiver is considered unreachable and the
+ * connection is killed
+ */
+#define BATADV_TP_MAX_RTO 30000
+
+/**
+ * BATADV_TP_FIRST_SEQ - First seqno of each session. The number is rather high
+ * in order to immediately trigger a wrap around (test purposes)
+ */
+#define BATADV_TP_FIRST_SEQ ((u32)-1 - 2000)
+
+/**
+ * BATADV_TP_PLEN - length of the payload (data after the batadv_unicast header)
+ * to simulate
+ */
+#define BATADV_TP_PLEN (BATADV_TP_PACKET_LEN - ETH_HLEN - \
+ sizeof(struct batadv_unicast_packet))
+
+static u8 batadv_tp_prerandom[4096] __read_mostly;
+
+/**
+ * batadv_tp_session_cookie - generate session cookie based on session ids
+ * @session: TP session identifier
+ * @icmp_uid: icmp pseudo uid of the tp session
+ *
+ * Return: 32 bit tp_meter session cookie
+ */
+static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid)
+{
+ u32 cookie;
+
+ cookie = icmp_uid << 16;
+ cookie |= session[0] << 8;
+ cookie |= session[1];
+
+ return cookie;
+}
+
+/**
+ * batadv_tp_cwnd - compute the new cwnd size
+ * @base: base cwnd size value
+ * @increment: the value to add to base to get the new size
+ * @min: minumim cwnd value (usually MSS)
+ *
+ * Return the new cwnd size and ensures it does not exceed the Advertised
+ * Receiver Window size. It is wrap around safe.
+ * For details refer to Section 3.1 of RFC5681
+ *
+ * Return: new congestion window size in bytes
+ */
+static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min)
+{
+ u32 new_size = base + increment;
+
+ /* check for wrap-around */
+ if (new_size < base)
+ new_size = (u32)ULONG_MAX;
+
+ new_size = min_t(u32, new_size, BATADV_TP_AWND);
+
+ return max_t(u32, new_size, min);
+}
+
+/**
+ * batadv_tp_updated_cwnd - update the Congestion Windows
+ * @tp_vars: the private data of the current TP meter session
+ * @mss: maximum segment size of transmission
+ *
+ * 1) if the session is in Slow Start, the CWND has to be increased by 1
+ * MSS every unique received ACK
+ * 2) if the session is in Congestion Avoidance, the CWND has to be
+ * increased by MSS * MSS / CWND for every unique received ACK
+ */
+static void batadv_tp_update_cwnd(struct batadv_tp_vars *tp_vars, u32 mss)
+{
+ spin_lock_bh(&tp_vars->cwnd_lock);
+
+ /* slow start... */
+ if (tp_vars->cwnd <= tp_vars->ss_threshold) {
+ tp_vars->dec_cwnd = 0;
+ tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, mss, mss);
+ spin_unlock_bh(&tp_vars->cwnd_lock);
+ return;
+ }
+
+ /* increment CWND at least of 1 (section 3.1 of RFC5681) */
+ tp_vars->dec_cwnd += max_t(u32, 1U << 3,
+ ((mss * mss) << 6) / (tp_vars->cwnd << 3));
+ if (tp_vars->dec_cwnd < (mss << 3)) {
+ spin_unlock_bh(&tp_vars->cwnd_lock);
+ return;
+ }
+
+ tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, mss, mss);
+ tp_vars->dec_cwnd = 0;
+
+ spin_unlock_bh(&tp_vars->cwnd_lock);
+}
+
+/**
+ * batadv_tp_update_rto - calculate new retransmission timeout
+ * @tp_vars: the private data of the current TP meter session
+ * @new_rtt: new roundtrip time in msec
+ */
+static void batadv_tp_update_rto(struct batadv_tp_vars *tp_vars,
+ u32 new_rtt)
+{
+ long m = new_rtt;
+
+ /* RTT update
+ * Details in Section 2.2 and 2.3 of RFC6298
+ *
+ * It's tricky to understand. Don't lose hair please.
+ * Inspired by tcp_rtt_estimator() tcp_input.c
+ */
+ if (tp_vars->srtt != 0) {
+ m -= (tp_vars->srtt >> 3); /* m is now error in rtt est */
+ tp_vars->srtt += m; /* rtt = 7/8 srtt + 1/8 new */
+ if (m < 0)
+ m = -m;
+
+ m -= (tp_vars->rttvar >> 2);
+ tp_vars->rttvar += m; /* mdev ~= 3/4 rttvar + 1/4 new */
+ } else {
+ /* first measure getting in */
+ tp_vars->srtt = m << 3; /* take the measured time to be srtt */
+ tp_vars->rttvar = m << 1; /* new_rtt / 2 */
+ }
+
+ /* rto = srtt + 4 * rttvar.
+ * rttvar is scaled by 4, therefore doesn't need to be multiplied
+ */
+ tp_vars->rto = (tp_vars->srtt >> 3) + tp_vars->rttvar;
+}
+
+/**
+ * batadv_tp_batctl_notify - send client status result to client
+ * @reason: reason for tp meter session stop
+ * @dst: destination of tp_meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @start_time: start of transmission in jiffies
+ * @total_sent: bytes acked to the receiver
+ * @cookie: cookie of tp_meter session
+ */
+static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason,
+ const u8 *dst, struct batadv_priv *bat_priv,
+ unsigned long start_time, u64 total_sent,
+ u32 cookie)
+{
+ u32 test_time;
+ u8 result;
+ u32 total_bytes;
+
+ if (!batadv_tp_is_error(reason)) {
+ result = BATADV_TP_REASON_COMPLETE;
+ test_time = jiffies_to_msecs(jiffies - start_time);
+ total_bytes = total_sent;
+ } else {
+ result = reason;
+ test_time = 0;
+ total_bytes = 0;
+ }
+
+ batadv_netlink_tpmeter_notify(bat_priv, dst, result, test_time,
+ total_bytes, cookie);
+}
+
+/**
+ * batadv_tp_batctl_error_notify - send client error result to client
+ * @reason: reason for tp meter session stop
+ * @dst: destination of tp_meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @cookie: cookie of tp_meter session
+ */
+static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason,
+ const u8 *dst,
+ struct batadv_priv *bat_priv,
+ u32 cookie)
+{
+ batadv_tp_batctl_notify(reason, dst, bat_priv, 0, 0, cookie);
+}
+
+/**
+ * batadv_tp_list_find - find a tp_vars object in the global list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the other endpoint MAC address to look for
+ *
+ * Look for a tp_vars object matching dst as end_point and return it after
+ * having incremented the refcounter. Return NULL is not found
+ *
+ * Return: matching tp_vars or NULL when no tp_vars with @dst was found
+ */
+static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv,
+ const u8 *dst)
+{
+ struct batadv_tp_vars *pos, *tp_vars = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(pos, &bat_priv->tp_list, list) {
+ if (!batadv_compare_eth(pos->other_end, dst))
+ continue;
+
+ /* most of the time this function is invoked during the normal
+ * process..it makes sens to pay more when the session is
+ * finished and to speed the process up during the measurement
+ */
+ if (unlikely(!kref_get_unless_zero(&pos->refcount)))
+ continue;
+
+ tp_vars = pos;
+ break;
+ }
+ rcu_read_unlock();
+
+ return tp_vars;
+}
+
+/**
+ * batadv_tp_list_find_session - find tp_vars session object in the global list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the other endpoint MAC address to look for
+ * @session: session identifier
+ *
+ * Look for a tp_vars object matching dst as end_point, session as tp meter
+ * session and return it after having incremented the refcounter. Return NULL
+ * is not found
+ *
+ * Return: matching tp_vars or NULL when no tp_vars was found
+ */
+static struct batadv_tp_vars *
+batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst,
+ const u8 *session)
+{
+ struct batadv_tp_vars *pos, *tp_vars = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(pos, &bat_priv->tp_list, list) {
+ if (!batadv_compare_eth(pos->other_end, dst))
+ continue;
+
+ if (memcmp(pos->session, session, sizeof(pos->session)) != 0)
+ continue;
+
+ /* most of the time this function is invoked during the normal
+ * process..it makes sense to pay more when the session is
+ * finished and to speed the process up during the measurement
+ */
+ if (unlikely(!kref_get_unless_zero(&pos->refcount)))
+ continue;
+
+ tp_vars = pos;
+ break;
+ }
+ rcu_read_unlock();
+
+ return tp_vars;
+}
+
+/**
+ * batadv_tp_vars_release - release batadv_tp_vars from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the batadv_tp_vars
+ */
+static void batadv_tp_vars_release(struct kref *ref)
+{
+ struct batadv_tp_vars *tp_vars;
+ struct batadv_tp_unacked *un, *safe;
+
+ tp_vars = container_of(ref, struct batadv_tp_vars, refcount);
+
+ /* lock should not be needed because this object is now out of any
+ * context!
+ */
+ spin_lock_bh(&tp_vars->unacked_lock);
+ list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+ list_del(&un->list);
+ kfree(un);
+ }
+ spin_unlock_bh(&tp_vars->unacked_lock);
+
+ kfree_rcu(tp_vars, rcu);
+}
+
+/**
+ * batadv_tp_vars_put - decrement the batadv_tp_vars refcounter and possibly
+ * release it
+ * @tp_vars: the private data of the current TP meter session to be free'd
+ */
+static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
+{
+ kref_put(&tp_vars->refcount, batadv_tp_vars_release);
+}
+
+/**
+ * batadv_tp_sender_cleanup - cleanup sender data and drop and timer
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tp_vars: the private data of the current TP meter session to cleanup
+ */
+static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv,
+ struct batadv_tp_vars *tp_vars)
+{
+ cancel_delayed_work(&tp_vars->finish_work);
+
+ spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
+ hlist_del_rcu(&tp_vars->list);
+ spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
+
+ /* drop list reference */
+ batadv_tp_vars_put(tp_vars);
+
+ atomic_dec(&tp_vars->bat_priv->tp_num);
+
+ /* kill the timer and remove its reference */
+ del_timer_sync(&tp_vars->timer);
+ /* the worker might have rearmed itself therefore we kill it again. Note
+ * that if the worker should run again before invoking the following
+ * del_timer(), it would not re-arm itself once again because the status
+ * is OFF now
+ */
+ del_timer(&tp_vars->timer);
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_sender_end - print info about ended session and inform client
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tp_vars: the private data of the current TP meter session
+ */
+static void batadv_tp_sender_end(struct batadv_priv *bat_priv,
+ struct batadv_tp_vars *tp_vars)
+{
+ u32 session_cookie;
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Test towards %pM finished..shutting down (reason=%d)\n",
+ tp_vars->other_end, tp_vars->reason);
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n",
+ tp_vars->srtt >> 3, tp_vars->rttvar >> 2, tp_vars->rto);
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Final values: cwnd=%u ss_threshold=%u\n",
+ tp_vars->cwnd, tp_vars->ss_threshold);
+
+ session_cookie = batadv_tp_session_cookie(tp_vars->session,
+ tp_vars->icmp_uid);
+
+ batadv_tp_batctl_notify(tp_vars->reason,
+ tp_vars->other_end,
+ bat_priv,
+ tp_vars->start_time,
+ atomic64_read(&tp_vars->tot_sent),
+ session_cookie);
+}
+
+/**
+ * batadv_tp_sender_shutdown - let sender thread/timer stop gracefully
+ * @tp_vars: the private data of the current TP meter session
+ * @reason: reason for tp meter session stop
+ */
+static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars,
+ enum batadv_tp_meter_reason reason)
+{
+ if (!atomic_dec_and_test(&tp_vars->sending))
+ return;
+
+ tp_vars->reason = reason;
+}
+
+/**
+ * batadv_tp_sender_finish - stop sender session after test_length was reached
+ * @work: delayed work reference of the related tp_vars
+ */
+static void batadv_tp_sender_finish(struct work_struct *work)
+{
+ struct delayed_work *delayed_work;
+ struct batadv_tp_vars *tp_vars;
+
+ delayed_work = to_delayed_work(work);
+ tp_vars = container_of(delayed_work, struct batadv_tp_vars,
+ finish_work);
+
+ batadv_tp_sender_shutdown(tp_vars, BATADV_TP_REASON_COMPLETE);
+}
+
+/**
+ * batadv_tp_reset_sender_timer - reschedule the sender timer
+ * @tp_vars: the private TP meter data for this session
+ *
+ * Reschedule the timer using tp_vars->rto as delay
+ */
+static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars)
+{
+ /* most of the time this function is invoked while normal packet
+ * reception...
+ */
+ if (unlikely(atomic_read(&tp_vars->sending) == 0))
+ /* timer ref will be dropped in batadv_tp_sender_cleanup */
+ return;
+
+ mod_timer(&tp_vars->timer, jiffies + msecs_to_jiffies(tp_vars->rto));
+}
+
+/**
+ * batadv_tp_sender_timeout - timer that fires in case of packet loss
+ * @arg: address of the related tp_vars
+ *
+ * If fired it means that there was packet loss.
+ * Switch to Slow Start, set the ss_threshold to half of the current cwnd and
+ * reset the cwnd to 3*MSS
+ */
+static void batadv_tp_sender_timeout(unsigned long arg)
+{
+ struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg;
+ struct batadv_priv *bat_priv = tp_vars->bat_priv;
+
+ if (atomic_read(&tp_vars->sending) == 0)
+ return;
+
+ /* if the user waited long enough...shutdown the test */
+ if (unlikely(tp_vars->rto >= BATADV_TP_MAX_RTO)) {
+ batadv_tp_sender_shutdown(tp_vars,
+ BATADV_TP_REASON_DST_UNREACHABLE);
+ return;
+ }
+
+ /* RTO exponential backoff
+ * Details in Section 5.5 of RFC6298
+ */
+ tp_vars->rto <<= 1;
+
+ spin_lock_bh(&tp_vars->cwnd_lock);
+
+ tp_vars->ss_threshold = tp_vars->cwnd >> 1;
+ if (tp_vars->ss_threshold < BATADV_TP_PLEN * 2)
+ tp_vars->ss_threshold = BATADV_TP_PLEN * 2;
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: RTO fired during test towards %pM! cwnd=%u new ss_thr=%u, resetting last_sent to %u\n",
+ tp_vars->other_end, tp_vars->cwnd, tp_vars->ss_threshold,
+ atomic_read(&tp_vars->last_acked));
+
+ tp_vars->cwnd = BATADV_TP_PLEN * 3;
+
+ spin_unlock_bh(&tp_vars->cwnd_lock);
+
+ /* resend the non-ACKed packets.. */
+ tp_vars->last_sent = atomic_read(&tp_vars->last_acked);
+ wake_up(&tp_vars->more_bytes);
+
+ batadv_tp_reset_sender_timer(tp_vars);
+}
+
+/**
+ * batadv_tp_fill_prerandom - Fill buffer with prefetched random bytes
+ * @tp_vars: the private TP meter data for this session
+ * @buf: Buffer to fill with bytes
+ * @nbytes: amount of pseudorandom bytes
+ */
+static void batadv_tp_fill_prerandom(struct batadv_tp_vars *tp_vars,
+ u8 *buf, size_t nbytes)
+{
+ u32 local_offset;
+ size_t bytes_inbuf;
+ size_t to_copy;
+ size_t pos = 0;
+
+ spin_lock_bh(&tp_vars->prerandom_lock);
+ local_offset = tp_vars->prerandom_offset;
+ tp_vars->prerandom_offset += nbytes;
+ tp_vars->prerandom_offset %= sizeof(batadv_tp_prerandom);
+ spin_unlock_bh(&tp_vars->prerandom_lock);
+
+ while (nbytes) {
+ local_offset %= sizeof(batadv_tp_prerandom);
+ bytes_inbuf = sizeof(batadv_tp_prerandom) - local_offset;
+ to_copy = min(nbytes, bytes_inbuf);
+
+ memcpy(&buf[pos], &batadv_tp_prerandom[local_offset], to_copy);
+ pos += to_copy;
+ nbytes -= to_copy;
+ local_offset = 0;
+ }
+}
+
+/**
+ * batadv_tp_send_msg - send a single message
+ * @tp_vars: the private TP meter data for this session
+ * @src: source mac address
+ * @orig_node: the originator of the destination
+ * @seqno: sequence number of this packet
+ * @len: length of the entire packet
+ * @session: session identifier
+ * @uid: local ICMP "socket" index
+ * @timestamp: timestamp in jiffies which is replied in ack
+ *
+ * Create and send a single TP Meter message.
+ *
+ * Return: 0 on success, BATADV_TP_REASON_DST_UNREACHABLE if the destination is
+ * not reachable, BATADV_TP_REASON_MEMORY_ERROR if the packet couldn't be
+ * allocated
+ */
+static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
+ struct batadv_orig_node *orig_node,
+ u32 seqno, size_t len, const u8 *session,
+ int uid, u32 timestamp)
+{
+ struct batadv_icmp_tp_packet *icmp;
+ struct sk_buff *skb;
+ int r;
+ u8 *data;
+ size_t data_len;
+
+ skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN);
+ if (unlikely(!skb))
+ return BATADV_TP_REASON_MEMORY_ERROR;
+
+ skb_reserve(skb, ETH_HLEN);
+ icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+
+ /* fill the icmp header */
+ ether_addr_copy(icmp->dst, orig_node->orig);
+ ether_addr_copy(icmp->orig, src);
+ icmp->version = BATADV_COMPAT_VERSION;
+ icmp->packet_type = BATADV_ICMP;
+ icmp->ttl = BATADV_TTL;
+ icmp->msg_type = BATADV_TP;
+ icmp->uid = uid;
+
+ icmp->subtype = BATADV_TP_MSG;
+ memcpy(icmp->session, session, sizeof(icmp->session));
+ icmp->seqno = htonl(seqno);
+ icmp->timestamp = htonl(timestamp);
+
+ data_len = len - sizeof(*icmp);
+ data = (u8 *)skb_put(skb, data_len);
+ batadv_tp_fill_prerandom(tp_vars, data, data_len);
+
+ r = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (r == -1)
+ kfree_skb(skb);
+
+ if (r == NET_XMIT_SUCCESS)
+ return 0;
+
+ return BATADV_TP_REASON_CANT_SEND;
+}
+
+/**
+ * batadv_tp_recv_ack - ACK receiving function
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ *
+ * Process a received TP ACK packet
+ */
+static void batadv_tp_recv_ack(struct batadv_priv *bat_priv,
+ const struct sk_buff *skb)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_orig_node *orig_node = NULL;
+ const struct batadv_icmp_tp_packet *icmp;
+ struct batadv_tp_vars *tp_vars;
+ size_t packet_len, mss;
+ u32 rtt, recv_ack, cwnd;
+ unsigned char *dev_addr;
+
+ packet_len = BATADV_TP_PLEN;
+ mss = BATADV_TP_PLEN;
+ packet_len += sizeof(struct batadv_unicast_packet);
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ /* find the tp_vars */
+ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+ icmp->session);
+ if (unlikely(!tp_vars))
+ return;
+
+ if (unlikely(atomic_read(&tp_vars->sending) == 0))
+ goto out;
+
+ /* old ACK? silently drop it.. */
+ if (batadv_seq_before(ntohl(icmp->seqno),
+ (u32)atomic_read(&tp_vars->last_acked)))
+ goto out;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (unlikely(!primary_if))
+ goto out;
+
+ orig_node = batadv_orig_hash_find(bat_priv, icmp->orig);
+ if (unlikely(!orig_node))
+ goto out;
+
+ /* update RTO with the new sampled RTT, if any */
+ rtt = jiffies_to_msecs(jiffies) - ntohl(icmp->timestamp);
+ if (icmp->timestamp && rtt)
+ batadv_tp_update_rto(tp_vars, rtt);
+
+ /* ACK for new data... reset the timer */
+ batadv_tp_reset_sender_timer(tp_vars);
+
+ recv_ack = ntohl(icmp->seqno);
+
+ /* check if this ACK is a duplicate */
+ if (atomic_read(&tp_vars->last_acked) == recv_ack) {
+ atomic_inc(&tp_vars->dup_acks);
+ if (atomic_read(&tp_vars->dup_acks) != 3)
+ goto out;
+
+ if (recv_ack >= tp_vars->recover)
+ goto out;
+
+ /* if this is the third duplicate ACK do Fast Retransmit */
+ batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr,
+ orig_node, recv_ack, packet_len,
+ icmp->session, icmp->uid,
+ jiffies_to_msecs(jiffies));
+
+ spin_lock_bh(&tp_vars->cwnd_lock);
+
+ /* Fast Recovery */
+ tp_vars->fast_recovery = true;
+ /* Set recover to the last outstanding seqno when Fast Recovery
+ * is entered. RFC6582, Section 3.2, step 1
+ */
+ tp_vars->recover = tp_vars->last_sent;
+ tp_vars->ss_threshold = tp_vars->cwnd >> 1;
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: Fast Recovery, (cur cwnd=%u) ss_thr=%u last_sent=%u recv_ack=%u\n",
+ tp_vars->cwnd, tp_vars->ss_threshold,
+ tp_vars->last_sent, recv_ack);
+ tp_vars->cwnd = batadv_tp_cwnd(tp_vars->ss_threshold, 3 * mss,
+ mss);
+ tp_vars->dec_cwnd = 0;
+ tp_vars->last_sent = recv_ack;
+
+ spin_unlock_bh(&tp_vars->cwnd_lock);
+ } else {
+ /* count the acked data */
+ atomic64_add(recv_ack - atomic_read(&tp_vars->last_acked),
+ &tp_vars->tot_sent);
+ /* reset the duplicate ACKs counter */
+ atomic_set(&tp_vars->dup_acks, 0);
+
+ if (tp_vars->fast_recovery) {
+ /* partial ACK */
+ if (batadv_seq_before(recv_ack, tp_vars->recover)) {
+ /* this is another hole in the window. React
+ * immediately as specified by NewReno (see
+ * Section 3.2 of RFC6582 for details)
+ */
+ dev_addr = primary_if->net_dev->dev_addr;
+ batadv_tp_send_msg(tp_vars, dev_addr,
+ orig_node, recv_ack,
+ packet_len, icmp->session,
+ icmp->uid,
+ jiffies_to_msecs(jiffies));
+ tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd,
+ mss, mss);
+ } else {
+ tp_vars->fast_recovery = false;
+ /* set cwnd to the value of ss_threshold at the
+ * moment that Fast Recovery was entered.
+ * RFC6582, Section 3.2, step 3
+ */
+ cwnd = batadv_tp_cwnd(tp_vars->ss_threshold, 0,
+ mss);
+ tp_vars->cwnd = cwnd;
+ }
+ goto move_twnd;
+ }
+
+ if (recv_ack - atomic_read(&tp_vars->last_acked) >= mss)
+ batadv_tp_update_cwnd(tp_vars, mss);
+move_twnd:
+ /* move the Transmit Window */
+ atomic_set(&tp_vars->last_acked, recv_ack);
+ }
+
+ wake_up(&tp_vars->more_bytes);
+out:
+ if (likely(primary_if))
+ batadv_hardif_put(primary_if);
+ if (likely(orig_node))
+ batadv_orig_node_put(orig_node);
+ if (likely(tp_vars))
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_avail - check if congestion window is not full
+ * @tp_vars: the private data of the current TP meter session
+ * @payload_len: size of the payload of a single message
+ *
+ * Return: true when congestion window is not full, false otherwise
+ */
+static bool batadv_tp_avail(struct batadv_tp_vars *tp_vars,
+ size_t payload_len)
+{
+ u32 win_left, win_limit;
+
+ win_limit = atomic_read(&tp_vars->last_acked) + tp_vars->cwnd;
+ win_left = win_limit - tp_vars->last_sent;
+
+ return win_left >= payload_len;
+}
+
+/**
+ * batadv_tp_wait_available - wait until congestion window becomes free or
+ * timeout is reached
+ * @tp_vars: the private data of the current TP meter session
+ * @plen: size of the payload of a single message
+ *
+ * Return: 0 if the condition evaluated to false after the timeout elapsed,
+ * 1 if the condition evaluated to true after the timeout elapsed, the
+ * remaining jiffies (at least 1) if the condition evaluated to true before
+ * the timeout elapsed, or -ERESTARTSYS if it was interrupted by a signal.
+ */
+static int batadv_tp_wait_available(struct batadv_tp_vars *tp_vars, size_t plen)
+{
+ int ret;
+
+ ret = wait_event_interruptible_timeout(tp_vars->more_bytes,
+ batadv_tp_avail(tp_vars, plen),
+ HZ / 10);
+
+ return ret;
+}
+
+/**
+ * batadv_tp_send - main sending thread of a tp meter session
+ * @arg: address of the related tp_vars
+ *
+ * Return: nothing, this function never returns
+ */
+static int batadv_tp_send(void *arg)
+{
+ struct batadv_tp_vars *tp_vars = arg;
+ struct batadv_priv *bat_priv = tp_vars->bat_priv;
+ struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_orig_node *orig_node = NULL;
+ size_t payload_len, packet_len;
+ int err = 0;
+
+ if (unlikely(tp_vars->role != BATADV_TP_SENDER)) {
+ err = BATADV_TP_REASON_DST_UNREACHABLE;
+ tp_vars->reason = err;
+ goto out;
+ }
+
+ orig_node = batadv_orig_hash_find(bat_priv, tp_vars->other_end);
+ if (unlikely(!orig_node)) {
+ err = BATADV_TP_REASON_DST_UNREACHABLE;
+ tp_vars->reason = err;
+ goto out;
+ }
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (unlikely(!primary_if)) {
+ err = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+
+ /* assume that all the hard_interfaces have a correctly
+ * configured MTU, so use the soft_iface MTU as MSS.
+ * This might not be true and in that case the fragmentation
+ * should be used.
+ * Now, try to send the packet as it is
+ */
+ payload_len = BATADV_TP_PLEN;
+ BUILD_BUG_ON(sizeof(struct batadv_icmp_tp_packet) > BATADV_TP_PLEN);
+
+ batadv_tp_reset_sender_timer(tp_vars);
+
+ /* queue the worker in charge of terminating the test */
+ queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work,
+ msecs_to_jiffies(tp_vars->test_length));
+
+ while (atomic_read(&tp_vars->sending) != 0) {
+ if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) {
+ batadv_tp_wait_available(tp_vars, payload_len);
+ continue;
+ }
+
+ /* to emulate normal unicast traffic, add to the payload len
+ * the size of the unicast header
+ */
+ packet_len = payload_len + sizeof(struct batadv_unicast_packet);
+
+ err = batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr,
+ orig_node, tp_vars->last_sent,
+ packet_len,
+ tp_vars->session, tp_vars->icmp_uid,
+ jiffies_to_msecs(jiffies));
+
+ /* something went wrong during the preparation/transmission */
+ if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: batadv_tp_send() cannot send packets (%d)\n",
+ err);
+ /* ensure nobody else tries to stop the thread now */
+ if (atomic_dec_and_test(&tp_vars->sending))
+ tp_vars->reason = err;
+ break;
+ }
+
+ /* right-shift the TWND */
+ if (!err)
+ tp_vars->last_sent += payload_len;
+
+ cond_resched();
+ }
+
+out:
+ if (likely(primary_if))
+ batadv_hardif_put(primary_if);
+ if (likely(orig_node))
+ batadv_orig_node_put(orig_node);
+
+ batadv_tp_sender_end(bat_priv, tp_vars);
+ batadv_tp_sender_cleanup(bat_priv, tp_vars);
+
+ batadv_tp_vars_put(tp_vars);
+
+ do_exit(0);
+}
+
+/**
+ * batadv_tp_start_kthread - start new thread which manages the tp meter sender
+ * @tp_vars: the private data of the current TP meter session
+ */
+static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars)
+{
+ struct task_struct *kthread;
+ struct batadv_priv *bat_priv = tp_vars->bat_priv;
+ u32 session_cookie;
+
+ kref_get(&tp_vars->refcount);
+ kthread = kthread_create(batadv_tp_send, tp_vars, "kbatadv_tp_meter");
+ if (IS_ERR(kthread)) {
+ session_cookie = batadv_tp_session_cookie(tp_vars->session,
+ tp_vars->icmp_uid);
+ pr_err("batadv: cannot create tp meter kthread\n");
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR,
+ tp_vars->other_end,
+ bat_priv, session_cookie);
+
+ /* drop reserved reference for kthread */
+ batadv_tp_vars_put(tp_vars);
+
+ /* cleanup of failed tp meter variables */
+ batadv_tp_sender_cleanup(bat_priv, tp_vars);
+ return;
+ }
+
+ wake_up_process(kthread);
+}
+
+/**
+ * batadv_tp_start - start a new tp meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the receiver MAC address
+ * @test_length: test length in milliseconds
+ * @cookie: session cookie
+ */
+void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
+ u32 test_length, u32 *cookie)
+{
+ struct batadv_tp_vars *tp_vars;
+ u8 session_id[2];
+ u8 icmp_uid;
+ u32 session_cookie;
+
+ get_random_bytes(session_id, sizeof(session_id));
+ get_random_bytes(&icmp_uid, 1);
+ session_cookie = batadv_tp_session_cookie(session_id, icmp_uid);
+ *cookie = session_cookie;
+
+ /* look for an already existing test towards this node */
+ spin_lock_bh(&bat_priv->tp_list_lock);
+ tp_vars = batadv_tp_list_find(bat_priv, dst);
+ if (tp_vars) {
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+ batadv_tp_vars_put(tp_vars);
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: test to or from the same node already ongoing, aborting\n");
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING,
+ dst, bat_priv, session_cookie);
+ return;
+ }
+
+ if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) {
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: too many ongoing sessions, aborting (SEND)\n");
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_TOO_MANY, dst,
+ bat_priv, session_cookie);
+ return;
+ }
+
+ tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC);
+ if (!tp_vars) {
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: batadv_tp_start cannot allocate list elements\n");
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR,
+ dst, bat_priv, session_cookie);
+ return;
+ }
+
+ /* initialize tp_vars */
+ ether_addr_copy(tp_vars->other_end, dst);
+ kref_init(&tp_vars->refcount);
+ tp_vars->role = BATADV_TP_SENDER;
+ atomic_set(&tp_vars->sending, 1);
+ memcpy(tp_vars->session, session_id, sizeof(session_id));
+ tp_vars->icmp_uid = icmp_uid;
+
+ tp_vars->last_sent = BATADV_TP_FIRST_SEQ;
+ atomic_set(&tp_vars->last_acked, BATADV_TP_FIRST_SEQ);
+ tp_vars->fast_recovery = false;
+ tp_vars->recover = BATADV_TP_FIRST_SEQ;
+
+ /* initialise the CWND to 3*MSS (Section 3.1 in RFC5681).
+ * For batman-adv the MSS is the size of the payload received by the
+ * soft_interface, hence its MTU
+ */
+ tp_vars->cwnd = BATADV_TP_PLEN * 3;
+ /* at the beginning initialise the SS threshold to the biggest possible
+ * window size, hence the AWND size
+ */
+ tp_vars->ss_threshold = BATADV_TP_AWND;
+
+ /* RTO initial value is 3 seconds.
+ * Details in Section 2.1 of RFC6298
+ */
+ tp_vars->rto = 1000;
+ tp_vars->srtt = 0;
+ tp_vars->rttvar = 0;
+
+ atomic64_set(&tp_vars->tot_sent, 0);
+
+ kref_get(&tp_vars->refcount);
+ setup_timer(&tp_vars->timer, batadv_tp_sender_timeout,
+ (unsigned long)tp_vars);
+
+ tp_vars->bat_priv = bat_priv;
+ tp_vars->start_time = jiffies;
+
+ init_waitqueue_head(&tp_vars->more_bytes);
+
+ spin_lock_init(&tp_vars->unacked_lock);
+ INIT_LIST_HEAD(&tp_vars->unacked_list);
+
+ spin_lock_init(&tp_vars->cwnd_lock);
+
+ tp_vars->prerandom_offset = 0;
+ spin_lock_init(&tp_vars->prerandom_lock);
+
+ kref_get(&tp_vars->refcount);
+ hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list);
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+
+ tp_vars->test_length = test_length;
+ if (!tp_vars->test_length)
+ tp_vars->test_length = BATADV_TP_DEF_TEST_LENGTH;
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: starting throughput meter towards %pM (length=%ums)\n",
+ dst, test_length);
+
+ /* init work item for finished tp tests */
+ INIT_DELAYED_WORK(&tp_vars->finish_work, batadv_tp_sender_finish);
+
+ /* start tp kthread. This way the write() call issued from userspace can
+ * happily return and avoid to block
+ */
+ batadv_tp_start_kthread(tp_vars);
+
+ /* don't return reference to new tp_vars */
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_stop - stop currently running tp meter session
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the receiver MAC address
+ * @return_value: reason for tp meter session stop
+ */
+void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
+ u8 return_value)
+{
+ struct batadv_orig_node *orig_node;
+ struct batadv_tp_vars *tp_vars;
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: stopping test towards %pM\n", dst);
+
+ orig_node = batadv_orig_hash_find(bat_priv, dst);
+ if (!orig_node)
+ return;
+
+ tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig);
+ if (!tp_vars) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: trying to interrupt an already over connection\n");
+ goto out;
+ }
+
+ batadv_tp_sender_shutdown(tp_vars, return_value);
+ batadv_tp_vars_put(tp_vars);
+out:
+ batadv_orig_node_put(orig_node);
+}
+
+/**
+ * batadv_tp_reset_receiver_timer - reset the receiver shutdown timer
+ * @tp_vars: the private data of the current TP meter session
+ *
+ * start the receiver shutdown timer or reset it if already started
+ */
+static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars)
+{
+ mod_timer(&tp_vars->timer,
+ jiffies + msecs_to_jiffies(BATADV_TP_RECV_TIMEOUT));
+}
+
+/**
+ * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is
+ * reached without received ack
+ * @arg: address of the related tp_vars
+ */
+static void batadv_tp_receiver_shutdown(unsigned long arg)
+{
+ struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg;
+ struct batadv_tp_unacked *un, *safe;
+ struct batadv_priv *bat_priv;
+
+ bat_priv = tp_vars->bat_priv;
+
+ /* if there is recent activity rearm the timer */
+ if (!batadv_has_timed_out(tp_vars->last_recv_time,
+ BATADV_TP_RECV_TIMEOUT)) {
+ /* reset the receiver shutdown timer */
+ batadv_tp_reset_receiver_timer(tp_vars);
+ return;
+ }
+
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Shutting down for inactivity (more than %dms) from %pM\n",
+ BATADV_TP_RECV_TIMEOUT, tp_vars->other_end);
+
+ spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
+ hlist_del_rcu(&tp_vars->list);
+ spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
+
+ /* drop list reference */
+ batadv_tp_vars_put(tp_vars);
+
+ atomic_dec(&bat_priv->tp_num);
+
+ spin_lock_bh(&tp_vars->unacked_lock);
+ list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+ list_del(&un->list);
+ kfree(un);
+ }
+ spin_unlock_bh(&tp_vars->unacked_lock);
+
+ /* drop reference of timer */
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_send_ack - send an ACK packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @dst: the mac address of the destination originator
+ * @seq: the sequence number to ACK
+ * @timestamp: the timestamp to echo back in the ACK
+ * @session: session identifier
+ * @socket_index: local ICMP socket identifier
+ *
+ * Return: 0 on success, a positive integer representing the reason of the
+ * failure otherwise
+ */
+static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
+ u32 seq, __be32 timestamp, const u8 *session,
+ int socket_index)
+{
+ struct batadv_hard_iface *primary_if = NULL;
+ struct batadv_orig_node *orig_node;
+ struct batadv_icmp_tp_packet *icmp;
+ struct sk_buff *skb;
+ int r, ret;
+
+ orig_node = batadv_orig_hash_find(bat_priv, dst);
+ if (unlikely(!orig_node)) {
+ ret = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (unlikely(!primary_if)) {
+ ret = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+
+ skb = netdev_alloc_skb_ip_align(NULL, sizeof(*icmp) + ETH_HLEN);
+ if (unlikely(!skb)) {
+ ret = BATADV_TP_REASON_MEMORY_ERROR;
+ goto out;
+ }
+
+ skb_reserve(skb, ETH_HLEN);
+ icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp));
+ icmp->packet_type = BATADV_ICMP;
+ icmp->version = BATADV_COMPAT_VERSION;
+ icmp->ttl = BATADV_TTL;
+ icmp->msg_type = BATADV_TP;
+ ether_addr_copy(icmp->dst, orig_node->orig);
+ ether_addr_copy(icmp->orig, primary_if->net_dev->dev_addr);
+ icmp->uid = socket_index;
+
+ icmp->subtype = BATADV_TP_ACK;
+ memcpy(icmp->session, session, sizeof(icmp->session));
+ icmp->seqno = htonl(seq);
+ icmp->timestamp = timestamp;
+
+ /* send the ack */
+ r = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (r == -1)
+ kfree_skb(skb);
+
+ if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
+ ret = BATADV_TP_REASON_DST_UNREACHABLE;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ if (likely(orig_node))
+ batadv_orig_node_put(orig_node);
+ if (likely(primary_if))
+ batadv_hardif_put(primary_if);
+
+ return ret;
+}
+
+/**
+ * batadv_tp_handle_out_of_order - store an out of order packet
+ * @tp_vars: the private data of the current TP meter session
+ * @skb: the buffer containing the received packet
+ *
+ * Store the out of order packet in the unacked list for late processing. This
+ * packets are kept in this list so that they can be ACKed at once as soon as
+ * all the previous packets have been received
+ *
+ * Return: true if the packed has been successfully processed, false otherwise
+ */
+static bool batadv_tp_handle_out_of_order(struct batadv_tp_vars *tp_vars,
+ const struct sk_buff *skb)
+{
+ const struct batadv_icmp_tp_packet *icmp;
+ struct batadv_tp_unacked *un, *new;
+ u32 payload_len;
+ bool added = false;
+
+ new = kmalloc(sizeof(*new), GFP_ATOMIC);
+ if (unlikely(!new))
+ return false;
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ new->seqno = ntohl(icmp->seqno);
+ payload_len = skb->len - sizeof(struct batadv_unicast_packet);
+ new->len = payload_len;
+
+ spin_lock_bh(&tp_vars->unacked_lock);
+ /* if the list is empty immediately attach this new object */
+ if (list_empty(&tp_vars->unacked_list)) {
+ list_add(&new->list, &tp_vars->unacked_list);
+ goto out;
+ }
+
+ /* otherwise loop over the list and either drop the packet because this
+ * is a duplicate or store it at the right position.
+ *
+ * The iteration is done in the reverse way because it is likely that
+ * the last received packet (the one being processed now) has a bigger
+ * seqno than all the others already stored.
+ */
+ list_for_each_entry_reverse(un, &tp_vars->unacked_list, list) {
+ /* check for duplicates */
+ if (new->seqno == un->seqno) {
+ if (new->len > un->len)
+ un->len = new->len;
+ kfree(new);
+ added = true;
+ break;
+ }
+
+ /* look for the right position */
+ if (batadv_seq_before(new->seqno, un->seqno))
+ continue;
+
+ /* as soon as an entry having a bigger seqno is found, the new
+ * one is attached _after_ it. In this way the list is kept in
+ * ascending order
+ */
+ list_add_tail(&new->list, &un->list);
+ added = true;
+ break;
+ }
+
+ /* received packet with smallest seqno out of order; add it to front */
+ if (!added)
+ list_add(&new->list, &tp_vars->unacked_list);
+
+out:
+ spin_unlock_bh(&tp_vars->unacked_lock);
+
+ return true;
+}
+
+/**
+ * batadv_tp_ack_unordered - update number received bytes in current stream
+ * without gaps
+ * @tp_vars: the private data of the current TP meter session
+ */
+static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars)
+{
+ struct batadv_tp_unacked *un, *safe;
+ u32 to_ack;
+
+ /* go through the unacked packet list and possibly ACK them as
+ * well
+ */
+ spin_lock_bh(&tp_vars->unacked_lock);
+ list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
+ /* the list is ordered, therefore it is possible to stop as soon
+ * there is a gap between the last acked seqno and the seqno of
+ * the packet under inspection
+ */
+ if (batadv_seq_before(tp_vars->last_recv, un->seqno))
+ break;
+
+ to_ack = un->seqno + un->len - tp_vars->last_recv;
+
+ if (batadv_seq_before(tp_vars->last_recv, un->seqno + un->len))
+ tp_vars->last_recv += to_ack;
+
+ list_del(&un->list);
+ kfree(un);
+ }
+ spin_unlock_bh(&tp_vars->unacked_lock);
+}
+
+/**
+ * batadv_tp_init_recv - return matching or create new receiver tp_vars
+ * @bat_priv: the bat priv with all the soft interface information
+ * @icmp: received icmp tp msg
+ *
+ * Return: corresponding tp_vars or NULL on errors
+ */
+static struct batadv_tp_vars *
+batadv_tp_init_recv(struct batadv_priv *bat_priv,
+ const struct batadv_icmp_tp_packet *icmp)
+{
+ struct batadv_tp_vars *tp_vars;
+
+ spin_lock_bh(&bat_priv->tp_list_lock);
+ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+ icmp->session);
+ if (tp_vars)
+ goto out_unlock;
+
+ if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: too many ongoing sessions, aborting (RECV)\n");
+ goto out_unlock;
+ }
+
+ tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC);
+ if (!tp_vars)
+ goto out_unlock;
+
+ ether_addr_copy(tp_vars->other_end, icmp->orig);
+ tp_vars->role = BATADV_TP_RECEIVER;
+ memcpy(tp_vars->session, icmp->session, sizeof(tp_vars->session));
+ tp_vars->last_recv = BATADV_TP_FIRST_SEQ;
+ tp_vars->bat_priv = bat_priv;
+ kref_init(&tp_vars->refcount);
+
+ spin_lock_init(&tp_vars->unacked_lock);
+ INIT_LIST_HEAD(&tp_vars->unacked_list);
+
+ kref_get(&tp_vars->refcount);
+ hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list);
+
+ kref_get(&tp_vars->refcount);
+ setup_timer(&tp_vars->timer, batadv_tp_receiver_shutdown,
+ (unsigned long)tp_vars);
+
+ batadv_tp_reset_receiver_timer(tp_vars);
+
+out_unlock:
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+
+ return tp_vars;
+}
+
+/**
+ * batadv_tp_recv_msg - process a single data message
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ *
+ * Process a received TP MSG packet
+ */
+static void batadv_tp_recv_msg(struct batadv_priv *bat_priv,
+ const struct sk_buff *skb)
+{
+ const struct batadv_icmp_tp_packet *icmp;
+ struct batadv_tp_vars *tp_vars;
+ size_t packet_size;
+ u32 seqno;
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ seqno = ntohl(icmp->seqno);
+ /* check if this is the first seqno. This means that if the
+ * first packet is lost, the tp meter does not work anymore!
+ */
+ if (seqno == BATADV_TP_FIRST_SEQ) {
+ tp_vars = batadv_tp_init_recv(bat_priv, icmp);
+ if (!tp_vars) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: seqno != BATADV_TP_FIRST_SEQ cannot initiate connection\n");
+ goto out;
+ }
+ } else {
+ tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
+ icmp->session);
+ if (!tp_vars) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Unexpected packet from %pM!\n",
+ icmp->orig);
+ goto out;
+ }
+ }
+
+ if (unlikely(tp_vars->role != BATADV_TP_RECEIVER)) {
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Meter: dropping packet: not expected (role=%u)\n",
+ tp_vars->role);
+ goto out;
+ }
+
+ tp_vars->last_recv_time = jiffies;
+
+ /* if the packet is a duplicate, it may be the case that an ACK has been
+ * lost. Resend the ACK
+ */
+ if (batadv_seq_before(seqno, tp_vars->last_recv))
+ goto send_ack;
+
+ /* if the packet is out of order enqueue it */
+ if (ntohl(icmp->seqno) != tp_vars->last_recv) {
+ /* exit immediately (and do not send any ACK) if the packet has
+ * not been enqueued correctly
+ */
+ if (!batadv_tp_handle_out_of_order(tp_vars, skb))
+ goto out;
+
+ /* send a duplicate ACK */
+ goto send_ack;
+ }
+
+ /* if everything was fine count the ACKed bytes */
+ packet_size = skb->len - sizeof(struct batadv_unicast_packet);
+ tp_vars->last_recv += packet_size;
+
+ /* check if this ordered message filled a gap.... */
+ batadv_tp_ack_unordered(tp_vars);
+
+send_ack:
+ /* send the ACK. If the received packet was out of order, the ACK that
+ * is going to be sent is a duplicate (the sender will count them and
+ * possibly enter Fast Retransmit as soon as it has reached 3)
+ */
+ batadv_tp_send_ack(bat_priv, icmp->orig, tp_vars->last_recv,
+ icmp->timestamp, icmp->session, icmp->uid);
+out:
+ if (likely(tp_vars))
+ batadv_tp_vars_put(tp_vars);
+}
+
+/**
+ * batadv_tp_meter_recv - main TP Meter receiving function
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the buffer containing the received packet
+ */
+void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
+{
+ struct batadv_icmp_tp_packet *icmp;
+
+ icmp = (struct batadv_icmp_tp_packet *)skb->data;
+
+ switch (icmp->subtype) {
+ case BATADV_TP_MSG:
+ batadv_tp_recv_msg(bat_priv, skb);
+ break;
+ case BATADV_TP_ACK:
+ batadv_tp_recv_ack(bat_priv, skb);
+ break;
+ default:
+ batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
+ "Received unknown TP Metric packet type %u\n",
+ icmp->subtype);
+ }
+ consume_skb(skb);
+}
+
+/**
+ * batadv_tp_meter_init - initialize global tp_meter structures
+ */
+void batadv_tp_meter_init(void)
+{
+ get_random_bytes(batadv_tp_prerandom, sizeof(batadv_tp_prerandom));
+}
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
new file mode 100644
index 000000000000..ba922c425e56
--- /dev/null
+++ b/net/batman-adv/tp_meter.h
@@ -0,0 +1,34 @@
+/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
+ *
+ * Edo Monticelli, Antonio Quartulli
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_TP_METER_H_
+#define _NET_BATMAN_ADV_TP_METER_H_
+
+#include "main.h"
+
+#include <linux/types.h>
+
+struct sk_buff;
+
+void batadv_tp_meter_init(void);
+void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
+ u32 test_length, u32 *cookie);
+void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
+ u8 return_value);
+void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb);
+
+#endif /* _NET_BATMAN_ADV_TP_METER_H_ */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 57ec87f37050..7e6df7a4964a 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -47,10 +47,12 @@
#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
+#include "log.h"
#include "multicast.h"
#include "originator.h"
#include "packet.h"
#include "soft-interface.h"
+#include "tvlv.h"
/* hash class keys */
static struct lock_class_key batadv_tt_local_hash_lock_class_key;
@@ -996,7 +998,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
struct batadv_tt_local_entry *tt_local;
struct batadv_hard_iface *primary_if;
struct hlist_head *head;
- unsigned short vid;
u32 i;
int last_seen_secs;
int last_seen_msecs;
@@ -1023,7 +1024,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
tt_local = container_of(tt_common_entry,
struct batadv_tt_local_entry,
common);
- vid = tt_common_entry->vid;
last_seen_jiffies = jiffies - tt_local->last_seen;
last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
last_seen_secs = last_seen_msecs / 1000;
@@ -1547,7 +1547,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
struct batadv_tt_global_entry *tt_global_entry)
{
struct batadv_neigh_node *router, *best_router = NULL;
- struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+ struct batadv_algo_ops *bao = bat_priv->algo_ops;
struct hlist_head *head;
struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL;
@@ -1559,8 +1559,8 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
continue;
if (best_router &&
- bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT,
- best_router, BATADV_IF_DEFAULT) <= 0) {
+ bao->neigh.cmp(router, BATADV_IF_DEFAULT, best_router,
+ BATADV_IF_DEFAULT) <= 0) {
batadv_neigh_node_put(router);
continue;
}
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
new file mode 100644
index 000000000000..3d1cf0fb112d
--- /dev/null
+++ b/net/batman-adv/tvlv.c
@@ -0,0 +1,632 @@
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "main.h"
+
+#include <linux/byteorder/generic.h>
+#include <linux/etherdevice.h>
+#include <linux/fs.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "originator.h"
+#include "packet.h"
+#include "send.h"
+#include "tvlv.h"
+
+/**
+ * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
+ * free after rcu grace period
+ * @ref: kref pointer of the tvlv
+ */
+static void batadv_tvlv_handler_release(struct kref *ref)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+
+ tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount);
+ kfree_rcu(tvlv_handler, rcu);
+}
+
+/**
+ * batadv_tvlv_handler_put - decrement the tvlv container refcounter and
+ * possibly release it
+ * @tvlv_handler: the tvlv handler to free
+ */
+static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
+{
+ kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
+}
+
+/**
+ * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list
+ * based on the provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv handler type to look for
+ * @version: tvlv handler version to look for
+ *
+ * Return: tvlv handler if found or NULL otherwise.
+ */
+static struct batadv_tvlv_handler *
+batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
+{
+ struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tvlv_handler_tmp,
+ &bat_priv->tvlv.handler_list, list) {
+ if (tvlv_handler_tmp->type != type)
+ continue;
+
+ if (tvlv_handler_tmp->version != version)
+ continue;
+
+ if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount))
+ continue;
+
+ tvlv_handler = tvlv_handler_tmp;
+ break;
+ }
+ rcu_read_unlock();
+
+ return tvlv_handler;
+}
+
+/**
+ * batadv_tvlv_container_release - release tvlv from lists and free
+ * @ref: kref pointer of the tvlv
+ */
+static void batadv_tvlv_container_release(struct kref *ref)
+{
+ struct batadv_tvlv_container *tvlv;
+
+ tvlv = container_of(ref, struct batadv_tvlv_container, refcount);
+ kfree(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_put - decrement the tvlv container refcounter and
+ * possibly release it
+ * @tvlv: the tvlv container to free
+ */
+static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
+{
+ kref_put(&tvlv->refcount, batadv_tvlv_container_release);
+}
+
+/**
+ * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container
+ * list based on the provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type to look for
+ * @version: tvlv container version to look for
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ *
+ * Return: tvlv container if found or NULL otherwise.
+ */
+static struct batadv_tvlv_container *
+batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
+{
+ struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
+
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+ hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
+ if (tvlv_tmp->tvlv_hdr.type != type)
+ continue;
+
+ if (tvlv_tmp->tvlv_hdr.version != version)
+ continue;
+
+ kref_get(&tvlv_tmp->refcount);
+ tvlv = tvlv_tmp;
+ break;
+ }
+
+ return tvlv;
+}
+
+/**
+ * batadv_tvlv_container_list_size - calculate the size of the tvlv container
+ * list entries
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ *
+ * Return: size of all currently registered tvlv containers in bytes.
+ */
+static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
+{
+ struct batadv_tvlv_container *tvlv;
+ u16 tvlv_len = 0;
+
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+ hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
+ tvlv_len += sizeof(struct batadv_tvlv_hdr);
+ tvlv_len += ntohs(tvlv->tvlv_hdr.len);
+ }
+
+ return tvlv_len;
+}
+
+/**
+ * batadv_tvlv_container_remove - remove tvlv container from the tvlv container
+ * list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tvlv: the to be removed tvlv container
+ *
+ * Has to be called with the appropriate locks being acquired
+ * (tvlv.container_list_lock).
+ */
+static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_container *tvlv)
+{
+ lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
+
+ if (!tvlv)
+ return;
+
+ hlist_del(&tvlv->list);
+
+ /* first call to decrement the counter, second call to free */
+ batadv_tvlv_container_put(tvlv);
+ batadv_tvlv_container_put(tvlv);
+}
+
+/**
+ * batadv_tvlv_container_unregister - unregister tvlv container based on the
+ * provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type to unregister
+ * @version: tvlv container type to unregister
+ */
+void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
+ u8 type, u8 version)
+{
+ struct batadv_tvlv_container *tvlv;
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv = batadv_tvlv_container_get(bat_priv, type, version);
+ batadv_tvlv_container_remove(bat_priv, tvlv);
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+}
+
+/**
+ * batadv_tvlv_container_register - register tvlv type, version and content
+ * to be propagated with each (primary interface) OGM
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv container type
+ * @version: tvlv container version
+ * @tvlv_value: tvlv container content
+ * @tvlv_value_len: tvlv container content length
+ *
+ * If a container of the same type and version was already registered the new
+ * content is going to replace the old one.
+ */
+void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
+ u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len)
+{
+ struct batadv_tvlv_container *tvlv_old, *tvlv_new;
+
+ if (!tvlv_value)
+ tvlv_value_len = 0;
+
+ tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC);
+ if (!tvlv_new)
+ return;
+
+ tvlv_new->tvlv_hdr.version = version;
+ tvlv_new->tvlv_hdr.type = type;
+ tvlv_new->tvlv_hdr.len = htons(tvlv_value_len);
+
+ memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
+ INIT_HLIST_NODE(&tvlv_new->list);
+ kref_init(&tvlv_new->refcount);
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
+ batadv_tvlv_container_remove(bat_priv, tvlv_old);
+ hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+}
+
+/**
+ * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate
+ * requested packet size
+ * @packet_buff: packet buffer
+ * @packet_buff_len: packet buffer size
+ * @min_packet_len: requested packet minimum size
+ * @additional_packet_len: requested additional packet size on top of minimum
+ * size
+ *
+ * Return: true of the packet buffer could be changed to the requested size,
+ * false otherwise.
+ */
+static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
+ int *packet_buff_len,
+ int min_packet_len,
+ int additional_packet_len)
+{
+ unsigned char *new_buff;
+
+ new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC);
+
+ /* keep old buffer if kmalloc should fail */
+ if (!new_buff)
+ return false;
+
+ memcpy(new_buff, *packet_buff, min_packet_len);
+ kfree(*packet_buff);
+ *packet_buff = new_buff;
+ *packet_buff_len = min_packet_len + additional_packet_len;
+
+ return true;
+}
+
+/**
+ * batadv_tvlv_container_ogm_append - append tvlv container content to given
+ * OGM packet buffer
+ * @bat_priv: the bat priv with all the soft interface information
+ * @packet_buff: ogm packet buffer
+ * @packet_buff_len: ogm packet buffer size including ogm header and tvlv
+ * content
+ * @packet_min_len: ogm header size to be preserved for the OGM itself
+ *
+ * The ogm packet might be enlarged or shrunk depending on the current size
+ * and the size of the to-be-appended tvlv containers.
+ *
+ * Return: size of all appended tvlv containers in bytes.
+ */
+u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+ unsigned char **packet_buff,
+ int *packet_buff_len, int packet_min_len)
+{
+ struct batadv_tvlv_container *tvlv;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ u16 tvlv_value_len;
+ void *tvlv_value;
+ bool ret;
+
+ spin_lock_bh(&bat_priv->tvlv.container_list_lock);
+ tvlv_value_len = batadv_tvlv_container_list_size(bat_priv);
+
+ ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len,
+ packet_min_len, tvlv_value_len);
+
+ if (!ret)
+ goto end;
+
+ if (!tvlv_value_len)
+ goto end;
+
+ tvlv_value = (*packet_buff) + packet_min_len;
+
+ hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
+ tvlv_hdr = tvlv_value;
+ tvlv_hdr->type = tvlv->tvlv_hdr.type;
+ tvlv_hdr->version = tvlv->tvlv_hdr.version;
+ tvlv_hdr->len = tvlv->tvlv_hdr.len;
+ tvlv_value = tvlv_hdr + 1;
+ memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len));
+ tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len);
+ }
+
+end:
+ spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+ return tvlv_value_len;
+}
+
+/**
+ * batadv_tvlv_call_handler - parse the given tvlv buffer to call the
+ * appropriate handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @tvlv_handler: tvlv callback function handling the tvlv content
+ * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
+ * @orig_node: orig node emitting the ogm packet
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ *
+ * Return: success if handler was not found or the return value of the handler
+ * callback.
+ */
+static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
+ struct batadv_tvlv_handler *tvlv_handler,
+ bool ogm_source,
+ struct batadv_orig_node *orig_node,
+ u8 *src, u8 *dst,
+ void *tvlv_value, u16 tvlv_value_len)
+{
+ if (!tvlv_handler)
+ return NET_RX_SUCCESS;
+
+ if (ogm_source) {
+ if (!tvlv_handler->ogm_handler)
+ return NET_RX_SUCCESS;
+
+ if (!orig_node)
+ return NET_RX_SUCCESS;
+
+ tvlv_handler->ogm_handler(bat_priv, orig_node,
+ BATADV_NO_FLAGS,
+ tvlv_value, tvlv_value_len);
+ tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED;
+ } else {
+ if (!src)
+ return NET_RX_SUCCESS;
+
+ if (!dst)
+ return NET_RX_SUCCESS;
+
+ if (!tvlv_handler->unicast_handler)
+ return NET_RX_SUCCESS;
+
+ return tvlv_handler->unicast_handler(bat_priv, src,
+ dst, tvlv_value,
+ tvlv_value_len);
+ }
+
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_tvlv_containers_process - parse the given tvlv buffer to call the
+ * appropriate handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
+ * @orig_node: orig node emitting the ogm packet
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ *
+ * Return: success when processing an OGM or the return value of all called
+ * handler callbacks.
+ */
+int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
+ bool ogm_source,
+ struct batadv_orig_node *orig_node,
+ u8 *src, u8 *dst,
+ void *tvlv_value, u16 tvlv_value_len)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ u16 tvlv_value_cont_len;
+ u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND;
+ int ret = NET_RX_SUCCESS;
+
+ while (tvlv_value_len >= sizeof(*tvlv_hdr)) {
+ tvlv_hdr = tvlv_value;
+ tvlv_value_cont_len = ntohs(tvlv_hdr->len);
+ tvlv_value = tvlv_hdr + 1;
+ tvlv_value_len -= sizeof(*tvlv_hdr);
+
+ if (tvlv_value_cont_len > tvlv_value_len)
+ break;
+
+ tvlv_handler = batadv_tvlv_handler_get(bat_priv,
+ tvlv_hdr->type,
+ tvlv_hdr->version);
+
+ ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler,
+ ogm_source, orig_node,
+ src, dst, tvlv_value,
+ tvlv_value_cont_len);
+ if (tvlv_handler)
+ batadv_tvlv_handler_put(tvlv_handler);
+ tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
+ tvlv_value_len -= tvlv_value_cont_len;
+ }
+
+ if (!ogm_source)
+ return ret;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tvlv_handler,
+ &bat_priv->tvlv.handler_list, list) {
+ if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) &&
+ !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED))
+ tvlv_handler->ogm_handler(bat_priv, orig_node,
+ cifnotfound, NULL, 0);
+
+ tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED;
+ }
+ rcu_read_unlock();
+
+ return NET_RX_SUCCESS;
+}
+
+/**
+ * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate
+ * handlers
+ * @bat_priv: the bat priv with all the soft interface information
+ * @batadv_ogm_packet: ogm packet containing the tvlv containers
+ * @orig_node: orig node emitting the ogm packet
+ */
+void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
+ struct batadv_ogm_packet *batadv_ogm_packet,
+ struct batadv_orig_node *orig_node)
+{
+ void *tvlv_value;
+ u16 tvlv_value_len;
+
+ if (!batadv_ogm_packet)
+ return;
+
+ tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len);
+ if (!tvlv_value_len)
+ return;
+
+ tvlv_value = batadv_ogm_packet + 1;
+
+ batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL,
+ tvlv_value, tvlv_value_len);
+}
+
+/**
+ * batadv_tvlv_handler_register - register tvlv handler based on the provided
+ * type and version (both need to match) for ogm tvlv payload and/or unicast
+ * payload
+ * @bat_priv: the bat priv with all the soft interface information
+ * @optr: ogm tvlv handler callback function. This function receives the orig
+ * node, flags and the tvlv content as argument to process.
+ * @uptr: unicast tvlv handler callback function. This function receives the
+ * source & destination of the unicast packet as well as the tvlv content
+ * to process.
+ * @type: tvlv handler type to be registered
+ * @version: tvlv handler version to be registered
+ * @flags: flags to enable or disable TVLV API behavior
+ */
+void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
+ void (*optr)(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ u8 flags,
+ void *tvlv_value,
+ u16 tvlv_value_len),
+ int (*uptr)(struct batadv_priv *bat_priv,
+ u8 *src, u8 *dst,
+ void *tvlv_value,
+ u16 tvlv_value_len),
+ u8 type, u8 version, u8 flags)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+
+ tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
+ if (tvlv_handler) {
+ batadv_tvlv_handler_put(tvlv_handler);
+ return;
+ }
+
+ tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC);
+ if (!tvlv_handler)
+ return;
+
+ tvlv_handler->ogm_handler = optr;
+ tvlv_handler->unicast_handler = uptr;
+ tvlv_handler->type = type;
+ tvlv_handler->version = version;
+ tvlv_handler->flags = flags;
+ kref_init(&tvlv_handler->refcount);
+ INIT_HLIST_NODE(&tvlv_handler->list);
+
+ spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+ hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
+ spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+}
+
+/**
+ * batadv_tvlv_handler_unregister - unregister tvlv handler based on the
+ * provided type and version (both need to match)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @type: tvlv handler type to be unregistered
+ * @version: tvlv handler version to be unregistered
+ */
+void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
+ u8 type, u8 version)
+{
+ struct batadv_tvlv_handler *tvlv_handler;
+
+ tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
+ if (!tvlv_handler)
+ return;
+
+ batadv_tvlv_handler_put(tvlv_handler);
+ spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+ hlist_del_rcu(&tvlv_handler->list);
+ spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+ batadv_tvlv_handler_put(tvlv_handler);
+}
+
+/**
+ * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the
+ * specified host
+ * @bat_priv: the bat priv with all the soft interface information
+ * @src: source mac address of the unicast packet
+ * @dst: destination mac address of the unicast packet
+ * @type: tvlv type
+ * @version: tvlv version
+ * @tvlv_value: tvlv content
+ * @tvlv_value_len: tvlv content length
+ */
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
+ u8 *dst, u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len)
+{
+ struct batadv_unicast_tvlv_packet *unicast_tvlv_packet;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ struct batadv_orig_node *orig_node;
+ struct sk_buff *skb;
+ unsigned char *tvlv_buff;
+ unsigned int tvlv_len;
+ ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
+ int res;
+
+ orig_node = batadv_orig_hash_find(bat_priv, dst);
+ if (!orig_node)
+ return;
+
+ tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len;
+
+ skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len);
+ if (!skb)
+ goto out;
+
+ skb->priority = TC_PRIO_CONTROL;
+ skb_reserve(skb, ETH_HLEN);
+ tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len);
+ unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff;
+ unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV;
+ unicast_tvlv_packet->version = BATADV_COMPAT_VERSION;
+ unicast_tvlv_packet->ttl = BATADV_TTL;
+ unicast_tvlv_packet->reserved = 0;
+ unicast_tvlv_packet->tvlv_len = htons(tvlv_len);
+ unicast_tvlv_packet->align = 0;
+ ether_addr_copy(unicast_tvlv_packet->src, src);
+ ether_addr_copy(unicast_tvlv_packet->dst, dst);
+
+ tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1);
+ tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff;
+ tvlv_hdr->version = version;
+ tvlv_hdr->type = type;
+ tvlv_hdr->len = htons(tvlv_value_len);
+ tvlv_buff += sizeof(*tvlv_hdr);
+ memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
+
+ res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+ if (res == -1)
+ kfree_skb(skb);
+out:
+ batadv_orig_node_put(orig_node);
+}
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
new file mode 100644
index 000000000000..e4369b547b43
--- /dev/null
+++ b/net/batman-adv/tvlv.h
@@ -0,0 +1,61 @@
+/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _NET_BATMAN_ADV_TVLV_H_
+#define _NET_BATMAN_ADV_TVLV_H_
+
+#include "main.h"
+
+#include <linux/types.h>
+
+struct batadv_ogm_packet;
+
+void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
+ u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len);
+u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
+ unsigned char **packet_buff,
+ int *packet_buff_len, int packet_min_len);
+void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
+ struct batadv_ogm_packet *batadv_ogm_packet,
+ struct batadv_orig_node *orig_node);
+void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv,
+ u8 type, u8 version);
+
+void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
+ void (*optr)(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ u8 flags,
+ void *tvlv_value,
+ u16 tvlv_value_len),
+ int (*uptr)(struct batadv_priv *bat_priv,
+ u8 *src, u8 *dst,
+ void *tvlv_value,
+ u16 tvlv_value_len),
+ u8 type, u8 version, u8 flags);
+void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
+ u8 type, u8 version);
+int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
+ bool ogm_source,
+ struct batadv_orig_node *orig_node,
+ u8 *src, u8 *dst,
+ void *tvlv_buff, u16 tvlv_buff_len);
+void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
+ u8 *dst, u8 type, u8 version,
+ void *tvlv_value, u16 tvlv_value_len);
+
+#endif /* _NET_BATMAN_ADV_TVLV_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ba846b078af8..43db7b61f8eb 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -33,6 +33,7 @@
#include <linux/types.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <uapi/linux/batman_adv.h>
#include "packet.h"
@@ -707,6 +708,8 @@ struct batadv_priv_debug_log {
* @list: list of available gateway nodes
* @list_lock: lock protecting gw_list & curr_gw
* @curr_gw: pointer to currently selected gateway node
+ * @mode: gateway operation: off, client or server (see batadv_gw_modes)
+ * @sel_class: gateway selection class (applies if gw_mode client)
* @bandwidth_down: advertised uplink download bandwidth (if gw_mode server)
* @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server)
* @reselect: bool indicating a gateway re-selection is in progress
@@ -715,6 +718,8 @@ struct batadv_priv_gw {
struct hlist_head list;
spinlock_t list_lock; /* protects gw_list & curr_gw */
struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */
+ atomic_t mode;
+ atomic_t sel_class;
atomic_t bandwidth_down;
atomic_t bandwidth_up;
atomic_t reselect;
@@ -751,14 +756,28 @@ struct batadv_priv_dat {
#ifdef CONFIG_BATMAN_ADV_MCAST
/**
+ * struct batadv_mcast_querier_state - IGMP/MLD querier state when bridged
+ * @exists: whether a querier exists in the mesh
+ * @shadowing: if a querier exists, whether it is potentially shadowing
+ * multicast listeners (i.e. querier is behind our own bridge segment)
+ */
+struct batadv_mcast_querier_state {
+ bool exists;
+ bool shadowing;
+};
+
+/**
* struct batadv_priv_mcast - per mesh interface mcast data
* @mla_list: list of multicast addresses we are currently announcing via TT
* @want_all_unsnoopables_list: a list of orig_nodes wanting all unsnoopable
* multicast traffic
* @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast traffic
* @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast traffic
+ * @querier_ipv4: the current state of an IGMP querier in the mesh
+ * @querier_ipv6: the current state of an MLD querier in the mesh
* @flags: the flags we have last sent in our mcast tvlv
* @enabled: whether the multicast tvlv is currently enabled
+ * @bridged: whether the soft interface has a bridge on top
* @num_disabled: number of nodes that have no mcast tvlv
* @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP traffic
* @num_want_all_ipv4: counter for items in want_all_ipv4_list
@@ -771,8 +790,11 @@ struct batadv_priv_mcast {
struct hlist_head want_all_unsnoopables_list;
struct hlist_head want_all_ipv4_list;
struct hlist_head want_all_ipv6_list;
+ struct batadv_mcast_querier_state querier_ipv4;
+ struct batadv_mcast_querier_state querier_ipv6;
u8 flags;
bool enabled;
+ bool bridged;
atomic_t num_disabled;
atomic_t num_want_all_unsnoopables;
atomic_t num_want_all_ipv4;
@@ -812,6 +834,111 @@ struct batadv_priv_nc {
};
/**
+ * struct batadv_tp_unacked - unacked packet meta-information
+ * @seqno: seqno of the unacked packet
+ * @len: length of the packet
+ * @list: list node for batadv_tp_vars::unacked_list
+ *
+ * This struct is supposed to represent a buffer unacked packet. However, since
+ * the purpose of the TP meter is to count the traffic only, there is no need to
+ * store the entire sk_buff, the starting offset and the length are enough
+ */
+struct batadv_tp_unacked {
+ u32 seqno;
+ u16 len;
+ struct list_head list;
+};
+
+/**
+ * enum batadv_tp_meter_role - Modus in tp meter session
+ * @BATADV_TP_RECEIVER: Initialized as receiver
+ * @BATADV_TP_SENDER: Initialized as sender
+ */
+enum batadv_tp_meter_role {
+ BATADV_TP_RECEIVER,
+ BATADV_TP_SENDER
+};
+
+/**
+ * struct batadv_tp_vars - tp meter private variables per session
+ * @list: list node for bat_priv::tp_list
+ * @timer: timer for ack (receiver) and retry (sender)
+ * @bat_priv: pointer to the mesh object
+ * @start_time: start time in jiffies
+ * @other_end: mac address of remote
+ * @role: receiver/sender modi
+ * @sending: sending binary semaphore: 1 if sending, 0 is not
+ * @reason: reason for a stopped session
+ * @finish_work: work item for the finishing procedure
+ * @test_length: test length in milliseconds
+ * @session: TP session identifier
+ * @icmp_uid: local ICMP "socket" index
+ * @dec_cwnd: decimal part of the cwnd used during linear growth
+ * @cwnd: current size of the congestion window
+ * @cwnd_lock: lock do protect @cwnd & @dec_cwnd
+ * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the
+ * connection switches to the Congestion Avoidance state
+ * @last_acked: last acked byte
+ * @last_sent: last sent byte, not yet acked
+ * @tot_sent: amount of data sent/ACKed so far
+ * @dup_acks: duplicate ACKs counter
+ * @fast_recovery: true if in Fast Recovery mode
+ * @recover: last sent seqno when entering Fast Recovery
+ * @rto: sender timeout
+ * @srtt: smoothed RTT scaled by 2^3
+ * @rttvar: RTT variation scaled by 2^2
+ * @more_bytes: waiting queue anchor when waiting for more ack/retry timeout
+ * @prerandom_offset: offset inside the prerandom buffer
+ * @prerandom_lock: spinlock protecting access to prerandom_offset
+ * @last_recv: last in-order received packet
+ * @unacked_list: list of unacked packets (meta-info only)
+ * @unacked_lock: protect unacked_list
+ * @last_recv_time: time time (jiffies) a msg was received
+ * @refcount: number of context where the object is used
+ * @rcu: struct used for freeing in an RCU-safe manner
+ */
+struct batadv_tp_vars {
+ struct hlist_node list;
+ struct timer_list timer;
+ struct batadv_priv *bat_priv;
+ unsigned long start_time;
+ u8 other_end[ETH_ALEN];
+ enum batadv_tp_meter_role role;
+ atomic_t sending;
+ enum batadv_tp_meter_reason reason;
+ struct delayed_work finish_work;
+ u32 test_length;
+ u8 session[2];
+ u8 icmp_uid;
+
+ /* sender variables */
+ u16 dec_cwnd;
+ u32 cwnd;
+ spinlock_t cwnd_lock; /* Protects cwnd & dec_cwnd */
+ u32 ss_threshold;
+ atomic_t last_acked;
+ u32 last_sent;
+ atomic64_t tot_sent;
+ atomic_t dup_acks;
+ bool fast_recovery;
+ u32 recover;
+ u32 rto;
+ u32 srtt;
+ u32 rttvar;
+ wait_queue_head_t more_bytes;
+ u32 prerandom_offset;
+ spinlock_t prerandom_lock; /* Protects prerandom_offset */
+
+ /* receiver variables */
+ u32 last_recv;
+ struct list_head unacked_list;
+ spinlock_t unacked_lock; /* Protects unacked_list */
+ unsigned long last_recv_time;
+ struct kref refcount;
+ struct rcu_head rcu;
+};
+
+/**
* struct batadv_softif_vlan - per VLAN attributes set
* @bat_priv: pointer to the mesh object
* @vid: VLAN identifier
@@ -865,8 +992,6 @@ struct batadv_priv_bat_v {
* enabled
* @multicast_mode: Enable or disable multicast optimizations on this node's
* sender/originating side
- * @gw_mode: gateway operation: off, client or server (see batadv_gw_modes)
- * @gw_sel_class: gateway selection class (applies if gw_mode client)
* @orig_interval: OGM broadcast interval in milliseconds
* @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop
* @log_level: configured log level (see batadv_dbg_level)
@@ -881,14 +1006,17 @@ struct batadv_priv_bat_v {
* @debug_dir: dentry for debugfs batman-adv subdirectory
* @forw_bat_list: list of aggregated OGMs that will be forwarded
* @forw_bcast_list: list of broadcast packets that will be rebroadcasted
+ * @tp_list: list of tp sessions
+ * @tp_num: number of currently active tp sessions
* @orig_hash: hash table containing mesh participants (orig nodes)
* @forw_bat_list_lock: lock protecting forw_bat_list
* @forw_bcast_list_lock: lock protecting forw_bcast_list
+ * @tp_list_lock: spinlock protecting @tp_list
* @orig_work: work queue callback item for orig node purging
* @cleanup_work: work queue callback item for soft-interface deinit
* @primary_if: one of the hard-interfaces assigned to this mesh interface
* becomes the primary interface
- * @bat_algo_ops: routing algorithm used by this mesh interface
+ * @algo_ops: routing algorithm used by this mesh interface
* @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top
* of the mesh interface represented by this object
* @softif_vlan_list_lock: lock protecting softif_vlan_list
@@ -922,8 +1050,6 @@ struct batadv_priv {
#ifdef CONFIG_BATMAN_ADV_MCAST
atomic_t multicast_mode;
#endif
- atomic_t gw_mode;
- atomic_t gw_sel_class;
atomic_t orig_interval;
atomic_t hop_penalty;
#ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -939,13 +1065,16 @@ struct batadv_priv {
struct dentry *debug_dir;
struct hlist_head forw_bat_list;
struct hlist_head forw_bcast_list;
+ struct hlist_head tp_list;
struct batadv_hashtable *orig_hash;
spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */
+ spinlock_t tp_list_lock; /* protects tp_list */
+ atomic_t tp_num;
struct delayed_work orig_work;
struct work_struct cleanup_work;
struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */
- struct batadv_algo_ops *bat_algo_ops;
+ struct batadv_algo_ops *algo_ops;
struct hlist_head softif_vlan_list;
spinlock_t softif_vlan_list_lock; /* protects softif_vlan_list */
#ifdef CONFIG_BATMAN_ADV_BLA
@@ -1261,66 +1390,77 @@ struct batadv_forw_packet {
};
/**
+ * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific)
+ * @activate: start routing mechanisms when hard-interface is brought up
+ * @enable: init routing info when hard-interface is enabled
+ * @disable: de-init routing info when hard-interface is disabled
+ * @update_mac: (re-)init mac addresses of the protocol information
+ * belonging to this hard-interface
+ * @primary_set: called when primary interface is selected / changed
+ */
+struct batadv_algo_iface_ops {
+ void (*activate)(struct batadv_hard_iface *hard_iface);
+ int (*enable)(struct batadv_hard_iface *hard_iface);
+ void (*disable)(struct batadv_hard_iface *hard_iface);
+ void (*update_mac)(struct batadv_hard_iface *hard_iface);
+ void (*primary_set)(struct batadv_hard_iface *hard_iface);
+};
+
+/**
+ * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific)
+ * @hardif_init: called on creation of single hop entry
+ * @cmp: compare the metrics of two neighbors for their respective outgoing
+ * interfaces
+ * @is_similar_or_better: check if neigh1 is equally similar or better than
+ * neigh2 for their respective outgoing interface from the metric prospective
+ * @print: print the single hop neighbor list (optional)
+ */
+struct batadv_algo_neigh_ops {
+ void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
+ int (*cmp)(struct batadv_neigh_node *neigh1,
+ struct batadv_hard_iface *if_outgoing1,
+ struct batadv_neigh_node *neigh2,
+ struct batadv_hard_iface *if_outgoing2);
+ bool (*is_similar_or_better)(struct batadv_neigh_node *neigh1,
+ struct batadv_hard_iface *if_outgoing1,
+ struct batadv_neigh_node *neigh2,
+ struct batadv_hard_iface *if_outgoing2);
+ void (*print)(struct batadv_priv *priv, struct seq_file *seq);
+};
+
+/**
+ * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
+ * @free: free the resources allocated by the routing algorithm for an orig_node
+ * object
+ * @add_if: ask the routing algorithm to apply the needed changes to the
+ * orig_node due to a new hard-interface being added into the mesh
+ * @del_if: ask the routing algorithm to apply the needed changes to the
+ * orig_node due to an hard-interface being removed from the mesh
+ * @print: print the originator table (optional)
+ */
+struct batadv_algo_orig_ops {
+ void (*free)(struct batadv_orig_node *orig_node);
+ int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
+ int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
+ int del_if_num);
+ void (*print)(struct batadv_priv *priv, struct seq_file *seq,
+ struct batadv_hard_iface *hard_iface);
+};
+
+/**
* struct batadv_algo_ops - mesh algorithm callbacks
* @list: list node for the batadv_algo_list
* @name: name of the algorithm
- * @bat_iface_activate: start routing mechanisms when hard-interface is brought
- * up
- * @bat_iface_enable: init routing info when hard-interface is enabled
- * @bat_iface_disable: de-init routing info when hard-interface is disabled
- * @bat_iface_update_mac: (re-)init mac addresses of the protocol information
- * belonging to this hard-interface
- * @bat_primary_iface_set: called when primary interface is selected / changed
- * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue
- * @bat_ogm_emit: send scheduled OGM
- * @bat_hardif_neigh_init: called on creation of single hop entry
- * @bat_neigh_cmp: compare the metrics of two neighbors for their respective
- * outgoing interfaces
- * @bat_neigh_is_similar_or_better: check if neigh1 is equally similar or
- * better than neigh2 for their respective outgoing interface from the metric
- * prospective
- * @bat_neigh_print: print the single hop neighbor list (optional)
- * @bat_neigh_free: free the resources allocated by the routing algorithm for a
- * neigh_node object
- * @bat_orig_print: print the originator table (optional)
- * @bat_orig_free: free the resources allocated by the routing algorithm for an
- * orig_node object
- * @bat_orig_add_if: ask the routing algorithm to apply the needed changes to
- * the orig_node due to a new hard-interface being added into the mesh
- * @bat_orig_del_if: ask the routing algorithm to apply the needed changes to
- * the orig_node due to an hard-interface being removed from the mesh
+ * @iface: callbacks related to interface handling
+ * @neigh: callbacks related to neighbors handling
+ * @orig: callbacks related to originators handling
*/
struct batadv_algo_ops {
struct hlist_node list;
char *name;
- void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface);
- int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface);
- void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface);
- void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface);
- void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface);
- void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface);
- void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
- /* neigh_node handling API */
- void (*bat_hardif_neigh_init)(struct batadv_hardif_neigh_node *neigh);
- int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1,
- struct batadv_hard_iface *if_outgoing1,
- struct batadv_neigh_node *neigh2,
- struct batadv_hard_iface *if_outgoing2);
- bool (*bat_neigh_is_similar_or_better)
- (struct batadv_neigh_node *neigh1,
- struct batadv_hard_iface *if_outgoing1,
- struct batadv_neigh_node *neigh2,
- struct batadv_hard_iface *if_outgoing2);
- void (*bat_neigh_print)(struct batadv_priv *priv, struct seq_file *seq);
- void (*bat_neigh_free)(struct batadv_neigh_node *neigh);
- /* orig_node handling API */
- void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq,
- struct batadv_hard_iface *hard_iface);
- void (*bat_orig_free)(struct batadv_orig_node *orig_node);
- int (*bat_orig_add_if)(struct batadv_orig_node *orig_node,
- int max_if_num);
- int (*bat_orig_del_if)(struct batadv_orig_node *orig_node,
- int max_if_num, int del_if_num);
+ struct batadv_algo_iface_ops iface;
+ struct batadv_algo_neigh_ops neigh;
+ struct batadv_algo_orig_ops orig;
};
/**
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 2c8095a5d824..8eecd0ec22f2 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -104,8 +104,16 @@ static int br_dev_init(struct net_device *dev)
return -ENOMEM;
err = br_vlan_init(br);
- if (err)
+ if (err) {
free_percpu(br->stats);
+ return err;
+ }
+
+ err = br_multicast_init_stats(br);
+ if (err) {
+ free_percpu(br->stats);
+ br_vlan_flush(br);
+ }
br_set_lockdep_class(dev);
return err;
@@ -341,6 +349,8 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_add_slave = br_add_slave,
.ndo_del_slave = br_del_slave,
.ndo_fix_features = br_fix_features,
+ .ndo_neigh_construct = netdev_default_l2upper_neigh_construct,
+ .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy,
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
.ndo_fdb_dump = br_fdb_dump,
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index f47759f05b6d..6c196037d818 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -198,8 +198,10 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
struct sk_buff *skb),
bool unicast)
{
- struct net_bridge_port *p;
+ u8 igmp_type = br_multicast_igmp_type(skb);
+ __be16 proto = skb->protocol;
struct net_bridge_port *prev;
+ struct net_bridge_port *p;
prev = NULL;
@@ -218,6 +220,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
prev = maybe_deliver(prev, p, skb, __packet_hook);
if (IS_ERR(prev))
goto out;
+ if (prev == p)
+ br_multicast_count(p->br, p, proto, igmp_type,
+ BR_MCAST_DIR_TX);
}
if (!prev)
@@ -257,9 +262,12 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb))
{
struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
+ u8 igmp_type = br_multicast_igmp_type(skb);
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
+ __be16 proto = skb->protocol;
+
struct hlist_node *rp;
rp = rcu_dereference(hlist_first_rcu(&br->router_list));
@@ -277,6 +285,9 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
prev = maybe_deliver(prev, port, skb, __packet_hook);
if (IS_ERR(prev))
goto out;
+ if (prev == port)
+ br_multicast_count(port->br, port, proto, igmp_type,
+ BR_MCAST_DIR_TX);
if ((unsigned long)lport >= (unsigned long)port)
p = rcu_dereference(p->next);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 8217aecf025b..f2fede05d32c 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -345,8 +345,8 @@ static int find_portno(struct net_bridge *br)
static struct net_bridge_port *new_nbp(struct net_bridge *br,
struct net_device *dev)
{
- int index;
struct net_bridge_port *p;
+ int index, err;
index = find_portno(br);
if (index < 0)
@@ -366,7 +366,12 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
br_init_port(p);
br_set_state(p, BR_STATE_DISABLED);
br_stp_port_timer_init(p);
- br_multicast_add_port(p);
+ err = br_multicast_add_port(p);
+ if (err) {
+ dev_put(dev);
+ kfree(p);
+ p = ERR_PTR(err);
+ }
return p;
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 43d2cd862bc2..786602bc0567 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -60,6 +60,9 @@ static int br_pass_frame_up(struct sk_buff *skb)
skb = br_handle_vlan(br, vg, skb);
if (!skb)
return NET_RX_DROP;
+ /* update the multicast stats if the packet is IGMP/MLD */
+ br_multicast_count(br, NULL, skb->protocol, br_multicast_igmp_type(skb),
+ BR_MCAST_DIR_TX);
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
dev_net(indev), NULL, skb, indev, NULL,
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 43844144c9c4..e405eef0ae2e 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -361,7 +361,8 @@ out:
}
static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
- __be32 group)
+ __be32 group,
+ u8 *igmp_type)
{
struct sk_buff *skb;
struct igmphdr *ih;
@@ -411,6 +412,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
skb_set_transport_header(skb, skb->len);
ih = igmp_hdr(skb);
+ *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY;
ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
ih->code = (group ? br->multicast_last_member_interval :
br->multicast_query_response_interval) /
@@ -428,7 +430,8 @@ out:
#if IS_ENABLED(CONFIG_IPV6)
static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
- const struct in6_addr *group)
+ const struct in6_addr *grp,
+ u8 *igmp_type)
{
struct sk_buff *skb;
struct ipv6hdr *ip6h;
@@ -487,16 +490,17 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
skb_set_transport_header(skb, skb->len);
mldq = (struct mld_msg *) icmp6_hdr(skb);
- interval = ipv6_addr_any(group) ?
+ interval = ipv6_addr_any(grp) ?
br->multicast_query_response_interval :
br->multicast_last_member_interval;
+ *igmp_type = ICMPV6_MGM_QUERY;
mldq->mld_type = ICMPV6_MGM_QUERY;
mldq->mld_code = 0;
mldq->mld_cksum = 0;
mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
mldq->mld_reserved = 0;
- mldq->mld_mca = *group;
+ mldq->mld_mca = *grp;
/* checksum */
mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -513,14 +517,16 @@ out:
#endif
static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
- struct br_ip *addr)
+ struct br_ip *addr,
+ u8 *igmp_type)
{
switch (addr->proto) {
case htons(ETH_P_IP):
- return br_ip4_multicast_alloc_query(br, addr->u.ip4);
+ return br_ip4_multicast_alloc_query(br, addr->u.ip4, igmp_type);
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- return br_ip6_multicast_alloc_query(br, &addr->u.ip6);
+ return br_ip6_multicast_alloc_query(br, &addr->u.ip6,
+ igmp_type);
#endif
}
return NULL;
@@ -829,18 +835,23 @@ static void __br_multicast_send_query(struct net_bridge *br,
struct br_ip *ip)
{
struct sk_buff *skb;
+ u8 igmp_type;
- skb = br_multicast_alloc_query(br, ip);
+ skb = br_multicast_alloc_query(br, ip, &igmp_type);
if (!skb)
return;
if (port) {
skb->dev = port->dev;
+ br_multicast_count(br, port, skb->protocol, igmp_type,
+ BR_MCAST_DIR_TX);
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
dev_net(port->dev), NULL, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
} else {
br_multicast_select_own_querier(br, ip, skb);
+ br_multicast_count(br, port, skb->protocol, igmp_type,
+ BR_MCAST_DIR_RX);
netif_rx(skb);
}
}
@@ -918,7 +929,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data)
}
#endif
-void br_multicast_add_port(struct net_bridge_port *port)
+int br_multicast_add_port(struct net_bridge_port *port)
{
port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
@@ -930,6 +941,11 @@ void br_multicast_add_port(struct net_bridge_port *port)
setup_timer(&port->ip6_own_query.timer,
br_ip6_multicast_port_query_expired, (unsigned long)port);
#endif
+ port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats);
+ if (!port->mcast_stats)
+ return -ENOMEM;
+
+ return 0;
}
void br_multicast_del_port(struct net_bridge_port *port)
@@ -944,6 +960,7 @@ void br_multicast_del_port(struct net_bridge_port *port)
br_multicast_del_pg(br, pg);
spin_unlock_bh(&br->multicast_lock);
del_timer_sync(&port->multicast_router_timer);
+ free_percpu(port->mcast_stats);
}
static void br_multicast_enable(struct bridge_mcast_own_query *query)
@@ -1583,6 +1600,39 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
}
#endif
+static void br_multicast_err_count(const struct net_bridge *br,
+ const struct net_bridge_port *p,
+ __be16 proto)
+{
+ struct bridge_mcast_stats __percpu *stats;
+ struct bridge_mcast_stats *pstats;
+
+ if (!br->multicast_stats_enabled)
+ return;
+
+ if (p)
+ stats = p->mcast_stats;
+ else
+ stats = br->mcast_stats;
+ if (WARN_ON(!stats))
+ return;
+
+ pstats = this_cpu_ptr(stats);
+
+ u64_stats_update_begin(&pstats->syncp);
+ switch (proto) {
+ case htons(ETH_P_IP):
+ pstats->mstats.igmp_parse_errors++;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ pstats->mstats.mld_parse_errors++;
+ break;
+#endif
+ }
+ u64_stats_update_end(&pstats->syncp);
+}
+
static int br_multicast_ipv4_rcv(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb,
@@ -1599,11 +1649,12 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
return 0;
} else if (err < 0) {
+ br_multicast_err_count(br, port, skb->protocol);
return err;
}
- BR_INPUT_SKB_CB(skb)->igmp = 1;
ih = igmp_hdr(skb);
+ BR_INPUT_SKB_CB(skb)->igmp = ih->type;
switch (ih->type) {
case IGMP_HOST_MEMBERSHIP_REPORT:
@@ -1625,6 +1676,9 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
if (skb_trimmed && skb_trimmed != skb)
kfree_skb(skb_trimmed);
+ br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp,
+ BR_MCAST_DIR_RX);
+
return err;
}
@@ -1645,11 +1699,12 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
return 0;
} else if (err < 0) {
+ br_multicast_err_count(br, port, skb->protocol);
return err;
}
- BR_INPUT_SKB_CB(skb)->igmp = 1;
mld = (struct mld_msg *)skb_transport_header(skb);
+ BR_INPUT_SKB_CB(skb)->igmp = mld->mld_type;
switch (mld->mld_type) {
case ICMPV6_MGM_REPORT:
@@ -1670,6 +1725,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
if (skb_trimmed && skb_trimmed != skb)
kfree_skb(skb_trimmed);
+ br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp,
+ BR_MCAST_DIR_RX);
+
return err;
}
#endif
@@ -1677,6 +1735,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
struct sk_buff *skb, u16 vid)
{
+ int ret = 0;
+
BR_INPUT_SKB_CB(skb)->igmp = 0;
BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
@@ -1685,14 +1745,16 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
switch (skb->protocol) {
case htons(ETH_P_IP):
- return br_multicast_ipv4_rcv(br, port, skb, vid);
+ ret = br_multicast_ipv4_rcv(br, port, skb, vid);
+ break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- return br_multicast_ipv6_rcv(br, port, skb, vid);
+ ret = br_multicast_ipv6_rcv(br, port, skb, vid);
+ break;
#endif
}
- return 0;
+ return ret;
}
static void br_multicast_query_expired(struct net_bridge *br,
@@ -1831,6 +1893,8 @@ void br_multicast_dev_del(struct net_bridge *br)
out:
spin_unlock_bh(&br->multicast_lock);
+
+ free_percpu(br->mcast_stats);
}
int br_multicast_set_router(struct net_bridge *br, unsigned long val)
@@ -2185,3 +2249,128 @@ unlock:
return ret;
}
EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
+
+static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
+ __be16 proto, u8 type, u8 dir)
+{
+ struct bridge_mcast_stats *pstats = this_cpu_ptr(stats);
+
+ u64_stats_update_begin(&pstats->syncp);
+ switch (proto) {
+ case htons(ETH_P_IP):
+ switch (type) {
+ case IGMP_HOST_MEMBERSHIP_REPORT:
+ pstats->mstats.igmp_v1reports[dir]++;
+ break;
+ case IGMPV2_HOST_MEMBERSHIP_REPORT:
+ pstats->mstats.igmp_v2reports[dir]++;
+ break;
+ case IGMPV3_HOST_MEMBERSHIP_REPORT:
+ pstats->mstats.igmp_v3reports[dir]++;
+ break;
+ case IGMP_HOST_MEMBERSHIP_QUERY:
+ pstats->mstats.igmp_queries[dir]++;
+ break;
+ case IGMP_HOST_LEAVE_MESSAGE:
+ pstats->mstats.igmp_leaves[dir]++;
+ break;
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ switch (type) {
+ case ICMPV6_MGM_REPORT:
+ pstats->mstats.mld_v1reports[dir]++;
+ break;
+ case ICMPV6_MLD2_REPORT:
+ pstats->mstats.mld_v2reports[dir]++;
+ break;
+ case ICMPV6_MGM_QUERY:
+ pstats->mstats.mld_queries[dir]++;
+ break;
+ case ICMPV6_MGM_REDUCTION:
+ pstats->mstats.mld_leaves[dir]++;
+ break;
+ }
+ break;
+#endif /* CONFIG_IPV6 */
+ }
+ u64_stats_update_end(&pstats->syncp);
+}
+
+void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+ __be16 proto, u8 type, u8 dir)
+{
+ struct bridge_mcast_stats __percpu *stats;
+
+ /* if multicast_disabled is true then igmp type can't be set */
+ if (!type || !br->multicast_stats_enabled)
+ return;
+
+ if (p)
+ stats = p->mcast_stats;
+ else
+ stats = br->mcast_stats;
+ if (WARN_ON(!stats))
+ return;
+
+ br_mcast_stats_add(stats, proto, type, dir);
+}
+
+int br_multicast_init_stats(struct net_bridge *br)
+{
+ br->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats);
+ if (!br->mcast_stats)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void mcast_stats_add_dir(u64 *dst, u64 *src)
+{
+ dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX];
+ dst[BR_MCAST_DIR_TX] += src[BR_MCAST_DIR_TX];
+}
+
+void br_multicast_get_stats(const struct net_bridge *br,
+ const struct net_bridge_port *p,
+ struct br_mcast_stats *dest)
+{
+ struct bridge_mcast_stats __percpu *stats;
+ struct br_mcast_stats tdst;
+ int i;
+
+ memset(dest, 0, sizeof(*dest));
+ if (p)
+ stats = p->mcast_stats;
+ else
+ stats = br->mcast_stats;
+ if (WARN_ON(!stats))
+ return;
+
+ memset(&tdst, 0, sizeof(tdst));
+ for_each_possible_cpu(i) {
+ struct bridge_mcast_stats *cpu_stats = per_cpu_ptr(stats, i);
+ struct br_mcast_stats temp;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ memcpy(&temp, &cpu_stats->mstats, sizeof(temp));
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+ mcast_stats_add_dir(tdst.igmp_queries, temp.igmp_queries);
+ mcast_stats_add_dir(tdst.igmp_leaves, temp.igmp_leaves);
+ mcast_stats_add_dir(tdst.igmp_v1reports, temp.igmp_v1reports);
+ mcast_stats_add_dir(tdst.igmp_v2reports, temp.igmp_v2reports);
+ mcast_stats_add_dir(tdst.igmp_v3reports, temp.igmp_v3reports);
+ tdst.igmp_parse_errors += temp.igmp_parse_errors;
+
+ mcast_stats_add_dir(tdst.mld_queries, temp.mld_queries);
+ mcast_stats_add_dir(tdst.mld_leaves, temp.mld_leaves);
+ mcast_stats_add_dir(tdst.mld_v1reports, temp.mld_v1reports);
+ mcast_stats_add_dir(tdst.mld_v2reports, temp.mld_v2reports);
+ tdst.mld_parse_errors += temp.mld_parse_errors;
+ }
+ memcpy(dest, &tdst, sizeof(*dest));
+}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2d25979273a6..77e7f69bf80d 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -700,7 +700,7 @@ static int
br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
- unsigned int mtu = ip_skb_dst_mtu(skb);
+ unsigned int mtu = ip_skb_dst_mtu(sk, skb);
struct iphdr *iph = ip_hdr(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 85e89f693589..f2a29e467e78 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -851,6 +851,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 },
[IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 },
[IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 },
+ [IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1055,6 +1056,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
br->multicast_startup_query_interval = clock_t_to_jiffies(val);
}
+
+ if (data[IFLA_BR_MCAST_STATS_ENABLED]) {
+ __u8 mcast_stats;
+
+ mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]);
+ br->multicast_stats_enabled = !!mcast_stats;
+ }
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (data[IFLA_BR_NF_CALL_IPTABLES]) {
@@ -1110,6 +1118,7 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERY_USE_IFADDR */
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERIER */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_STATS_ENABLED */
nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_ELASTICITY */
nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_MAX */
nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_LAST_MEMBER_CNT */
@@ -1187,6 +1196,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
br->multicast_query_use_ifaddr) ||
nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, br->multicast_querier) ||
+ nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED,
+ br->multicast_stats_enabled) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY,
br->hash_elasticity) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
@@ -1234,7 +1245,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
return 0;
}
-static size_t br_get_linkxstats_size(const struct net_device *dev)
+static size_t bridge_get_linkxstats_size(const struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_vlan_group *vg;
@@ -1242,53 +1253,88 @@ static size_t br_get_linkxstats_size(const struct net_device *dev)
int numvls = 0;
vg = br_vlan_group(br);
- if (!vg)
- return 0;
-
- /* we need to count all, even placeholder entries */
- list_for_each_entry(v, &vg->vlan_list, vlist)
- numvls++;
+ if (vg) {
+ /* we need to count all, even placeholder entries */
+ list_for_each_entry(v, &vg->vlan_list, vlist)
+ numvls++;
+ }
- /* account for the vlans and the link xstats type nest attribute */
return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) +
+ nla_total_size(sizeof(struct br_mcast_stats)) +
nla_total_size(0);
}
-static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev,
- int *prividx)
+static size_t brport_get_linkxstats_size(const struct net_device *dev)
+{
+ return nla_total_size(sizeof(struct br_mcast_stats)) +
+ nla_total_size(0);
+}
+
+static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
+{
+ size_t retsize = 0;
+
+ switch (attr) {
+ case IFLA_STATS_LINK_XSTATS:
+ retsize = bridge_get_linkxstats_size(dev);
+ break;
+ case IFLA_STATS_LINK_XSTATS_SLAVE:
+ retsize = brport_get_linkxstats_size(dev);
+ break;
+ }
+
+ return retsize;
+}
+
+static int bridge_fill_linkxstats(struct sk_buff *skb,
+ const struct net_device *dev,
+ int *prividx)
{
struct net_bridge *br = netdev_priv(dev);
+ struct nlattr *nla __maybe_unused;
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *v;
struct nlattr *nest;
int vl_idx = 0;
- vg = br_vlan_group(br);
- if (!vg)
- goto out;
nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
if (!nest)
return -EMSGSIZE;
- list_for_each_entry(v, &vg->vlan_list, vlist) {
- struct bridge_vlan_xstats vxi;
- struct br_vlan_stats stats;
- if (++vl_idx < *prividx)
- continue;
- memset(&vxi, 0, sizeof(vxi));
- vxi.vid = v->vid;
- br_vlan_get_stats(v, &stats);
- vxi.rx_bytes = stats.rx_bytes;
- vxi.rx_packets = stats.rx_packets;
- vxi.tx_bytes = stats.tx_bytes;
- vxi.tx_packets = stats.tx_packets;
-
- if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi))
+ vg = br_vlan_group(br);
+ if (vg) {
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ struct bridge_vlan_xstats vxi;
+ struct br_vlan_stats stats;
+
+ if (++vl_idx < *prividx)
+ continue;
+ memset(&vxi, 0, sizeof(vxi));
+ vxi.vid = v->vid;
+ br_vlan_get_stats(v, &stats);
+ vxi.rx_bytes = stats.rx_bytes;
+ vxi.rx_packets = stats.rx_packets;
+ vxi.tx_bytes = stats.tx_bytes;
+ vxi.tx_packets = stats.tx_packets;
+
+ if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi))
+ goto nla_put_failure;
+ }
+ }
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (++vl_idx >= *prividx) {
+ nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST,
+ sizeof(struct br_mcast_stats),
+ BRIDGE_XSTATS_PAD);
+ if (!nla)
goto nla_put_failure;
+ br_multicast_get_stats(br, NULL, nla_data(nla));
}
+#endif
nla_nest_end(skb, nest);
*prividx = 0;
-out:
+
return 0;
nla_put_failure:
@@ -1298,6 +1344,52 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int brport_fill_linkxstats(struct sk_buff *skb,
+ const struct net_device *dev,
+ int *prividx)
+{
+ struct net_bridge_port *p = br_port_get_rtnl(dev);
+ struct nlattr *nla __maybe_unused;
+ struct nlattr *nest;
+
+ if (!p)
+ return 0;
+
+ nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
+ if (!nest)
+ return -EMSGSIZE;
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST,
+ sizeof(struct br_mcast_stats),
+ BRIDGE_XSTATS_PAD);
+ if (!nla) {
+ nla_nest_end(skb, nest);
+ return -EMSGSIZE;
+ }
+ br_multicast_get_stats(p->br, p, nla_data(nla));
+#endif
+ nla_nest_end(skb, nest);
+
+ return 0;
+}
+
+static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev,
+ int *prividx, int attr)
+{
+ int ret = -EINVAL;
+
+ switch (attr) {
+ case IFLA_STATS_LINK_XSTATS:
+ ret = bridge_fill_linkxstats(skb, dev, prividx);
+ break;
+ case IFLA_STATS_LINK_XSTATS_SLAVE:
+ ret = brport_fill_linkxstats(skb, dev, prividx);
+ break;
+ }
+
+ return ret;
+}
+
static struct rtnl_af_ops br_af_ops __read_mostly = {
.family = AF_BRIDGE,
.get_link_af_size = br_get_link_af_size_filtered,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 52edecf3c294..4dc851166ad1 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -75,6 +75,12 @@ struct bridge_mcast_querier {
struct br_ip addr;
struct net_bridge_port __rcu *port;
};
+
+/* IGMP/MLD statistics */
+struct bridge_mcast_stats {
+ struct br_mcast_stats mstats;
+ struct u64_stats_sync syncp;
+};
#endif
struct br_vlan_stats {
@@ -229,6 +235,7 @@ struct net_bridge_port
struct bridge_mcast_own_query ip6_own_query;
#endif /* IS_ENABLED(CONFIG_IPV6) */
unsigned char multicast_router;
+ struct bridge_mcast_stats __percpu *mcast_stats;
struct timer_list multicast_router_timer;
struct hlist_head mglist;
struct hlist_node rlist;
@@ -315,6 +322,7 @@ struct net_bridge
u8 multicast_querier:1;
u8 multicast_query_use_ifaddr:1;
u8 has_ipv6_addr:1;
+ u8 multicast_stats_enabled:1;
u32 hash_elasticity;
u32 hash_max;
@@ -337,6 +345,7 @@ struct net_bridge
struct bridge_mcast_other_query ip4_other_query;
struct bridge_mcast_own_query ip4_own_query;
struct bridge_mcast_querier ip4_querier;
+ struct bridge_mcast_stats __percpu *mcast_stats;
#if IS_ENABLED(CONFIG_IPV6)
struct bridge_mcast_other_query ip6_other_query;
struct bridge_mcast_own_query ip6_own_query;
@@ -543,7 +552,7 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
struct sk_buff *skb, u16 vid);
struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
struct sk_buff *skb, u16 vid);
-void br_multicast_add_port(struct net_bridge_port *port);
+int br_multicast_add_port(struct net_bridge_port *port);
void br_multicast_del_port(struct net_bridge_port *port);
void br_multicast_enable_port(struct net_bridge_port *port);
void br_multicast_disable_port(struct net_bridge_port *port);
@@ -576,6 +585,12 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
struct br_ip *group, int type, u8 flags);
void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
int type);
+void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+ __be16 proto, u8 type, u8 dir);
+int br_multicast_init_stats(struct net_bridge *br);
+void br_multicast_get_stats(const struct net_bridge *br,
+ const struct net_bridge_port *p,
+ struct br_mcast_stats *dest);
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
@@ -623,6 +638,11 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
return false;
}
}
+
+static inline int br_multicast_igmp_type(const struct sk_buff *skb)
+{
+ return BR_INPUT_SKB_CB(skb)->igmp;
+}
#else
static inline int br_multicast_rcv(struct net_bridge *br,
struct net_bridge_port *port,
@@ -638,8 +658,9 @@ static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
return NULL;
}
-static inline void br_multicast_add_port(struct net_bridge_port *port)
+static inline int br_multicast_add_port(struct net_bridge_port *port)
{
+ return 0;
}
static inline void br_multicast_del_port(struct net_bridge_port *port)
@@ -695,6 +716,22 @@ static inline void br_mdb_init(void)
static inline void br_mdb_uninit(void)
{
}
+
+static inline void br_multicast_count(struct net_bridge *br,
+ const struct net_bridge_port *p,
+ __be16 proto, u8 type, u8 dir)
+{
+}
+
+static inline int br_multicast_init_stats(struct net_bridge *br)
+{
+ return 0;
+}
+
+static inline int br_multicast_igmp_type(const struct sk_buff *skb)
+{
+ return 0;
+}
#endif
/* br_vlan.c */
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index beb47071e38d..e120307c6e36 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -618,6 +618,30 @@ static ssize_t multicast_startup_query_interval_store(
return store_bridge_parm(d, buf, len, set_startup_query_interval);
}
static DEVICE_ATTR_RW(multicast_startup_query_interval);
+
+static ssize_t multicast_stats_enabled_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+
+ return sprintf(buf, "%u\n", br->multicast_stats_enabled);
+}
+
+static int set_stats_enabled(struct net_bridge *br, unsigned long val)
+{
+ br->multicast_stats_enabled = !!val;
+ return 0;
+}
+
+static ssize_t multicast_stats_enabled_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ return store_bridge_parm(d, buf, len, set_stats_enabled);
+}
+static DEVICE_ATTR_RW(multicast_stats_enabled);
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static ssize_t nf_call_iptables_show(
@@ -784,6 +808,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_multicast_query_interval.attr,
&dev_attr_multicast_query_response_interval.attr,
&dev_attr_multicast_startup_query_interval.attr,
+ &dev_attr_multicast_stats_enabled.attr,
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
&dev_attr_nf_call_iptables.attr,
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 2a449b7ab8fa..5fc4affd9fdb 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -20,16 +20,16 @@ ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par)
__be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type;
if (info->bitmask & EBT_802_3_SAP) {
- if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP))
+ if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.ssap))
return false;
- if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP))
+ if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.dsap))
return false;
}
if (info->bitmask & EBT_802_3_TYPE) {
if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE))
return false;
- if (FWINV(info->type != type, EBT_802_3_TYPE))
+ if (NF_INVF(info, EBT_802_3_TYPE, info->type != type))
return false;
}
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index cd457b891b27..227142282b45 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -25,14 +25,14 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL)
return false;
- if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode !=
- ah->ar_op, EBT_ARP_OPCODE))
+ if ((info->bitmask & EBT_ARP_OPCODE) &&
+ NF_INVF(info, EBT_ARP_OPCODE, info->opcode != ah->ar_op))
return false;
- if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype !=
- ah->ar_hrd, EBT_ARP_HTYPE))
+ if ((info->bitmask & EBT_ARP_HTYPE) &&
+ NF_INVF(info, EBT_ARP_HTYPE, info->htype != ah->ar_hrd))
return false;
- if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype !=
- ah->ar_pro, EBT_ARP_PTYPE))
+ if ((info->bitmask & EBT_ARP_PTYPE) &&
+ NF_INVF(info, EBT_ARP_PTYPE, info->ptype != ah->ar_pro))
return false;
if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) {
@@ -51,21 +51,22 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
sizeof(daddr), &daddr);
if (dap == NULL)
return false;
- if (info->bitmask & EBT_ARP_SRC_IP &&
- FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP))
+ if ((info->bitmask & EBT_ARP_SRC_IP) &&
+ NF_INVF(info, EBT_ARP_SRC_IP,
+ info->saddr != (*sap & info->smsk)))
return false;
- if (info->bitmask & EBT_ARP_DST_IP &&
- FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP))
+ if ((info->bitmask & EBT_ARP_DST_IP) &&
+ NF_INVF(info, EBT_ARP_DST_IP,
+ info->daddr != (*dap & info->dmsk)))
return false;
- if (info->bitmask & EBT_ARP_GRAT &&
- FWINV(*dap != *sap, EBT_ARP_GRAT))
+ if ((info->bitmask & EBT_ARP_GRAT) &&
+ NF_INVF(info, EBT_ARP_GRAT, *dap != *sap))
return false;
}
if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
const unsigned char *mp;
unsigned char _mac[ETH_ALEN];
- uint8_t verdict, i;
if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER))
return false;
@@ -74,11 +75,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
sizeof(_mac), &_mac);
if (mp == NULL)
return false;
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (mp[i] ^ info->smaddr[i]) &
- info->smmsk[i];
- if (FWINV(verdict != 0, EBT_ARP_SRC_MAC))
+ if (NF_INVF(info, EBT_ARP_SRC_MAC,
+ !ether_addr_equal_masked(mp, info->smaddr,
+ info->smmsk)))
return false;
}
@@ -88,11 +87,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
sizeof(_mac), &_mac);
if (mp == NULL)
return false;
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (mp[i] ^ info->dmaddr[i]) &
- info->dmmsk[i];
- if (FWINV(verdict != 0, EBT_ARP_DST_MAC))
+ if (NF_INVF(info, EBT_ARP_DST_MAC,
+ !ether_addr_equal_masked(mp, info->dmaddr,
+ info->dmmsk)))
return false;
}
}
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 23bca62d58d2..d06968bdf5ec 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -36,19 +36,19 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (ih == NULL)
return false;
- if (info->bitmask & EBT_IP_TOS &&
- FWINV(info->tos != ih->tos, EBT_IP_TOS))
+ if ((info->bitmask & EBT_IP_TOS) &&
+ NF_INVF(info, EBT_IP_TOS, info->tos != ih->tos))
return false;
- if (info->bitmask & EBT_IP_SOURCE &&
- FWINV((ih->saddr & info->smsk) !=
- info->saddr, EBT_IP_SOURCE))
+ if ((info->bitmask & EBT_IP_SOURCE) &&
+ NF_INVF(info, EBT_IP_SOURCE,
+ (ih->saddr & info->smsk) != info->saddr))
return false;
if ((info->bitmask & EBT_IP_DEST) &&
- FWINV((ih->daddr & info->dmsk) !=
- info->daddr, EBT_IP_DEST))
+ NF_INVF(info, EBT_IP_DEST,
+ (ih->daddr & info->dmsk) != info->daddr))
return false;
if (info->bitmask & EBT_IP_PROTO) {
- if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO))
+ if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol))
return false;
if (!(info->bitmask & EBT_IP_DPORT) &&
!(info->bitmask & EBT_IP_SPORT))
@@ -61,16 +61,16 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
if (info->bitmask & EBT_IP_DPORT) {
u32 dst = ntohs(pptr->dst);
- if (FWINV(dst < info->dport[0] ||
- dst > info->dport[1],
- EBT_IP_DPORT))
+ if (NF_INVF(info, EBT_IP_DPORT,
+ dst < info->dport[0] ||
+ dst > info->dport[1]))
return false;
}
if (info->bitmask & EBT_IP_SPORT) {
u32 src = ntohs(pptr->src);
- if (FWINV(src < info->sport[0] ||
- src > info->sport[1],
- EBT_IP_SPORT))
+ if (NF_INVF(info, EBT_IP_SPORT,
+ src < info->sport[0] ||
+ src > info->sport[1]))
return false;
}
}
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 98de6e7fd86d..4617491be41e 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -45,15 +45,18 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
if (ih6 == NULL)
return false;
- if (info->bitmask & EBT_IP6_TCLASS &&
- FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS))
+ if ((info->bitmask & EBT_IP6_TCLASS) &&
+ NF_INVF(info, EBT_IP6_TCLASS,
+ info->tclass != ipv6_get_dsfield(ih6)))
return false;
- if ((info->bitmask & EBT_IP6_SOURCE &&
- FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk,
- &info->saddr), EBT_IP6_SOURCE)) ||
- (info->bitmask & EBT_IP6_DEST &&
- FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk,
- &info->daddr), EBT_IP6_DEST)))
+ if (((info->bitmask & EBT_IP6_SOURCE) &&
+ NF_INVF(info, EBT_IP6_SOURCE,
+ ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk,
+ &info->saddr))) ||
+ ((info->bitmask & EBT_IP6_DEST) &&
+ NF_INVF(info, EBT_IP6_DEST,
+ ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk,
+ &info->daddr))))
return false;
if (info->bitmask & EBT_IP6_PROTO) {
uint8_t nexthdr = ih6->nexthdr;
@@ -63,7 +66,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off);
if (offset_ph == -1)
return false;
- if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
+ if (NF_INVF(info, EBT_IP6_PROTO, info->protocol != nexthdr))
return false;
if (!(info->bitmask & (EBT_IP6_DPORT |
EBT_IP6_SPORT | EBT_IP6_ICMP6)))
@@ -76,22 +79,24 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
if (info->bitmask & EBT_IP6_DPORT) {
u16 dst = ntohs(pptr->tcpudphdr.dst);
- if (FWINV(dst < info->dport[0] ||
- dst > info->dport[1], EBT_IP6_DPORT))
+ if (NF_INVF(info, EBT_IP6_DPORT,
+ dst < info->dport[0] ||
+ dst > info->dport[1]))
return false;
}
if (info->bitmask & EBT_IP6_SPORT) {
u16 src = ntohs(pptr->tcpudphdr.src);
- if (FWINV(src < info->sport[0] ||
- src > info->sport[1], EBT_IP6_SPORT))
+ if (NF_INVF(info, EBT_IP6_SPORT,
+ src < info->sport[0] ||
+ src > info->sport[1]))
return false;
}
if ((info->bitmask & EBT_IP6_ICMP6) &&
- FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
- pptr->icmphdr.type > info->icmpv6_type[1] ||
- pptr->icmphdr.code < info->icmpv6_code[0] ||
- pptr->icmphdr.code > info->icmpv6_code[1],
- EBT_IP6_ICMP6))
+ NF_INVF(info, EBT_IP6_ICMP6,
+ pptr->icmphdr.type < info->icmpv6_type[0] ||
+ pptr->icmphdr.type > info->icmpv6_type[1] ||
+ pptr->icmphdr.code < info->icmpv6_code[0] ||
+ pptr->icmphdr.code > info->icmpv6_code[1]))
return false;
}
return true;
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 6b731e12ecfa..3140eb912d7e 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -17,24 +17,24 @@
#define BPDU_TYPE_TCN 0x80
struct stp_header {
- uint8_t dsap;
- uint8_t ssap;
- uint8_t ctrl;
- uint8_t pid;
- uint8_t vers;
- uint8_t type;
+ u8 dsap;
+ u8 ssap;
+ u8 ctrl;
+ u8 pid;
+ u8 vers;
+ u8 type;
};
struct stp_config_pdu {
- uint8_t flags;
- uint8_t root[8];
- uint8_t root_cost[4];
- uint8_t sender[8];
- uint8_t port[2];
- uint8_t msg_age[2];
- uint8_t max_age[2];
- uint8_t hello_time[2];
- uint8_t forward_delay[2];
+ u8 flags;
+ u8 root[8];
+ u8 root_cost[4];
+ u8 sender[8];
+ u8 port[2];
+ u8 msg_age[2];
+ u8 max_age[2];
+ u8 hello_time[2];
+ u8 forward_delay[2];
};
#define NR16(p) (p[0] << 8 | p[1])
@@ -44,76 +44,73 @@ static bool ebt_filter_config(const struct ebt_stp_info *info,
const struct stp_config_pdu *stpc)
{
const struct ebt_stp_config_info *c;
- uint16_t v16;
- uint32_t v32;
- int verdict, i;
+ u16 v16;
+ u32 v32;
c = &info->config;
if ((info->bitmask & EBT_STP_FLAGS) &&
- FWINV(c->flags != stpc->flags, EBT_STP_FLAGS))
+ NF_INVF(info, EBT_STP_FLAGS, c->flags != stpc->flags))
return false;
if (info->bitmask & EBT_STP_ROOTPRIO) {
v16 = NR16(stpc->root);
- if (FWINV(v16 < c->root_priol ||
- v16 > c->root_priou, EBT_STP_ROOTPRIO))
+ if (NF_INVF(info, EBT_STP_ROOTPRIO,
+ v16 < c->root_priol || v16 > c->root_priou))
return false;
}
if (info->bitmask & EBT_STP_ROOTADDR) {
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (stpc->root[2+i] ^ c->root_addr[i]) &
- c->root_addrmsk[i];
- if (FWINV(verdict != 0, EBT_STP_ROOTADDR))
+ if (NF_INVF(info, EBT_STP_ROOTADDR,
+ !ether_addr_equal_masked(&stpc->root[2],
+ c->root_addr,
+ c->root_addrmsk)))
return false;
}
if (info->bitmask & EBT_STP_ROOTCOST) {
v32 = NR32(stpc->root_cost);
- if (FWINV(v32 < c->root_costl ||
- v32 > c->root_costu, EBT_STP_ROOTCOST))
+ if (NF_INVF(info, EBT_STP_ROOTCOST,
+ v32 < c->root_costl || v32 > c->root_costu))
return false;
}
if (info->bitmask & EBT_STP_SENDERPRIO) {
v16 = NR16(stpc->sender);
- if (FWINV(v16 < c->sender_priol ||
- v16 > c->sender_priou, EBT_STP_SENDERPRIO))
+ if (NF_INVF(info, EBT_STP_SENDERPRIO,
+ v16 < c->sender_priol || v16 > c->sender_priou))
return false;
}
if (info->bitmask & EBT_STP_SENDERADDR) {
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) &
- c->sender_addrmsk[i];
- if (FWINV(verdict != 0, EBT_STP_SENDERADDR))
+ if (NF_INVF(info, EBT_STP_SENDERADDR,
+ !ether_addr_equal_masked(&stpc->sender[2],
+ c->sender_addr,
+ c->sender_addrmsk)))
return false;
}
if (info->bitmask & EBT_STP_PORT) {
v16 = NR16(stpc->port);
- if (FWINV(v16 < c->portl ||
- v16 > c->portu, EBT_STP_PORT))
+ if (NF_INVF(info, EBT_STP_PORT,
+ v16 < c->portl || v16 > c->portu))
return false;
}
if (info->bitmask & EBT_STP_MSGAGE) {
v16 = NR16(stpc->msg_age);
- if (FWINV(v16 < c->msg_agel ||
- v16 > c->msg_ageu, EBT_STP_MSGAGE))
+ if (NF_INVF(info, EBT_STP_MSGAGE,
+ v16 < c->msg_agel || v16 > c->msg_ageu))
return false;
}
if (info->bitmask & EBT_STP_MAXAGE) {
v16 = NR16(stpc->max_age);
- if (FWINV(v16 < c->max_agel ||
- v16 > c->max_ageu, EBT_STP_MAXAGE))
+ if (NF_INVF(info, EBT_STP_MAXAGE,
+ v16 < c->max_agel || v16 > c->max_ageu))
return false;
}
if (info->bitmask & EBT_STP_HELLOTIME) {
v16 = NR16(stpc->hello_time);
- if (FWINV(v16 < c->hello_timel ||
- v16 > c->hello_timeu, EBT_STP_HELLOTIME))
+ if (NF_INVF(info, EBT_STP_HELLOTIME,
+ v16 < c->hello_timel || v16 > c->hello_timeu))
return false;
}
if (info->bitmask & EBT_STP_FWDD) {
v16 = NR16(stpc->forward_delay);
- if (FWINV(v16 < c->forward_delayl ||
- v16 > c->forward_delayu, EBT_STP_FWDD))
+ if (NF_INVF(info, EBT_STP_FWDD,
+ v16 < c->forward_delayl || v16 > c->forward_delayu))
return false;
}
return true;
@@ -125,7 +122,7 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct ebt_stp_info *info = par->matchinfo;
const struct stp_header *sp;
struct stp_header _stph;
- const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
+ const u8 header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00};
sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph);
if (sp == NULL)
@@ -135,8 +132,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (memcmp(sp, header, sizeof(header)))
return false;
- if (info->bitmask & EBT_STP_TYPE &&
- FWINV(info->type != sp->type, EBT_STP_TYPE))
+ if ((info->bitmask & EBT_STP_TYPE) &&
+ NF_INVF(info, EBT_STP_TYPE, info->type != sp->type))
return false;
if (sp->type == BPDU_TYPE_CONFIG &&
@@ -156,8 +153,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
{
const struct ebt_stp_info *info = par->matchinfo;
- const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
- const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ const u8 bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
+ const u8 msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
const struct ebt_entry *e = par->entryinfo;
if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5a61f35412a0..cceac5bb658f 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -121,7 +121,6 @@ ebt_dev_check(const char *entry, const struct net_device *device)
return devname[i] != entry[i] && entry[i] != 1;
}
-#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg))
/* process standard matches */
static inline int
ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
@@ -130,7 +129,6 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
const struct ethhdr *h = eth_hdr(skb);
const struct net_bridge_port *p;
__be16 ethproto;
- int verdict, i;
if (skb_vlan_tag_present(skb))
ethproto = htons(ETH_P_8021Q);
@@ -138,38 +136,36 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
ethproto = h->h_proto;
if (e->bitmask & EBT_802_3) {
- if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO))
+ if (NF_INVF(e, EBT_IPROTO, eth_proto_is_802_3(ethproto)))
return 1;
} else if (!(e->bitmask & EBT_NOPROTO) &&
- FWINV2(e->ethproto != ethproto, EBT_IPROTO))
+ NF_INVF(e, EBT_IPROTO, e->ethproto != ethproto))
return 1;
- if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN))
+ if (NF_INVF(e, EBT_IIN, ebt_dev_check(e->in, in)))
return 1;
- if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
+ if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out)))
return 1;
/* rcu_read_lock()ed by nf_hook_slow */
if (in && (p = br_port_get_rcu(in)) != NULL &&
- FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN))
+ NF_INVF(e, EBT_ILOGICALIN,
+ ebt_dev_check(e->logical_in, p->br->dev)))
return 1;
if (out && (p = br_port_get_rcu(out)) != NULL &&
- FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT))
+ NF_INVF(e, EBT_ILOGICALOUT,
+ ebt_dev_check(e->logical_out, p->br->dev)))
return 1;
if (e->bitmask & EBT_SOURCEMAC) {
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (h->h_source[i] ^ e->sourcemac[i]) &
- e->sourcemsk[i];
- if (FWINV2(verdict != 0, EBT_ISOURCE))
+ if (NF_INVF(e, EBT_ISOURCE,
+ !ether_addr_equal_masked(h->h_source, e->sourcemac,
+ e->sourcemsk)))
return 1;
}
if (e->bitmask & EBT_DESTMAC) {
- verdict = 0;
- for (i = 0; i < 6; i++)
- verdict |= (h->h_dest[i] ^ e->destmac[i]) &
- e->destmsk[i];
- if (FWINV2(verdict != 0, EBT_IDEST))
+ if (NF_INVF(e, EBT_IDEST,
+ !ether_addr_equal_masked(h->h_dest, e->destmac,
+ e->destmsk)))
return 1;
}
return 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index aba10d2a8bc3..b92d63bfde7a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5445,6 +5445,52 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
EXPORT_SYMBOL(netdev_lower_get_next);
/**
+ * netdev_all_lower_get_next - Get the next device from all lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the neighbour all lower
+ * list will remain unchanged.
+ */
+struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_entry(*iter, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->all_adj_list.lower)
+ return NULL;
+
+ *iter = lower->list.next;
+
+ return lower->dev;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next);
+
+/**
+ * netdev_all_lower_get_next_rcu - Get the next device from all
+ * lower neighbour list, RCU variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
+ struct netdev_adjacent, list);
+
+ return lower ? lower->dev : NULL;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
+
+/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
* lower neighbour list, RCU
* variant
@@ -6041,6 +6087,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
}
EXPORT_SYMBOL(netdev_lower_state_changed);
+int netdev_default_l2upper_neigh_construct(struct net_device *dev,
+ struct neighbour *n)
+{
+ struct net_device *lower_dev, *stop_dev;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (!lower_dev->netdev_ops->ndo_neigh_construct)
+ continue;
+ err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
+ if (err) {
+ stop_dev = lower_dev;
+ goto rollback;
+ }
+ }
+ return 0;
+
+rollback:
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (lower_dev == stop_dev)
+ break;
+ if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+ continue;
+ lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
+
+void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
+ struct neighbour *n)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+ continue;
+ lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+ }
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 933e8d4d3968..b2e592a198c0 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1394,6 +1394,78 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
return -EOPNOTSUPP;
}
+static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags, u16 mode)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const struct devlink_ops *ops = devlink->ops;
+ struct sk_buff *msg;
+ u16 mode;
+ int err;
+
+ if (!ops || !ops->eswitch_mode_get)
+ return -EOPNOTSUPP;
+
+ err = ops->eswitch_mode_get(devlink, &mode);
+ if (err)
+ return err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET,
+ info->snd_portid, info->snd_seq, 0, mode);
+
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const struct devlink_ops *ops = devlink->ops;
+ u16 mode;
+
+ if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE])
+ return -EINVAL;
+
+ mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+
+ if (ops && ops->eswitch_mode_set)
+ return ops->eswitch_mode_set(devlink, mode);
+ return -EOPNOTSUPP;
+}
+
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -1407,6 +1479,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
[DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -1525,6 +1598,20 @@ static const struct genl_ops devlink_nl_ops[] = {
DEVLINK_NL_FLAG_NEED_SB |
DEVLINK_NL_FLAG_LOCK_PORTS,
},
+ {
+ .cmd = DEVLINK_CMD_ESWITCH_MODE_GET,
+ .doit = devlink_nl_cmd_eswitch_mode_get_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_ESWITCH_MODE_SET,
+ .doit = devlink_nl_cmd_eswitch_mode_set_doit,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
};
/**
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98298b11f534..be4629c344a6 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -269,6 +269,49 @@ errout:
return err;
}
+static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
+ struct nlattr **tb, struct fib_rule *rule)
+{
+ struct fib_rule *r;
+
+ list_for_each_entry(r, &ops->rules_list, list) {
+ if (r->action != rule->action)
+ continue;
+
+ if (r->table != rule->table)
+ continue;
+
+ if (r->pref != rule->pref)
+ continue;
+
+ if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
+ continue;
+
+ if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
+ continue;
+
+ if (r->mark != rule->mark)
+ continue;
+
+ if (r->mark_mask != rule->mark_mask)
+ continue;
+
+ if (r->tun_id != rule->tun_id)
+ continue;
+
+ if (r->fr_net != rule->fr_net)
+ continue;
+
+ if (r->l3mdev != rule->l3mdev)
+ continue;
+
+ if (!ops->compare(r, frh, tb))
+ continue;
+ return 1;
+ }
+ return 0;
+}
+
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
@@ -386,6 +429,12 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
if (rule->l3mdev && rule->table)
goto errout_free;
+ if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
+ rule_exists(ops, frh, tb, rule)) {
+ err = -EEXIST;
+ goto errout_free;
+ }
+
err = ops->configure(rule, skb, frh, tb);
if (err < 0)
goto errout_free;
diff --git a/net/core/filter.c b/net/core/filter.c
index cb9fc16cac46..10c4a2f9e8bb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -150,6 +150,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return raw_smp_processor_id();
}
+static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
+ .func = __get_raw_cpu_id,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+};
+
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
struct bpf_insn *insn_buf)
{
@@ -1295,21 +1301,10 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
- struct bpf_prog *prog;
-
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return ERR_PTR(-EPERM);
- prog = bpf_prog_get(ufd);
- if (IS_ERR(prog))
- return prog;
-
- if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
- bpf_prog_put(prog);
- return ERR_PTR(-EINVAL);
- }
-
- return prog;
+ return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
}
int sk_attach_bpf(u32 ufd, struct sock *sk)
@@ -1734,6 +1729,23 @@ static const struct bpf_func_proto bpf_get_route_realm_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ /* If skb_clear_hash() was called due to mangling, we can
+ * trigger SW recalculation here. Later access to hash
+ * can then use the inline skb->hash via context directly
+ * instead of calling this helper again.
+ */
+ return skb_get_hash((struct sk_buff *) (unsigned long) r1);
+}
+
+static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
+ .func = bpf_get_hash_recalc,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
@@ -1777,6 +1789,224 @@ const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
};
EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
+static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
+{
+ /* Caller already did skb_cow() with len as headroom,
+ * so no need to do it here.
+ */
+ skb_push(skb, len);
+ memmove(skb->data, skb->data + len, off);
+ memset(skb->data + off, 0, len);
+
+ /* No skb_postpush_rcsum(skb, skb->data + off, len)
+ * needed here as it does not change the skb->csum
+ * result for checksum complete when summing over
+ * zeroed blocks.
+ */
+ return 0;
+}
+
+static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
+{
+ /* skb_ensure_writable() is not needed here, as we're
+ * already working on an uncloned skb.
+ */
+ if (unlikely(!pskb_may_pull(skb, off + len)))
+ return -ENOMEM;
+
+ skb_postpull_rcsum(skb, skb->data + off, len);
+ memmove(skb->data + len, skb->data, off);
+ __skb_pull(skb, len);
+
+ return 0;
+}
+
+static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
+{
+ bool trans_same = skb->transport_header == skb->network_header;
+ int ret;
+
+ /* There's no need for __skb_push()/__skb_pull() pair to
+ * get to the start of the mac header as we're guaranteed
+ * to always start from here under eBPF.
+ */
+ ret = bpf_skb_generic_push(skb, off, len);
+ if (likely(!ret)) {
+ skb->mac_header -= len;
+ skb->network_header -= len;
+ if (trans_same)
+ skb->transport_header = skb->network_header;
+ }
+
+ return ret;
+}
+
+static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
+{
+ bool trans_same = skb->transport_header == skb->network_header;
+ int ret;
+
+ /* Same here, __skb_push()/__skb_pull() pair not needed. */
+ ret = bpf_skb_generic_pop(skb, off, len);
+ if (likely(!ret)) {
+ skb->mac_header += len;
+ skb->network_header += len;
+ if (trans_same)
+ skb->transport_header = skb->network_header;
+ }
+
+ return ret;
+}
+
+static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
+{
+ const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
+ u32 off = skb->network_header - skb->mac_header;
+ int ret;
+
+ ret = skb_cow(skb, len_diff);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = bpf_skb_net_hdr_push(skb, off, len_diff);
+ if (unlikely(ret < 0))
+ return ret;
+
+ if (skb_is_gso(skb)) {
+ /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
+ * be changed into SKB_GSO_TCPV6.
+ */
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+ skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+ }
+
+ /* Due to IPv6 header, MSS needs to be downgraded. */
+ skb_shinfo(skb)->gso_size -= len_diff;
+ /* Header must be checked, and gso_segs recomputed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+ }
+
+ skb->protocol = htons(ETH_P_IPV6);
+ skb_clear_hash(skb);
+
+ return 0;
+}
+
+static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
+{
+ const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
+ u32 off = skb->network_header - skb->mac_header;
+ int ret;
+
+ ret = skb_unclone(skb, GFP_ATOMIC);
+ if (unlikely(ret < 0))
+ return ret;
+
+ ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
+ if (unlikely(ret < 0))
+ return ret;
+
+ if (skb_is_gso(skb)) {
+ /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
+ * be changed into SKB_GSO_TCPV4.
+ */
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
+ skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ }
+
+ /* Due to IPv4 header, MSS can be upgraded. */
+ skb_shinfo(skb)->gso_size += len_diff;
+ /* Header must be checked, and gso_segs recomputed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+ }
+
+ skb->protocol = htons(ETH_P_IP);
+ skb_clear_hash(skb);
+
+ return 0;
+}
+
+static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
+{
+ __be16 from_proto = skb->protocol;
+
+ if (from_proto == htons(ETH_P_IP) &&
+ to_proto == htons(ETH_P_IPV6))
+ return bpf_skb_proto_4_to_6(skb);
+
+ if (from_proto == htons(ETH_P_IPV6) &&
+ to_proto == htons(ETH_P_IP))
+ return bpf_skb_proto_6_to_4(skb);
+
+ return -ENOTSUPP;
+}
+
+static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ __be16 proto = (__force __be16) r2;
+ int ret;
+
+ if (unlikely(flags))
+ return -EINVAL;
+
+ /* General idea is that this helper does the basic groundwork
+ * needed for changing the protocol, and eBPF program fills the
+ * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
+ * and other helpers, rather than passing a raw buffer here.
+ *
+ * The rationale is to keep this minimal and without a need to
+ * deal with raw packet data. F.e. even if we would pass buffers
+ * here, the program still needs to call the bpf_lX_csum_replace()
+ * helpers anyway. Plus, this way we keep also separation of
+ * concerns, since f.e. bpf_skb_store_bytes() should only take
+ * care of stores.
+ *
+ * Currently, additional options and extension header space are
+ * not supported, but flags register is reserved so we can adapt
+ * that. For offloads, we mark packet as dodgy, so that headers
+ * need to be verified first.
+ */
+ ret = bpf_skb_proto_xlat(skb, proto);
+ bpf_compute_data_end(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_change_proto_proto = {
+ .func = bpf_skb_change_proto,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ u32 pkt_type = r2;
+
+ /* We only allow a restricted subset to be changed for now. */
+ if (unlikely(skb->pkt_type > PACKET_OTHERHOST ||
+ pkt_type > PACKET_OTHERHOST))
+ return -EINVAL;
+
+ skb->pkt_type = pkt_type;
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_change_type_proto = {
+ .func = bpf_skb_change_type,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
bool bpf_helper_changes_skb_data(void *func)
{
if (func == bpf_skb_vlan_push)
@@ -1785,6 +2015,8 @@ bool bpf_helper_changes_skb_data(void *func)
return true;
if (func == bpf_skb_store_bytes)
return true;
+ if (func == bpf_skb_change_proto)
+ return true;
if (func == bpf_l3_csum_replace)
return true;
if (func == bpf_l4_csum_replace)
@@ -2024,6 +2256,40 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
}
}
+#ifdef CONFIG_SOCK_CGROUP_DATA
+static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *)(long)r1;
+ struct bpf_map *map = (struct bpf_map *)(long)r2;
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct cgroup *cgrp;
+ struct sock *sk;
+ u32 i = (u32)r3;
+
+ sk = skb->sk;
+ if (!sk || !sk_fullsock(sk))
+ return -ENOENT;
+
+ if (unlikely(i >= array->map.max_entries))
+ return -E2BIG;
+
+ cgrp = READ_ONCE(array->ptrs[i]);
+ if (unlikely(!cgrp))
+ return -EAGAIN;
+
+ return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
+}
+
+static const struct bpf_func_proto bpf_skb_in_cgroup_proto = {
+ .func = bpf_skb_in_cgroup,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+};
+#endif
+
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
@@ -2037,7 +2303,7 @@ sk_filter_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
- return &bpf_get_smp_processor_id_proto;
+ return &bpf_get_raw_smp_processor_id_proto;
case BPF_FUNC_tail_call:
return &bpf_tail_call_proto;
case BPF_FUNC_ktime_get_ns:
@@ -2072,6 +2338,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_vlan_push_proto;
case BPF_FUNC_skb_vlan_pop:
return &bpf_skb_vlan_pop_proto;
+ case BPF_FUNC_skb_change_proto:
+ return &bpf_skb_change_proto_proto;
+ case BPF_FUNC_skb_change_type:
+ return &bpf_skb_change_type_proto;
case BPF_FUNC_skb_get_tunnel_key:
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
@@ -2084,8 +2354,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_redirect_proto;
case BPF_FUNC_get_route_realm:
return &bpf_get_route_realm_proto;
+ case BPF_FUNC_get_hash_recalc:
+ return &bpf_get_hash_recalc_proto;
case BPF_FUNC_perf_event_output:
return bpf_get_event_output_proto();
+ case BPF_FUNC_get_smp_processor_id:
+ return &bpf_get_smp_processor_id_proto;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ case BPF_FUNC_skb_in_cgroup:
+ return &bpf_skb_in_cgroup_proto;
+#endif
default:
return sk_filter_func_proto(func_id);
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a669dea146c6..61ad43f61c5e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
}
EXPORT_SYMBOL(make_flow_keys_digest);
+static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
+
+u32 __skb_get_hash_symmetric(struct sk_buff *skb)
+{
+ struct flow_keys keys;
+
+ __flow_hash_secret_init();
+
+ memset(&keys, 0, sizeof(keys));
+ __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys,
+ NULL, 0, 0, 0,
+ FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+ return __flow_hash_from_keys(&keys, hashrnd);
+}
+EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
+
/**
* __skb_get_hash: calculate a flow hash
* @skb: sk_buff to calculate flow hash from
@@ -868,6 +885,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = {
},
};
+static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+ .offset = offsetof(struct flow_keys, control),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v4addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs.v6addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct flow_keys, ports),
+ },
+};
+
static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
{
.key_id = FLOW_DISSECTOR_KEY_CONTROL,
@@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void)
skb_flow_dissector_init(&flow_keys_dissector,
flow_keys_dissector_keys,
ARRAY_SIZE(flow_keys_dissector_keys));
+ skb_flow_dissector_init(&flow_keys_dissector_symmetric,
+ flow_keys_dissector_symmetric_keys,
+ ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
skb_flow_dissector_init(&flow_keys_buf_dissector,
flow_keys_buf_dissector_keys,
ARRAY_SIZE(flow_keys_buf_dissector_keys));
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 510cd62fcb99..5cdc62a8eb84 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -473,7 +473,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
}
if (dev->netdev_ops->ndo_neigh_construct) {
- error = dev->netdev_ops->ndo_neigh_construct(n);
+ error = dev->netdev_ops->ndo_neigh_construct(dev, n);
if (error < 0) {
rc = ERR_PTR(error);
goto out_neigh_release;
@@ -701,7 +701,7 @@ void neigh_destroy(struct neighbour *neigh)
neigh->arp_queue_len_bytes = 0;
if (dev->netdev_ops->ndo_neigh_destroy)
- dev->netdev_ops->ndo_neigh_destroy(neigh);
+ dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
dev_put(dev);
neigh_parms_put(neigh->parms);
@@ -2047,6 +2047,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
case NDTPA_DELAY_PROBE_TIME:
NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
nla_get_msecs(tbp[i]));
+ call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
break;
case NDTPA_RETRANS_TIME:
NEIGH_VAR_SET(p, RETRANS_TIME,
@@ -2930,6 +2931,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write)
return;
set_bit(index, p->data_state);
+ call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
if (!dev) /* NULL dev means this is default value */
neigh_copy_dflt_parms(net, p, index);
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7a0b616557ab..6e4f34721080 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -322,7 +322,20 @@ NETDEVICE_SHOW_RW(flags, fmt_hex);
static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
{
- dev->tx_queue_len = new_len;
+ int res, orig_len = dev->tx_queue_len;
+
+ if (new_len != orig_len) {
+ dev->tx_queue_len = new_len;
+ res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+ res = notifier_to_errno(res);
+ if (res) {
+ netdev_err(dev,
+ "refused to change device tx_queue_len\n");
+ dev->tx_queue_len = orig_len;
+ return -EFAULT;
+ }
+ }
+
return 0;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f74ab9c3b38f..bbd118b19aef 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -213,6 +213,7 @@
/* Xmit modes */
#define M_START_XMIT 0 /* Default normal TX */
#define M_NETIF_RECEIVE 1 /* Inject packets into stack */
+#define M_QUEUE_XMIT 2 /* Inject packet into qdisc */
/* If lock -- protects updating of if_list */
#define if_lock(t) spin_lock(&(t->if_lock));
@@ -626,6 +627,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->xmit_mode == M_NETIF_RECEIVE)
seq_puts(seq, " xmit_mode: netif_receive\n");
+ else if (pkt_dev->xmit_mode == M_QUEUE_XMIT)
+ seq_puts(seq, " xmit_mode: xmit_queue\n");
seq_puts(seq, " Flags: ");
@@ -1142,8 +1145,10 @@ static ssize_t pktgen_if_write(struct file *file,
return len;
i += len;
- if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) &&
- (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+ if ((value > 1) &&
+ ((pkt_dev->xmit_mode == M_QUEUE_XMIT) ||
+ ((pkt_dev->xmit_mode == M_START_XMIT) &&
+ (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))))
return -ENOTSUPP;
pkt_dev->burst = value < 1 ? 1 : value;
sprintf(pg_result, "OK: burst=%d", pkt_dev->burst);
@@ -1198,6 +1203,9 @@ static ssize_t pktgen_if_write(struct file *file,
* at module loading time
*/
pkt_dev->clone_skb = 0;
+ } else if (strcmp(f, "queue_xmit") == 0) {
+ pkt_dev->xmit_mode = M_QUEUE_XMIT;
+ pkt_dev->last_ok = 1;
} else {
sprintf(pg_result,
"xmit_mode -:%s:- unknown\nAvailable modes: %s",
@@ -3434,6 +3442,36 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
#endif
} while (--burst > 0);
goto out; /* Skips xmit_mode M_START_XMIT */
+ } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
+ local_bh_disable();
+ atomic_inc(&pkt_dev->skb->users);
+
+ ret = dev_queue_xmit(pkt_dev->skb);
+ switch (ret) {
+ case NET_XMIT_SUCCESS:
+ pkt_dev->sofar++;
+ pkt_dev->seq_num++;
+ pkt_dev->tx_bytes += pkt_dev->last_pkt_size;
+ break;
+ case NET_XMIT_DROP:
+ case NET_XMIT_CN:
+ /* These are all valid return codes for a qdisc but
+ * indicate packets are being dropped or will likely
+ * be dropped soon.
+ */
+ case NETDEV_TX_BUSY:
+ /* qdisc may call dev_hard_start_xmit directly in cases
+ * where no queues exist e.g. loopback device, virtual
+ * devices, etc. In this case we need to handle
+ * NETDEV_TX_ codes.
+ */
+ default:
+ pkt_dev->errors++;
+ net_info_ratelimited("%s xmit error: %d\n",
+ pkt_dev->odevname, ret);
+ break;
+ }
+ goto out;
}
txq = skb_get_tx_queue(odev, pkt_dev->skb);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index eb49ca24274a..a9e3805af739 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1927,11 +1927,19 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_TXQLEN]) {
unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]);
-
- if (dev->tx_queue_len ^ value)
+ unsigned long orig_len = dev->tx_queue_len;
+
+ if (dev->tx_queue_len ^ value) {
+ dev->tx_queue_len = value;
+ err = call_netdevice_notifiers(
+ NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+ err = notifier_to_errno(err);
+ if (err) {
+ dev->tx_queue_len = orig_len;
+ goto errout;
+ }
status |= DO_SETLINK_NOTIFY;
-
- dev->tx_queue_len = value;
+ }
}
if (tb[IFLA_OPERSTATE])
@@ -3519,7 +3527,32 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
if (!attr)
goto nla_put_failure;
- err = ops->fill_linkxstats(skb, dev, prividx);
+ err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
+ nla_nest_end(skb, attr);
+ if (err)
+ goto nla_put_failure;
+ *idxattr = 0;
+ }
+ }
+
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE,
+ *idxattr)) {
+ const struct rtnl_link_ops *ops = NULL;
+ const struct net_device *master;
+
+ master = netdev_master_upper_dev_get(dev);
+ if (master)
+ ops = master->rtnl_link_ops;
+ if (ops && ops->fill_linkxstats) {
+ int err;
+
+ *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
+ attr = nla_nest_start(skb,
+ IFLA_STATS_LINK_XSTATS_SLAVE);
+ if (!attr)
+ goto nla_put_failure;
+
+ err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
nla_nest_end(skb, attr);
if (err)
goto nla_put_failure;
@@ -3555,14 +3588,35 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) {
const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+ int attr = IFLA_STATS_LINK_XSTATS;
if (ops && ops->get_linkxstats_size) {
- size += nla_total_size(ops->get_linkxstats_size(dev));
+ size += nla_total_size(ops->get_linkxstats_size(dev,
+ attr));
/* for IFLA_STATS_LINK_XSTATS */
size += nla_total_size(0);
}
}
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, 0)) {
+ struct net_device *_dev = (struct net_device *)dev;
+ const struct rtnl_link_ops *ops = NULL;
+ const struct net_device *master;
+
+ /* netdev_master_upper_dev_get can't take const */
+ master = netdev_master_upper_dev_get(_dev);
+ if (master)
+ ops = master->rtnl_link_ops;
+ if (ops && ops->get_linkxstats_size) {
+ int attr = IFLA_STATS_LINK_XSTATS_SLAVE;
+
+ size += nla_total_size(ops->get_linkxstats_size(dev,
+ attr));
+ /* for IFLA_STATS_LINK_XSTATS_SLAVE */
+ size += nla_total_size(0);
+ }
+ }
+
return size;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e7ec6d3ad5f0..3864b4b68fa1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3017,24 +3017,6 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
EXPORT_SYMBOL_GPL(skb_append_pagefrags);
/**
- * skb_push_rcsum - push skb and update receive checksum
- * @skb: buffer to update
- * @len: length of data pulled
- *
- * This function performs an skb_push on the packet and updates
- * the CHECKSUM_COMPLETE checksum. It should be used on
- * receive path processing instead of skb_push unless you know
- * that the checksum difference is zero (e.g., a valid IP header)
- * or you are setting ip_summed to CHECKSUM_NONE.
- */
-static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
-{
- skb_push(skb, len);
- skb_postpush_rcsum(skb, skb->data, len);
- return skb->data;
-}
-
-/**
* skb_pull_rcsum - pull skb and update receive checksum
* @skb: buffer to update
* @len: length of data pulled
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index df4803437888..a796fc7cbc35 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -41,6 +41,7 @@
#include <net/dn_fib.h>
#include <net/dn_neigh.h>
#include <net/dn_dev.h>
+#include <net/nexthop.h>
#define RT_MIN_TABLE 1
@@ -150,14 +151,13 @@ static int dn_fib_count_nhs(const struct nlattr *attr)
struct rtnexthop *nhp = nla_data(attr);
int nhs = 0, nhlen = nla_len(attr);
- while(nhlen >= (int)sizeof(struct rtnexthop)) {
- if ((nhlen -= nhp->rtnh_len) < 0)
- return 0;
+ while (rtnh_ok(nhp, nhlen)) {
nhs++;
- nhp = RTNH_NEXT(nhp);
+ nhp = rtnh_next(nhp, &nhlen);
}
- return nhs;
+ /* leftover implies invalid nexthop configuration, discard it */
+ return nhlen > 0 ? 0 : nhs;
}
static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
@@ -167,21 +167,24 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
int nhlen = nla_len(attr);
change_nexthops(fi) {
- int attrlen = nhlen - sizeof(struct rtnexthop);
- if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
+ int attrlen;
+
+ if (!rtnh_ok(nhp, nhlen))
return -EINVAL;
nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
nh->nh_oif = nhp->rtnh_ifindex;
nh->nh_weight = nhp->rtnh_hops + 1;
- if (attrlen) {
+ attrlen = rtnh_attrlen(nhp);
+ if (attrlen > 0) {
struct nlattr *gw_attr;
gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
}
- nhp = RTNH_NEXT(nhp);
+
+ nhp = rtnh_next(nhp, &nhlen);
} endfor_nexthops(fi);
return 0;
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 8c004a0c8d64..935ab932e841 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -81,7 +81,7 @@ static int lowpan_stop(struct net_device *dev)
return 0;
}
-static int lowpan_neigh_construct(struct neighbour *n)
+static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
{
struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n));
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index cbac493c913a..e23f141c9ba5 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -271,7 +271,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
return dst_output(net, sk, skb);
}
#endif
- mtu = ip_skb_dst_mtu(skb);
+ mtu = ip_skb_dst_mtu(sk, skb);
if (skb_is_gso(skb))
return ip_finish_output_gso(net, sk, skb, mtu);
@@ -541,7 +541,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
iph = ip_hdr(skb);
- mtu = ip_skb_dst_mtu(skb);
+ mtu = ip_skb_dst_mtu(sk, skb);
if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
mtu = IPCB(skb)->frag_max_size;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2033f929aa66..c8dd9e26b185 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -89,22 +89,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
__be32 src_ipaddr, tgt_ipaddr;
long ret;
-#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
-
- if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
- ARPT_INV_ARPOP))
+ if (NF_INVF(arpinfo, ARPT_INV_ARPOP,
+ (arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop))
return 0;
- if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
- ARPT_INV_ARPHRD))
+ if (NF_INVF(arpinfo, ARPT_INV_ARPHRD,
+ (arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd))
return 0;
- if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
- ARPT_INV_ARPPRO))
+ if (NF_INVF(arpinfo, ARPT_INV_ARPPRO,
+ (arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro))
return 0;
- if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
- ARPT_INV_ARPHLN))
+ if (NF_INVF(arpinfo, ARPT_INV_ARPHLN,
+ (arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln))
return 0;
src_devaddr = arpptr;
@@ -115,31 +113,32 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
arpptr += dev->addr_len;
memcpy(&tgt_ipaddr, arpptr, sizeof(u32));
- if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
- ARPT_INV_SRCDEVADDR) ||
- FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
- ARPT_INV_TGTDEVADDR))
+ if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR,
+ arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr,
+ dev->addr_len)) ||
+ NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR,
+ arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr,
+ dev->addr_len)))
return 0;
- if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
- ARPT_INV_SRCIP) ||
- FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
- ARPT_INV_TGTIP))
+ if (NF_INVF(arpinfo, ARPT_INV_SRCIP,
+ (src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr) ||
+ NF_INVF(arpinfo, ARPT_INV_TGTIP,
+ (tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr))
return 0;
/* Look for ifname matches. */
ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
- if (FWINV(ret != 0, ARPT_INV_VIA_IN))
+ if (NF_INVF(arpinfo, ARPT_INV_VIA_IN, ret != 0))
return 0;
ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
- if (FWINV(ret != 0, ARPT_INV_VIA_OUT))
+ if (NF_INVF(arpinfo, ARPT_INV_VIA_OUT, ret != 0))
return 0;
return 1;
-#undef FWINV
}
static inline int arp_checkentry(const struct arpt_arp *arp)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 54906e0e8e0c..f0df66f54ce6 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -58,32 +58,31 @@ ip_packet_match(const struct iphdr *ip,
{
unsigned long ret;
-#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
-
- if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
- IPT_INV_SRCIP) ||
- FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
- IPT_INV_DSTIP))
+ if (NF_INVF(ipinfo, IPT_INV_SRCIP,
+ (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) ||
+ NF_INVF(ipinfo, IPT_INV_DSTIP,
+ (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr))
return false;
ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
- if (FWINV(ret != 0, IPT_INV_VIA_IN))
+ if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0))
return false;
ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
- if (FWINV(ret != 0, IPT_INV_VIA_OUT))
+ if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0))
return false;
/* Check specific protocol */
if (ipinfo->proto &&
- FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO))
+ NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto))
return false;
/* If we have a fragment rule but the packet is not a fragment
* then we return zero */
- if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG))
+ if (NF_INVF(ipinfo, IPT_INV_FRAG,
+ (ipinfo->flags & IPT_F_FRAG) && !isfrag))
return false;
return true;
@@ -122,7 +121,6 @@ static inline bool unconditional(const struct ipt_entry *e)
return e->target_offset == sizeof(struct ipt_entry) &&
memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
-#undef FWINV
}
/* for const-correctness */
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 57fc97cdac70..aebdb337fd7e 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -87,10 +87,6 @@ iptable_mangle_hook(void *priv,
{
if (state->hook == NF_INET_LOCAL_OUT)
return ipt_mangle_out(skb, state);
- if (state->hook == NF_INET_POST_ROUTING)
- return ipt_do_table(skb, state,
- state->net->ipv4.iptable_mangle);
- /* PREROUTING/INPUT/FORWARD: */
return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index b6ea57ec5e14..fd8220213afc 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -24,6 +24,9 @@ const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
return NULL;
+ if (ip_hdr(oldskb)->protocol != IPPROTO_TCP)
+ return NULL;
+
oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
sizeof(struct tcphdr), _oth);
if (oth == NULL)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5c7ed147449c..032a96d78c99 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2277,6 +2277,38 @@ static inline bool tcp_can_repair_sock(const struct sock *sk)
((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
}
+static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
+{
+ struct tcp_repair_window opt;
+
+ if (!tp->repair)
+ return -EPERM;
+
+ if (len != sizeof(opt))
+ return -EINVAL;
+
+ if (copy_from_user(&opt, optbuf, sizeof(opt)))
+ return -EFAULT;
+
+ if (opt.max_window < opt.snd_wnd)
+ return -EINVAL;
+
+ if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd))
+ return -EINVAL;
+
+ if (after(opt.rcv_wup, tp->rcv_nxt))
+ return -EINVAL;
+
+ tp->snd_wl1 = opt.snd_wl1;
+ tp->snd_wnd = opt.snd_wnd;
+ tp->max_window = opt.max_window;
+
+ tp->rcv_wnd = opt.rcv_wnd;
+ tp->rcv_wup = opt.rcv_wup;
+
+ return 0;
+}
+
static int tcp_repair_options_est(struct tcp_sock *tp,
struct tcp_repair_opt __user *optbuf, unsigned int len)
{
@@ -2604,6 +2636,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
else
tp->tsoffset = val - tcp_time_stamp;
break;
+ case TCP_REPAIR_WINDOW:
+ err = tcp_repair_set_window(tp, optval, optlen);
+ break;
case TCP_NOTSENT_LOWAT:
tp->notsent_lowat = val;
sk->sk_write_space(sk);
@@ -2860,6 +2895,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EINVAL;
break;
+ case TCP_REPAIR_WINDOW: {
+ struct tcp_repair_window opt;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ if (len != sizeof(opt))
+ return -EINVAL;
+
+ if (!tp->repair)
+ return -EPERM;
+
+ opt.snd_wl1 = tp->snd_wl1;
+ opt.snd_wnd = tp->snd_wnd;
+ opt.max_window = tp->max_window;
+ opt.rcv_wnd = tp->rcv_wnd;
+ opt.rcv_wup = tp->rcv_wup;
+
+ if (copy_to_user(optval, &opt, len))
+ return -EFAULT;
+ return 0;
+ }
case TCP_QUEUE_SEQ:
if (tp->repair_queue == TCP_SEND_QUEUE)
val = tp->write_seq;
@@ -2969,8 +3026,18 @@ static void __tcp_alloc_md5sig_pool(void)
return;
for_each_possible_cpu(cpu) {
+ void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
struct ahash_request *req;
+ if (!scratch) {
+ scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
+ sizeof(struct tcphdr),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!scratch)
+ return;
+ per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
+ }
if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
continue;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3708de2a6683..32b048e524d6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1018,27 +1018,28 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
GFP_KERNEL);
}
-static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
- __be32 daddr, __be32 saddr, int nbytes)
+static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
+ __be32 daddr, __be32 saddr,
+ const struct tcphdr *th, int nbytes)
{
struct tcp4_pseudohdr *bp;
struct scatterlist sg;
+ struct tcphdr *_th;
- bp = &hp->md5_blk.ip4;
-
- /*
- * 1. the TCP pseudo-header (in the order: source IP address,
- * destination IP address, zero-padded protocol number, and
- * segment length)
- */
+ bp = hp->scratch;
bp->saddr = saddr;
bp->daddr = daddr;
bp->pad = 0;
bp->protocol = IPPROTO_TCP;
bp->len = cpu_to_be16(nbytes);
- sg_init_one(&sg, bp, sizeof(*bp));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
+ _th = (struct tcphdr *)(bp + 1);
+ memcpy(_th, th, sizeof(*th));
+ _th->check = 0;
+
+ sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+ sizeof(*bp) + sizeof(*th));
return crypto_ahash_update(hp->md5_req);
}
@@ -1055,9 +1056,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
if (crypto_ahash_init(req))
goto clear_hash;
- if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_header(hp, th))
+ if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
@@ -1101,9 +1100,7 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
if (crypto_ahash_init(req))
goto clear_hash;
- if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_header(hp, th))
+ if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
goto clear_hash;
if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
goto clear_hash;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1bcef2369d64..771be1fa4176 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
}
}
+ free_percpu(non_pcpu_rt->rt6i_pcpu);
non_pcpu_rt->rt6i_pcpu = NULL;
}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 63e06c3dd319..61ed95054efa 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -73,22 +73,22 @@ ip6_packet_match(const struct sk_buff *skb,
unsigned long ret;
const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
-#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg)))
-
- if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
- &ip6info->src), IP6T_INV_SRCIP) ||
- FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
- &ip6info->dst), IP6T_INV_DSTIP))
+ if (NF_INVF(ip6info, IP6T_INV_SRCIP,
+ ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
+ &ip6info->src)) ||
+ NF_INVF(ip6info, IP6T_INV_DSTIP,
+ ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
+ &ip6info->dst)))
return false;
ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
- if (FWINV(ret != 0, IP6T_INV_VIA_IN))
+ if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0))
return false;
ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
- if (FWINV(ret != 0, IP6T_INV_VIA_OUT))
+ if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0))
return false;
/* ... might want to do something with class and flowlabel here ... */
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index cb2b28883252..2b1a9dcdbcb3 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -83,10 +83,6 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
{
if (state->hook == NF_INET_LOCAL_OUT)
return ip6t_mangle_out(skb, state);
- if (state->hook == NF_INET_POST_ROUTING)
- return ip6t_do_table(skb, state,
- state->net->ipv6.ip6table_mangle);
- /* INPUT/FORWARD */
return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2255d2bf5f6b..37cf91323319 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -526,26 +526,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
-static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr, int nbytes)
+static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr,
+ const struct tcphdr *th, int nbytes)
{
struct tcp6_pseudohdr *bp;
struct scatterlist sg;
+ struct tcphdr *_th;
- bp = &hp->md5_blk.ip6;
+ bp = hp->scratch;
/* 1. TCP pseudo-header (RFC2460) */
bp->saddr = *saddr;
bp->daddr = *daddr;
bp->protocol = cpu_to_be32(IPPROTO_TCP);
bp->len = cpu_to_be32(nbytes);
- sg_init_one(&sg, bp, sizeof(*bp));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
+ _th = (struct tcphdr *)(bp + 1);
+ memcpy(_th, th, sizeof(*th));
+ _th->check = 0;
+
+ sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
+ ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+ sizeof(*bp) + sizeof(*th));
return crypto_ahash_update(hp->md5_req);
}
-static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
const struct in6_addr *daddr, struct in6_addr *saddr,
const struct tcphdr *th)
{
@@ -559,9 +566,7 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
if (crypto_ahash_init(req))
goto clear_hash;
- if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
- goto clear_hash;
- if (tcp_md5_hash_header(hp, th))
+ if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
goto clear_hash;
if (tcp_md5_hash_key(hp, key))
goto clear_hash;
@@ -606,9 +611,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
if (crypto_ahash_init(req))
goto clear_hash;
- if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
- goto clear_hash;
- if (tcp_md5_hash_header(hp, th))
+ if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
goto clear_hash;
if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
goto clear_hash;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 0b68ba730a06..cb39e05b166c 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1765,18 +1765,12 @@ static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
if (!csock)
return -ENOENT;
- prog = bpf_prog_get(info->bpf_fd);
+ prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER);
if (IS_ERR(prog)) {
err = PTR_ERR(prog);
goto out;
}
- if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
- bpf_prog_put(prog);
- err = -EINVAL;
- goto out;
- }
-
err = kcm_attach(sock, csock, prog);
if (err) {
bpf_prog_put(prog);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 95e757c377f9..9266ceebd112 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -609,9 +609,8 @@ config NETFILTER_XT_MARK
The target allows you to create rules in the "mangle" table which alter
the netfilter mark (nfmark) field associated with the packet.
- Prior to routing, the nfmark can influence the routing method (see
- "Use netfilter MARK value as routing key") and can also be used by
- other subsystems to change their behavior.
+ Prior to routing, the nfmark can influence the routing method and can
+ also be used by other subsystems to change their behavior.
config NETFILTER_XT_CONNMARK
tristate 'ctmark target and match support'
@@ -753,9 +752,8 @@ config NETFILTER_XT_TARGET_HMARK
The target allows you to create rules in the "raw" and "mangle" tables
which set the skbuff mark by means of hash calculation within a given
- range. The nfmark can influence the routing method (see "Use netfilter
- MARK value as routing key") and can also be used by other subsystems to
- change their behaviour.
+ range. The nfmark can influence the routing method and can also be used
+ by other subsystems to change their behaviour.
To compile it as a module, choose M here. If unsure, say N.
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f204274a9b6b..153e33ffeeaa 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -327,16 +327,10 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
tmpl->status = IPS_TEMPLATE;
write_pnet(&tmpl->ct_net, net);
-
- if (nf_ct_zone_add(tmpl, flags, zone) < 0)
- goto out_free;
-
+ nf_ct_zone_add(tmpl, zone);
atomic_set(&tmpl->ct_general.use, 0);
return tmpl;
-out_free:
- kfree(tmpl);
- return NULL;
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
@@ -929,16 +923,13 @@ __nf_conntrack_alloc(struct net *net,
offsetof(struct nf_conn, proto) -
offsetof(struct nf_conn, __nfct_init_offset[0]));
- if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0)
- goto out_free;
+ nf_ct_zone_add(ct, zone);
/* Because we use RCU lookups, we set ct_general.use to zero before
* this is inserted in any list.
*/
atomic_set(&ct->ct_general.use, 0);
return ct;
-out_free:
- kmem_cache_free(nf_conntrack_cachep, ct);
out:
atomic_dec(&net->ct.count);
return ERR_PTR(-ENOMEM);
@@ -1342,14 +1333,6 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
-#ifdef CONFIG_NF_CONNTRACK_ZONES
-static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
- .len = sizeof(struct nf_conntrack_zone),
- .align = __alignof__(struct nf_conntrack_zone),
- .id = NF_CT_EXT_ZONE,
-};
-#endif
-
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
@@ -1532,9 +1515,6 @@ void nf_conntrack_cleanup_end(void)
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- nf_ct_extend_unregister(&nf_ct_zone_extend);
-#endif
nf_conntrack_proto_fini();
nf_conntrack_seqadj_fini();
nf_conntrack_labels_fini();
@@ -1617,24 +1597,14 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_hash_resize(unsigned int hashsize)
{
- int i, bucket, rc;
- unsigned int hashsize, old_size;
+ int i, bucket;
+ unsigned int old_size;
struct hlist_nulls_head *hash, *old_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
- if (current->nsproxy->net_ns != &init_net)
- return -EOPNOTSUPP;
-
- /* On boot, we can set this without any fancy locking. */
- if (!nf_conntrack_htable_size)
- return param_set_uint(val, kp);
-
- rc = kstrtouint(val, 0, &hashsize);
- if (rc)
- return rc;
if (!hashsize)
return -EINVAL;
@@ -1642,6 +1612,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
if (!hash)
return -ENOMEM;
+ old_size = nf_conntrack_htable_size;
+ if (old_size == hashsize) {
+ nf_ct_free_hashtable(hash, hashsize);
+ return 0;
+ }
+
local_bh_disable();
nf_conntrack_all_lock();
write_seqcount_begin(&nf_conntrack_generation);
@@ -1677,6 +1653,25 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
nf_ct_free_hashtable(old_hash, old_size);
return 0;
}
+
+int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+{
+ unsigned int hashsize;
+ int rc;
+
+ if (current->nsproxy->net_ns != &init_net)
+ return -EOPNOTSUPP;
+
+ /* On boot, we can set this without any fancy locking. */
+ if (!nf_conntrack_htable_size)
+ return param_set_uint(val, kp);
+
+ rc = kstrtouint(val, 0, &hashsize);
+ if (rc)
+ return rc;
+
+ return nf_conntrack_hash_resize(hashsize);
+}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
@@ -1733,7 +1728,7 @@ int nf_conntrack_init_start(void)
nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
sizeof(struct nf_conn), 0,
- SLAB_DESTROY_BY_RCU, NULL);
+ SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
if (!nf_conntrack_cachep)
goto err_cachep;
@@ -1773,11 +1768,6 @@ int nf_conntrack_init_start(void)
if (ret < 0)
goto err_seqadj;
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- ret = nf_ct_extend_register(&nf_ct_zone_extend);
- if (ret < 0)
- goto err_extend;
-#endif
ret = nf_conntrack_proto_init();
if (ret < 0)
goto err_proto;
@@ -1793,10 +1783,6 @@ int nf_conntrack_init_start(void)
return 0;
err_proto:
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- nf_ct_extend_unregister(&nf_ct_zone_extend);
-err_extend:
-#endif
nf_conntrack_seqadj_fini();
err_seqadj:
nf_conntrack_labels_fini();
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 196cb39649e1..3a1a88b9bafa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -389,11 +389,38 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
struct net *net)
{
struct nf_conntrack_tuple_hash *h;
+ const struct hlist_nulls_node *nn;
+ int cpu;
+
+ /* Get rid of expecteds, set helpers to NULL. */
+ for_each_possible_cpu(cpu) {
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+ spin_lock_bh(&pcpu->lock);
+ hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
+ unhelp(h, me);
+ spin_unlock_bh(&pcpu->lock);
+ }
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
+ struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp;
const struct hlist_node *next;
const struct hlist_nulls_node *nn;
+ struct net *net;
unsigned int i;
- int cpu;
+
+ mutex_lock(&nf_ct_helper_mutex);
+ hlist_del_rcu(&me->hnode);
+ nf_ct_helper_count--;
+ mutex_unlock(&nf_ct_helper_mutex);
+
+ /* Make sure every nothing is still using the helper unless its a
+ * connection in the hash.
+ */
+ synchronize_rcu();
/* Get rid of expectations */
spin_lock_bh(&nf_conntrack_expect_lock);
@@ -413,15 +440,11 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
}
spin_unlock_bh(&nf_conntrack_expect_lock);
- /* Get rid of expecteds, set helpers to NULL. */
- for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+ rtnl_lock();
+ for_each_net(net)
+ __nf_conntrack_helper_unregister(me, net);
+ rtnl_unlock();
- spin_lock_bh(&pcpu->lock);
- hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
- unhelp(h, me);
- spin_unlock_bh(&pcpu->lock);
- }
local_bh_disable();
for (i = 0; i < nf_conntrack_htable_size; i++) {
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
@@ -433,26 +456,6 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
}
local_bh_enable();
}
-
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
-{
- struct net *net;
-
- mutex_lock(&nf_ct_helper_mutex);
- hlist_del_rcu(&me->hnode);
- nf_ct_helper_count--;
- mutex_unlock(&nf_ct_helper_mutex);
-
- /* Make sure every nothing is still using the helper unless its a
- * connection in the hash.
- */
- synchronize_rcu();
-
- rtnl_lock();
- for_each_net(net)
- __nf_conntrack_helper_unregister(me, net);
- rtnl_unlock();
-}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
static struct nf_ct_ext_type helper_extend __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index c026c472ea80..2aaa188ee961 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -434,8 +434,29 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
#ifdef CONFIG_SYSCTL
/* Log invalid packets of a given protocol */
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
+static int log_invalid_proto_min __read_mostly;
+static int log_invalid_proto_max __read_mostly = 255;
+
+/* size the user *wants to set */
+static unsigned int nf_conntrack_htable_size_user __read_mostly;
+
+static int
+nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (ret < 0 || !write)
+ return ret;
+
+ /* update ret, we might not be able to satisfy request */
+ ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user);
+
+ /* update it to the actual value used by conntrack */
+ nf_conntrack_htable_size_user = nf_conntrack_htable_size;
+ return ret;
+}
static struct ctl_table_header *nf_ct_netfilter_header;
@@ -456,10 +477,10 @@ static struct ctl_table nf_ct_sysctl_table[] = {
},
{
.procname = "nf_conntrack_buckets",
- .data = &nf_conntrack_htable_size,
+ .data = &nf_conntrack_htable_size_user,
.maxlen = sizeof(unsigned int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
+ .mode = 0644,
+ .proc_handler = nf_conntrack_hash_sysctl,
},
{
.procname = "nf_conntrack_checksum",
@@ -515,6 +536,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
+ if (!net_eq(&init_net, net))
+ table[2].mode = 0444;
+
net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.sysctl_header)
goto out_unregister_netfilter;
@@ -604,6 +628,8 @@ static int __init nf_conntrack_standalone_init(void)
ret = -ENOMEM;
goto out_sysctl;
}
+
+ nf_conntrack_htable_size_user = nf_conntrack_htable_size;
#endif
ret = register_pernet_subsys(&nf_conntrack_net_ops);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index a5d41dfa9f05..aa5847a16713 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -159,6 +159,20 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
struct nf_logger *logger;
int ret = -ENOENT;
+ if (pf == NFPROTO_INET) {
+ ret = nf_logger_find_get(NFPROTO_IPV4, type);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_logger_find_get(NFPROTO_IPV6, type);
+ if (ret < 0) {
+ nf_logger_put(NFPROTO_IPV4, type);
+ return ret;
+ }
+
+ return 0;
+ }
+
if (rcu_access_pointer(loggers[pf][type]) == NULL)
request_module("nf-logger-%u-%u", pf, type);
@@ -167,7 +181,7 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
if (logger == NULL)
goto out;
- if (logger && try_module_get(logger->me))
+ if (try_module_get(logger->me))
ret = 0;
out:
rcu_read_unlock();
@@ -179,6 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type)
{
struct nf_logger *logger;
+ if (pf == NFPROTO_INET) {
+ nf_logger_put(NFPROTO_IPV4, type);
+ nf_logger_put(NFPROTO_IPV6, type);
+ return;
+ }
+
BUG_ON(loggers[pf][type] == NULL);
rcu_read_lock();
@@ -398,16 +418,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
{
const struct nf_logger *logger;
char buf[NFLOGGER_NAME_LEN];
- size_t size = *lenp;
int r = 0;
int tindex = (unsigned long)table->extra1;
struct net *net = current->nsproxy->net_ns;
if (write) {
- if (size > sizeof(buf))
- size = sizeof(buf);
- if (copy_from_user(buf, buffer, size))
- return -EFAULT;
+ struct ctl_table tmp = *table;
+
+ tmp.data = buf;
+ r = proc_dostring(&tmp, write, buffer, lenp, ppos);
+ if (r)
+ return r;
if (!strcmp(buf, "NONE")) {
nf_log_unbind_pf(net, tindex);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2c881871db38..18b7f8578ee0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -131,29 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
-static int nft_register_basechain(struct nft_base_chain *basechain,
- unsigned int hook_nops)
-{
- struct net *net = read_pnet(&basechain->pnet);
-
- if (basechain->flags & NFT_BASECHAIN_DISABLED)
- return 0;
-
- return nf_register_net_hooks(net, basechain->ops, hook_nops);
-}
-
-static void nft_unregister_basechain(struct nft_base_chain *basechain,
- unsigned int hook_nops)
-{
- struct net *net = read_pnet(&basechain->pnet);
-
- if (basechain->flags & NFT_BASECHAIN_DISABLED)
- return;
-
- nf_unregister_net_hooks(net, basechain->ops, hook_nops);
-}
-
-static int nf_tables_register_hooks(const struct nft_table *table,
+static int nf_tables_register_hooks(struct net *net,
+ const struct nft_table *table,
struct nft_chain *chain,
unsigned int hook_nops)
{
@@ -161,10 +140,12 @@ static int nf_tables_register_hooks(const struct nft_table *table,
!(chain->flags & NFT_BASE_CHAIN))
return 0;
- return nft_register_basechain(nft_base_chain(chain), hook_nops);
+ return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
+ hook_nops);
}
-static void nf_tables_unregister_hooks(const struct nft_table *table,
+static void nf_tables_unregister_hooks(struct net *net,
+ const struct nft_table *table,
struct nft_chain *chain,
unsigned int hook_nops)
{
@@ -172,12 +153,9 @@ static void nf_tables_unregister_hooks(const struct nft_table *table,
!(chain->flags & NFT_BASE_CHAIN))
return;
- nft_unregister_basechain(nft_base_chain(chain), hook_nops);
+ nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
}
-/* Internal table flags */
-#define NFT_TABLE_INACTIVE (1 << 15)
-
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
{
struct nft_trans *trans;
@@ -187,7 +165,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
return -ENOMEM;
if (msg_type == NFT_MSG_NEWTABLE)
- ctx->table->flags |= NFT_TABLE_INACTIVE;
+ nft_activate_next(ctx->net, ctx->table);
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
@@ -201,7 +179,7 @@ static int nft_deltable(struct nft_ctx *ctx)
if (err < 0)
return err;
- list_del_rcu(&ctx->table->list);
+ nft_deactivate_next(ctx->net, ctx->table);
return err;
}
@@ -214,7 +192,7 @@ static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
return -ENOMEM;
if (msg_type == NFT_MSG_NEWCHAIN)
- ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+ nft_activate_next(ctx->net, ctx->chain);
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
@@ -229,47 +207,17 @@ static int nft_delchain(struct nft_ctx *ctx)
return err;
ctx->table->use--;
- list_del_rcu(&ctx->chain->list);
+ nft_deactivate_next(ctx->net, ctx->chain);
return err;
}
-static inline bool
-nft_rule_is_active(struct net *net, const struct nft_rule *rule)
-{
- return (rule->genmask & nft_genmask_cur(net)) == 0;
-}
-
-static inline int
-nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
-{
- return (rule->genmask & nft_genmask_next(net)) == 0;
-}
-
-static inline void
-nft_rule_activate_next(struct net *net, struct nft_rule *rule)
-{
- /* Now inactive, will be active in the future */
- rule->genmask = nft_genmask_cur(net);
-}
-
-static inline void
-nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
-{
- rule->genmask = nft_genmask_next(net);
-}
-
-static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
-{
- rule->genmask &= ~nft_genmask_next(net);
-}
-
static int
nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
{
/* You cannot delete the same rule twice */
- if (nft_rule_is_active_next(ctx->net, rule)) {
- nft_rule_deactivate_next(ctx->net, rule);
+ if (nft_is_active_next(ctx->net, rule)) {
+ nft_deactivate_next(ctx->net, rule);
ctx->chain->use--;
return 0;
}
@@ -322,9 +270,6 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
return 0;
}
-/* Internal set flag */
-#define NFT_SET_INACTIVE (1 << 15)
-
static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
struct nft_set *set)
{
@@ -337,7 +282,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
nft_trans_set_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
- set->flags |= NFT_SET_INACTIVE;
+ nft_activate_next(ctx->net, set);
}
nft_trans_set(trans) = set;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -353,7 +298,7 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
if (err < 0)
return err;
- list_del_rcu(&set->list);
+ nft_deactivate_next(ctx->net, set);
ctx->table->use--;
return err;
@@ -364,26 +309,29 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
*/
static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_table *table;
list_for_each_entry(table, &afi->tables, list) {
- if (!nla_strcmp(nla, table->name))
+ if (!nla_strcmp(nla, table->name) &&
+ nft_active_genmask(table, genmask))
return table;
}
return NULL;
}
static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_table *table;
if (nla == NULL)
return ERR_PTR(-EINVAL);
- table = nft_table_lookup(afi, nla);
+ table = nft_table_lookup(afi, nla, genmask);
if (table != NULL)
return table;
@@ -524,6 +472,8 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
+ if (!nft_is_active(net, table))
+ continue;
if (nf_tables_fill_table_info(skb, net,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
@@ -548,6 +498,7 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_cur(net);
const struct nft_af_info *afi;
const struct nft_table *table;
struct sk_buff *skb2;
@@ -565,11 +516,9 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -588,17 +537,21 @@ err:
return err;
}
-static int nf_tables_table_enable(const struct nft_af_info *afi,
+static int nf_tables_table_enable(struct net *net,
+ const struct nft_af_info *afi,
struct nft_table *table)
{
struct nft_chain *chain;
int err, i = 0;
list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
if (!(chain->flags & NFT_BASE_CHAIN))
continue;
- err = nft_register_basechain(nft_base_chain(chain), afi->nops);
+ err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
+ afi->nops);
if (err < 0)
goto err;
@@ -607,26 +560,34 @@ static int nf_tables_table_enable(const struct nft_af_info *afi,
return 0;
err:
list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
if (!(chain->flags & NFT_BASE_CHAIN))
continue;
if (i-- <= 0)
break;
- nft_unregister_basechain(nft_base_chain(chain), afi->nops);
+ nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
+ afi->nops);
}
return err;
}
-static void nf_tables_table_disable(const struct nft_af_info *afi,
+static void nf_tables_table_disable(struct net *net,
+ const struct nft_af_info *afi,
struct nft_table *table)
{
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list) {
- if (chain->flags & NFT_BASE_CHAIN)
- nft_unregister_basechain(nft_base_chain(chain),
- afi->nops);
+ if (!nft_is_active_next(net, chain))
+ continue;
+ if (!(chain->flags & NFT_BASE_CHAIN))
+ continue;
+
+ nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
+ afi->nops);
}
}
@@ -656,7 +617,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
nft_trans_table_enable(trans) = false;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(ctx->afi, ctx->table);
+ ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table);
if (ret >= 0) {
ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
nft_trans_table_enable(trans) = true;
@@ -678,6 +639,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
const struct nlattr *name;
struct nft_af_info *afi;
struct nft_table *table;
@@ -691,7 +653,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
return PTR_ERR(afi);
name = nla[NFTA_TABLE_NAME];
- table = nf_tables_table_lookup(afi, name);
+ table = nf_tables_table_lookup(afi, name, genmask);
if (IS_ERR(table)) {
if (PTR_ERR(table) != -ENOENT)
return PTR_ERR(table);
@@ -699,8 +661,6 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
}
if (table != NULL) {
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -752,6 +712,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
struct nft_set *set, *ns;
list_for_each_entry(chain, &ctx->table->chains, list) {
+ if (!nft_is_active_next(ctx->net, chain))
+ continue;
+
ctx->chain = chain;
err = nft_delrule_by_chain(ctx);
@@ -760,6 +723,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
}
list_for_each_entry_safe(set, ns, &ctx->table->sets, list) {
+ if (!nft_is_active_next(ctx->net, set))
+ continue;
+
if (set->flags & NFT_SET_ANONYMOUS &&
!list_empty(&set->bindings))
continue;
@@ -770,6 +736,9 @@ static int nft_flush_table(struct nft_ctx *ctx)
}
list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
+ if (!nft_is_active_next(ctx->net, chain))
+ continue;
+
ctx->chain = chain;
err = nft_delchain(ctx);
@@ -795,6 +764,9 @@ static int nft_flush(struct nft_ctx *ctx, int family)
ctx->afi = afi;
list_for_each_entry_safe(table, nt, &afi->tables, list) {
+ if (!nft_is_active_next(ctx->net, table))
+ continue;
+
if (nla[NFTA_TABLE_NAME] &&
nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
continue;
@@ -815,6 +787,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
struct nft_af_info *afi;
struct nft_table *table;
int family = nfmsg->nfgen_family;
@@ -828,7 +801,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -875,12 +848,14 @@ EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
*/
static struct nft_chain *
-nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
+nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle,
+ u8 genmask)
{
struct nft_chain *chain;
list_for_each_entry(chain, &table->chains, list) {
- if (chain->handle == handle)
+ if (chain->handle == handle &&
+ nft_active_genmask(chain, genmask))
return chain;
}
@@ -888,7 +863,8 @@ nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
}
static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_chain *chain;
@@ -896,7 +872,8 @@ static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
return ERR_PTR(-EINVAL);
list_for_each_entry(chain, &table->chains, list) {
- if (!nla_strcmp(nla, chain->name))
+ if (!nla_strcmp(nla, chain->name) &&
+ nft_active_genmask(chain, genmask))
return chain;
}
@@ -1079,6 +1056,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
+ if (!nft_is_active(net, chain))
+ continue;
if (nf_tables_fill_chain_info(skb, net,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
@@ -1104,6 +1083,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_cur(net);
const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
@@ -1122,17 +1102,13 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
- chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->flags & NFT_CHAIN_INACTIVE)
- return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -1231,6 +1207,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
struct nlattr *ha[NFTA_HOOK_MAX + 1];
+ u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
@@ -1247,7 +1224,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -1256,11 +1233,11 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (nla[NFTA_CHAIN_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
- chain = nf_tables_chain_lookup_byhandle(table, handle);
+ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
} else {
- chain = nf_tables_chain_lookup(table, name);
+ chain = nf_tables_chain_lookup(table, name, genmask);
if (IS_ERR(chain)) {
if (PTR_ERR(chain) != -ENOENT)
return PTR_ERR(chain);
@@ -1291,16 +1268,20 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct nft_stats *stats = NULL;
struct nft_trans *trans;
- if (chain->flags & NFT_CHAIN_INACTIVE)
- return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- if (nla[NFTA_CHAIN_HANDLE] && name &&
- !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
- return -EEXIST;
+ if (nla[NFTA_CHAIN_HANDLE] && name) {
+ struct nft_chain *chain2;
+
+ chain2 = nf_tables_chain_lookup(table,
+ nla[NFTA_CHAIN_NAME],
+ genmask);
+ if (IS_ERR(chain2))
+ return PTR_ERR(chain2);
+ }
if (nla[NFTA_CHAIN_COUNTERS]) {
if (!(chain->flags & NFT_BASE_CHAIN))
@@ -1455,7 +1436,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
chain->table = table;
nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
- err = nf_tables_register_hooks(table, chain, afi->nops);
+ err = nf_tables_register_hooks(net, table, chain, afi->nops);
if (err < 0)
goto err1;
@@ -1468,7 +1449,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
list_add_tail_rcu(&chain->list, &table->chains);
return 0;
err2:
- nf_tables_unregister_hooks(table, chain, afi->nops);
+ nf_tables_unregister_hooks(net, table, chain, afi->nops);
err1:
nf_tables_chain_destroy(chain);
return err;
@@ -1479,6 +1460,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
@@ -1489,11 +1471,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+ chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
if (chain->use > 0)
@@ -1898,7 +1880,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
list_for_each_entry_rcu(table, &afi->tables, list) {
list_for_each_entry_rcu(chain, &table->chains, list) {
list_for_each_entry_rcu(rule, &chain->rules, list) {
- if (!nft_rule_is_active(net, rule))
+ if (!nft_is_active(net, rule))
goto cont;
if (idx < s_idx)
goto cont;
@@ -1931,6 +1913,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_cur(net);
const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
@@ -1950,17 +1933,13 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
- chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+ chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->flags & NFT_CHAIN_INACTIVE)
- return -ENOENT;
rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule))
@@ -2009,6 +1988,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
@@ -2029,11 +2009,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
- chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+ chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
@@ -2102,7 +2082,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
if (rule == NULL)
goto err1;
- nft_rule_activate_next(net, rule);
+ nft_activate_next(net, rule);
rule->handle = handle;
rule->dlen = size;
@@ -2124,14 +2104,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
- if (nft_rule_is_active_next(net, old_rule)) {
+ if (nft_is_active_next(net, old_rule)) {
trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
old_rule);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
- nft_rule_deactivate_next(net, old_rule);
+ nft_deactivate_next(net, old_rule);
chain->use--;
list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
@@ -2174,6 +2154,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain = NULL;
@@ -2185,12 +2166,13 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
if (nla[NFTA_RULE_CHAIN]) {
- chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+ chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN],
+ genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
}
@@ -2210,6 +2192,9 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
}
} else {
list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
+
ctx.chain = chain;
err = nft_delrule_by_chain(&ctx);
if (err < 0)
@@ -2339,7 +2324,8 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ u8 genmask)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi = NULL;
@@ -2355,7 +2341,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
if (afi == NULL)
return -EAFNOSUPPORT;
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE],
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
}
@@ -2365,7 +2352,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
}
struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
- const struct nlattr *nla)
+ const struct nlattr *nla, u8 genmask)
{
struct nft_set *set;
@@ -2373,22 +2360,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
return ERR_PTR(-EINVAL);
list_for_each_entry(set, &table->sets, list) {
- if (!nla_strcmp(nla, set->name))
+ if (!nla_strcmp(nla, set->name) &&
+ nft_active_genmask(set, genmask))
return set;
}
return ERR_PTR(-ENOENT);
}
struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ u8 genmask)
{
struct nft_trans *trans;
u32 id = ntohl(nla_get_be32(nla));
list_for_each_entry(trans, &net->nft.commit_list, list) {
+ struct nft_set *set = nft_trans_set(trans);
+
if (trans->msg_type == NFT_MSG_NEWSET &&
- id == nft_trans_set_id(trans))
- return nft_trans_set(trans);
+ id == nft_trans_set_id(trans) &&
+ nft_active_genmask(set, genmask))
+ return set;
}
return ERR_PTR(-ENOENT);
}
@@ -2413,6 +2405,8 @@ cont:
list_for_each_entry(i, &ctx->table->sets, list) {
int tmp;
+ if (!nft_is_active_next(ctx->net, set))
+ continue;
if (!sscanf(i->name, name, &tmp))
continue;
if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
@@ -2432,6 +2426,8 @@ cont:
snprintf(set->name, sizeof(set->name), name, min + n);
list_for_each_entry(i, &ctx->table->sets, list) {
+ if (!nft_is_active_next(ctx->net, i))
+ continue;
if (!strcmp(set->name, i->name))
return -ENFILE;
}
@@ -2580,6 +2576,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
list_for_each_entry_rcu(set, &table->sets, list) {
if (idx < s_idx)
goto cont;
+ if (!nft_is_active(net, set))
+ goto cont;
ctx_set = *ctx;
ctx_set.table = table;
@@ -2616,6 +2614,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ u8 genmask = nft_genmask_cur(net);
const struct nft_set *set;
struct nft_ctx ctx;
struct sk_buff *skb2;
@@ -2623,7 +2622,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
int err;
/* Verify existence before starting dump */
- err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
@@ -2650,11 +2649,9 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
if (!nla[NFTA_SET_TABLE])
return -EINVAL;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (set->flags & NFT_SET_INACTIVE)
- return -ENOENT;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb2 == NULL)
@@ -2693,6 +2690,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
const struct nft_set_ops *ops;
struct nft_af_info *afi;
struct nft_table *table;
@@ -2790,13 +2788,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask);
if (IS_ERR(table))
return PTR_ERR(table);
nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
- set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
+ set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
if (PTR_ERR(set) != -ENOENT)
return PTR_ERR(set);
@@ -2895,6 +2893,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ u8 genmask = nft_genmask_next(net);
struct nft_set *set;
struct nft_ctx ctx;
int err;
@@ -2904,11 +2903,11 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_TABLE] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
if (!list_empty(&set->bindings))
@@ -2973,7 +2972,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
list_del_rcu(&binding->list);
if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
- !(set->flags & NFT_SET_INACTIVE))
+ nft_is_active(ctx->net, set))
nf_tables_set_destroy(ctx, set);
}
@@ -3029,7 +3028,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nla[])
+ const struct nlattr * const nla[],
+ u8 genmask)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
@@ -3039,7 +3039,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
if (IS_ERR(afi))
return PTR_ERR(afi);
- table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
+ table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE],
+ genmask);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -3136,6 +3137,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ u8 genmask = nft_genmask_cur(net);
const struct nft_set *set;
struct nft_set_dump_args args;
struct nft_ctx ctx;
@@ -3152,17 +3154,14 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
return err;
err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
- (void *)nla);
+ (void *)nla, genmask);
if (err < 0)
return err;
- if (ctx.table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (set->flags & NFT_SET_INACTIVE)
- return -ENOENT;
event = NFT_MSG_NEWSETELEM;
event |= NFNL_SUBSYS_NFTABLES << 8;
@@ -3216,21 +3215,19 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ u8 genmask = nft_genmask_cur(net);
const struct nft_set *set;
struct nft_ctx ctx;
int err;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
- if (ctx.table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (set->flags & NFT_SET_INACTIVE)
- return -ENOENT;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -3548,6 +3545,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ u8 genmask = nft_genmask_next(net);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
@@ -3556,15 +3554,17 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
if (IS_ERR(set)) {
if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
set = nf_tables_set_lookup_byid(net,
- nla[NFTA_SET_ELEM_LIST_SET_ID]);
+ nla[NFTA_SET_ELEM_LIST_SET_ID],
+ genmask);
}
if (IS_ERR(set))
return PTR_ERR(set);
@@ -3670,6 +3670,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ u8 genmask = nft_genmask_next(net);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
@@ -3678,11 +3679,12 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask);
if (err < 0)
return err;
- set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+ set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ genmask);
if (IS_ERR(set))
return PTR_ERR(set);
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
@@ -3950,36 +3952,40 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
if (!nft_trans_table_enable(trans)) {
- nf_tables_table_disable(trans->ctx.afi,
+ nf_tables_table_disable(net,
+ trans->ctx.afi,
trans->ctx.table);
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
}
} else {
- trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
+ nft_clear(net, trans->ctx.table);
}
nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELTABLE:
+ list_del_rcu(&trans->ctx.table->list);
nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans))
nft_chain_commit_update(trans);
else
- trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;
+ nft_clear(net, trans->ctx.chain);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELCHAIN:
+ list_del_rcu(&trans->ctx.chain->list);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
- nf_tables_unregister_hooks(trans->ctx.table,
+ nf_tables_unregister_hooks(trans->ctx.net,
+ trans->ctx.table,
trans->ctx.chain,
trans->ctx.afi->nops);
break;
case NFT_MSG_NEWRULE:
- nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_clear(trans->ctx.net, nft_trans_rule(trans));
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
NFT_MSG_NEWRULE);
@@ -3992,7 +3998,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
NFT_MSG_DELRULE);
break;
case NFT_MSG_NEWSET:
- nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
+ nft_clear(net, nft_trans_set(trans));
/* This avoids hitting -EBUSY when deleting the table
* from the transaction.
*/
@@ -4005,6 +4011,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSET:
+ list_del_rcu(&nft_trans_set(trans)->list);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_DELSET, GFP_KERNEL);
break;
@@ -4076,7 +4083,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
if (nft_trans_table_enable(trans)) {
- nf_tables_table_disable(trans->ctx.afi,
+ nf_tables_table_disable(net,
+ trans->ctx.afi,
trans->ctx.table);
trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
}
@@ -4086,8 +4094,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
}
break;
case NFT_MSG_DELTABLE:
- list_add_tail_rcu(&trans->ctx.table->list,
- &trans->ctx.afi->tables);
+ nft_clear(trans->ctx.net, trans->ctx.table);
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWCHAIN:
@@ -4098,15 +4105,15 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
} else {
trans->ctx.table->use--;
list_del_rcu(&trans->ctx.chain->list);
- nf_tables_unregister_hooks(trans->ctx.table,
+ nf_tables_unregister_hooks(trans->ctx.net,
+ trans->ctx.table,
trans->ctx.chain,
trans->ctx.afi->nops);
}
break;
case NFT_MSG_DELCHAIN:
trans->ctx.table->use++;
- list_add_tail_rcu(&trans->ctx.chain->list,
- &trans->ctx.table->chains);
+ nft_clear(trans->ctx.net, trans->ctx.chain);
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWRULE:
@@ -4115,7 +4122,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELRULE:
trans->ctx.chain->use++;
- nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+ nft_clear(trans->ctx.net, nft_trans_rule(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
@@ -4124,8 +4131,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELSET:
trans->ctx.table->use++;
- list_add_tail_rcu(&nft_trans_set(trans)->list,
- &trans->ctx.table->sets);
+ nft_clear(trans->ctx.net, nft_trans_set(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
@@ -4272,6 +4278,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
}
list_for_each_entry(set, &ctx->table->sets, list) {
+ if (!nft_is_active_next(ctx->net, set))
+ continue;
if (!(set->flags & NFT_SET_MAP) ||
set->dtype != NFT_DATA_VERDICT)
continue;
@@ -4430,6 +4438,7 @@ static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
struct nft_data_desc *desc, const struct nlattr *nla)
{
+ u8 genmask = nft_genmask_next(ctx->net);
struct nlattr *tb[NFTA_VERDICT_MAX + 1];
struct nft_chain *chain;
int err;
@@ -4462,7 +4471,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (!tb[NFTA_VERDICT_CHAIN])
return -EINVAL;
chain = nf_tables_chain_lookup(ctx->table,
- tb[NFTA_VERDICT_CHAIN]);
+ tb[NFTA_VERDICT_CHAIN], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
if (chain->flags & NFT_BASE_CHAIN)
@@ -4640,7 +4649,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN));
- nf_tables_unregister_hooks(ctx->chain->table, ctx->chain,
+ nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain,
ctx->afi->nops);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
@@ -4669,7 +4678,8 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
list_for_each_entry_safe(table, nt, &afi->tables, list) {
list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hooks(table, chain, afi->nops);
+ nf_tables_unregister_hooks(net, table, chain,
+ afi->nops);
/* No packets are walking on these chains anymore. */
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 11f81c8385fc..cbcfdfb586a6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -700,10 +700,13 @@ nfulnl_log_packet(struct net *net,
break;
case NFULNL_COPY_PACKET:
- if (inst->copy_range > skb->len)
+ data_len = inst->copy_range;
+ if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
+ (li->u.ulog.copy_len < data_len))
+ data_len = li->u.ulog.copy_len;
+
+ if (data_len > skb->len)
data_len = skb->len;
- else
- data_len = inst->copy_range;
size += nla_total_size(data_len);
break;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 78d4914fb39c..0af26699bf04 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -103,6 +103,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_dynset *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
struct nft_set *set;
u64 timeout;
int err;
@@ -112,11 +113,13 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
tb[NFTA_DYNSET_SREG_KEY] == NULL)
return -EINVAL;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]);
+ set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
+ genmask);
if (IS_ERR(set)) {
if (tb[NFTA_DYNSET_SET_ID])
set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_DYNSET_SET_ID]);
+ tb[NFTA_DYNSET_SET_ID],
+ genmask);
if (IS_ERR(set))
return PTR_ERR(set);
}
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index f39c53a159eb..ea924816b7b8 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -153,9 +153,10 @@ static void *nft_hash_deactivate(const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->pnet);
struct nft_hash_elem *he;
struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+ .genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
};
@@ -163,7 +164,8 @@ static void *nft_hash_deactivate(const struct nft_set *set,
rcu_read_lock();
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
if (he != NULL) {
- if (!nft_set_elem_mark_busy(&he->ext))
+ if (!nft_set_elem_mark_busy(&he->ext) ||
+ !nft_is_active(net, &he->ext))
nft_set_elem_change_active(set, &he->ext);
else
he = NULL;
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 319c22b4bca2..713d66837705 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -52,7 +52,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
struct nft_log *priv = nft_expr_priv(expr);
struct nf_loginfo *li = &priv->loginfo;
const struct nlattr *nla;
- int ret;
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
@@ -97,19 +96,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
break;
}
- if (ctx->afi->family == NFPROTO_INET) {
- ret = nf_logger_find_get(NFPROTO_IPV4, li->type);
- if (ret < 0)
- return ret;
-
- ret = nf_logger_find_get(NFPROTO_IPV6, li->type);
- if (ret < 0) {
- nf_logger_put(NFPROTO_IPV4, li->type);
- return ret;
- }
- return 0;
- }
-
return nf_logger_find_get(ctx->afi->family, li->type);
}
@@ -122,12 +108,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx,
if (priv->prefix != nft_log_null_prefix)
kfree(priv->prefix);
- if (ctx->afi->family == NFPROTO_INET) {
- nf_logger_put(NFPROTO_IPV4, li->type);
- nf_logger_put(NFPROTO_IPV6, li->type);
- } else {
- nf_logger_put(ctx->afi->family, li->type);
- }
+ nf_logger_put(ctx->afi->family, li->type);
}
static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index b3c31ef8015d..b8d18f598569 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -22,6 +22,7 @@ struct nft_lookup {
struct nft_set *set;
enum nft_registers sreg:8;
enum nft_registers dreg:8;
+ bool invert;
struct nft_set_binding binding;
};
@@ -32,14 +33,20 @@ static void nft_lookup_eval(const struct nft_expr *expr,
const struct nft_lookup *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
const struct nft_set_ext *ext;
+ bool found;
- if (set->ops->lookup(set, &regs->data[priv->sreg], &ext)) {
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(&regs->data[priv->dreg],
- nft_set_ext_data(ext), set->dlen);
+ found = set->ops->lookup(set, &regs->data[priv->sreg], &ext) ^
+ priv->invert;
+
+ if (!found) {
+ regs->verdict.code = NFT_BREAK;
return;
}
- regs->verdict.code = NFT_BREAK;
+
+ if (found && set->flags & NFT_SET_MAP)
+ nft_data_copy(&regs->data[priv->dreg],
+ nft_set_ext_data(ext), set->dlen);
+
}
static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
@@ -47,6 +54,7 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
[NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 },
[NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
[NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
+ [NFTA_LOOKUP_FLAGS] = { .type = NLA_U32 },
};
static int nft_lookup_init(const struct nft_ctx *ctx,
@@ -54,18 +62,21 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_lookup *priv = nft_expr_priv(expr);
+ u8 genmask = nft_genmask_next(ctx->net);
struct nft_set *set;
+ u32 flags;
int err;
if (tb[NFTA_LOOKUP_SET] == NULL ||
tb[NFTA_LOOKUP_SREG] == NULL)
return -EINVAL;
- set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
+ set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask);
if (IS_ERR(set)) {
if (tb[NFTA_LOOKUP_SET_ID]) {
set = nf_tables_set_lookup_byid(ctx->net,
- tb[NFTA_LOOKUP_SET_ID]);
+ tb[NFTA_LOOKUP_SET_ID],
+ genmask);
}
if (IS_ERR(set))
return PTR_ERR(set);
@@ -79,7 +90,22 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
+ if (tb[NFTA_LOOKUP_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS]));
+
+ if (flags & ~NFT_LOOKUP_F_INV)
+ return -EINVAL;
+
+ if (flags & NFT_LOOKUP_F_INV) {
+ if (set->flags & NFT_SET_MAP)
+ return -EINVAL;
+ priv->invert = true;
+ }
+ }
+
if (tb[NFTA_LOOKUP_DREG] != NULL) {
+ if (priv->invert)
+ return -EINVAL;
if (!(set->flags & NFT_SET_MAP))
return -EINVAL;
@@ -112,6 +138,7 @@ static void nft_lookup_destroy(const struct nft_ctx *ctx,
static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_lookup *priv = nft_expr_priv(expr);
+ u32 flags = priv->invert ? NFT_LOOKUP_F_INV : 0;
if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name))
goto nla_put_failure;
@@ -120,6 +147,8 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (priv->set->flags & NFT_SET_MAP)
if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 16c50b0dd426..03e5e33b5c39 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -199,13 +199,6 @@ err:
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);
-/* don't change or set _LOOPBACK, _USER, etc. */
-static bool pkt_type_ok(u32 p)
-{
- return p == PACKET_HOST || p == PACKET_BROADCAST ||
- p == PACKET_MULTICAST || p == PACKET_OTHERHOST;
-}
-
void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -223,7 +216,7 @@ void nft_meta_set_eval(const struct nft_expr *expr,
break;
case NFT_META_PKTTYPE:
if (skb->pkt_type != value &&
- pkt_type_ok(value) && pkt_type_ok(skb->pkt_type))
+ skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type))
skb->pkt_type = value;
break;
case NFT_META_NFTRACE:
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 7201d57b5a93..c0f638745adc 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -170,7 +170,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
struct nft_rbtree_elem *rbe, *this = elem->priv;
- u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
+ u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
int d;
while (parent != NULL) {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 2675d580c490..fe0e2db632c7 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1460,6 +1460,9 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
uint8_t hooknum;
struct nf_hook_ops *ops;
+ if (!num_hooks)
+ return ERR_PTR(-EINVAL);
+
ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
if (ops == NULL)
return ERR_PTR(-ENOMEM);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index a1fa2c800cb9..018eed7e1ff1 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -33,6 +33,9 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
li.u.ulog.group = info->group;
li.u.ulog.qthreshold = info->threshold;
+ if (info->flags & XT_NFLOG_F_COPY_LEN)
+ li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
+
nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in,
par->out, &li, info->prefix);
return XT_CONTINUE;
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index df48967af382..858d189a1303 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -4,12 +4,23 @@
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
MODULE_DESCRIPTION("Xtables: packet flow tracing");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_TRACE");
MODULE_ALIAS("ip6t_TRACE");
+static int trace_tg_check(const struct xt_tgchk_param *par)
+{
+ return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+}
+
+static void trace_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_logger_put(par->family, NF_LOG_TYPE_LOG);
+}
+
static unsigned int
trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -18,12 +29,14 @@ trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
}
static struct xt_target trace_tg_reg __read_mostly = {
- .name = "TRACE",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .table = "raw",
- .target = trace_tg,
- .me = THIS_MODULE,
+ .name = "TRACE",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .table = "raw",
+ .target = trace_tg,
+ .checkentry = trace_tg_check,
+ .destroy = trace_tg_destroy,
+ .me = THIS_MODULE,
};
static int __init trace_tg_init(void)
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 1302b475abcb..a20e731b5b6c 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -21,11 +21,39 @@
static int owner_check(const struct xt_mtchk_param *par)
{
struct xt_owner_match_info *info = par->matchinfo;
+ struct net *net = par->net;
- /* For now only allow adding matches from the initial user namespace */
+ /* Only allow the common case where the userns of the writer
+ * matches the userns of the network namespace.
+ */
if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) &&
- (current_user_ns() != &init_user_ns))
+ (current_user_ns() != net->user_ns))
return -EINVAL;
+
+ /* Ensure the uids are valid */
+ if (info->match & XT_OWNER_UID) {
+ kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+ kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
+
+ if (!uid_valid(uid_min) || !uid_valid(uid_max) ||
+ (info->uid_max < info->uid_min) ||
+ uid_lt(uid_max, uid_min)) {
+ return -EINVAL;
+ }
+ }
+
+ /* Ensure the gids are valid */
+ if (info->match & XT_OWNER_GID) {
+ kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+ kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
+
+ if (!gid_valid(gid_min) || !gid_valid(gid_max) ||
+ (info->gid_max < info->gid_min) ||
+ gid_lt(gid_max, gid_min)) {
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -35,6 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_owner_match_info *info = par->matchinfo;
const struct file *filp;
struct sock *sk = skb_to_full_sk(skb);
+ struct net *net = par->net;
if (sk == NULL || sk->sk_socket == NULL)
return (info->match ^ info->invert) == 0;
@@ -51,8 +80,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
(XT_OWNER_UID | XT_OWNER_GID)) == 0;
if (info->match & XT_OWNER_UID) {
- kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
- kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);
+ kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+ kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
uid_lte(filp->f_cred->fsuid, uid_max)) ^
!(info->invert & XT_OWNER_UID))
@@ -60,8 +89,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
if (info->match & XT_OWNER_GID) {
- kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
- kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);
+ kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+ kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
gid_lte(filp->f_cred->fsgid, gid_max)) ^
!(info->invert & XT_OWNER_GID))
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index c14d4645daa3..ade024c90f4f 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -83,8 +83,6 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
-#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg)))
-
th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
if (th == NULL) {
/* We've been asked to examine this packet, and we
@@ -102,9 +100,8 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
ntohs(th->dest),
!!(tcpinfo->invflags & XT_TCP_INV_DSTPT)))
return false;
- if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
- == tcpinfo->flg_cmp,
- XT_TCP_INV_FLAGS))
+ if (!NF_INVF(tcpinfo, XT_TCP_INV_FLAGS,
+ (((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp))
return false;
if (tcpinfo->option) {
if (th->doff * 4 < sizeof(_tcph)) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d1f3b9e977e5..9d92c4c46871 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
{
- return reciprocal_scale(skb_get_hash(skb), num);
+ return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
}
static unsigned int fanout_demux_lb(struct packet_fanout *f,
@@ -1588,13 +1588,9 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
if (copy_from_user(&fd, data, len))
return -EFAULT;
- new = bpf_prog_get(fd);
+ new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
if (IS_ERR(new))
return PTR_ERR(new);
- if (new->type != BPF_PROG_TYPE_SOCKET_FILTER) {
- bpf_prog_put(new);
- return -EINVAL;
- }
__fanout_set_data_bpf(po->fanout, new);
return 0;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index a4b07c899d89..19a4fee5f4dd 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -253,9 +253,12 @@ static struct rds_connection *__rds_conn_create(struct net *net,
for (i = 0; i < RDS_MPATH_WORKERS; i++) {
cp = &conn->c_path[i];
- trans->conn_free(cp->cp_transport_data);
- if (!trans->t_mp_capable)
- break;
+ /* The ->conn_alloc invocation may have
+ * allocated resource for all paths, so all
+ * of them may have to be freed here.
+ */
+ if (cp->cp_transport_data)
+ trans->conn_free(cp->cp_transport_data);
}
kmem_cache_free(rds_conn_slab, conn);
conn = found;
@@ -326,10 +329,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
wait_event(cp->cp_waitq,
!test_bit(RDS_RECV_REFILL, &cp->cp_flags));
- if (!conn->c_trans->t_mp_capable)
- conn->c_trans->conn_shutdown(conn);
- else
- conn->c_trans->conn_path_shutdown(cp);
+ conn->c_trans->conn_path_shutdown(cp);
rds_conn_path_reset(cp);
if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING,
@@ -355,9 +355,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
rcu_read_lock();
if (!hlist_unhashed(&conn->c_hash_node)) {
rcu_read_unlock();
- if (conn->c_trans->t_type != RDS_TRANS_TCP ||
- cp->cp_outgoing == 1)
- rds_queue_reconnect(cp);
+ rds_queue_reconnect(cp);
} else {
rcu_read_unlock();
}
@@ -370,6 +368,9 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
{
struct rds_message *rm, *rtmp;
+ if (!cp->cp_transport_data)
+ return;
+
rds_conn_path_drop(cp);
flush_work(&cp->cp_down_w);
@@ -401,6 +402,8 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
void rds_conn_destroy(struct rds_connection *conn)
{
unsigned long flags;
+ int i;
+ struct rds_conn_path *cp;
rdsdebug("freeing conn %p for %pI4 -> "
"%pI4\n", conn, &conn->c_laddr,
@@ -413,18 +416,10 @@ void rds_conn_destroy(struct rds_connection *conn)
synchronize_rcu();
/* shut the connection down */
- if (!conn->c_trans->t_mp_capable) {
- rds_conn_path_destroy(&conn->c_path[0]);
- BUG_ON(!list_empty(&conn->c_path[0].cp_retrans));
- } else {
- int i;
- struct rds_conn_path *cp;
-
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
- cp = &conn->c_path[i];
- rds_conn_path_destroy(cp);
- BUG_ON(!list_empty(&cp->cp_retrans));
- }
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ cp = &conn->c_path[i];
+ rds_conn_path_destroy(cp);
+ BUG_ON(!list_empty(&cp->cp_retrans));
}
/*
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 44946a681a8c..7eaf887e46f8 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -381,15 +381,15 @@ void rds_ib_exit(void)
struct rds_transport rds_ib_transport = {
.laddr_check = rds_ib_laddr_check,
- .xmit_complete = rds_ib_xmit_complete,
+ .xmit_path_complete = rds_ib_xmit_path_complete,
.xmit = rds_ib_xmit,
.xmit_rdma = rds_ib_xmit_rdma,
.xmit_atomic = rds_ib_xmit_atomic,
- .recv = rds_ib_recv,
+ .recv_path = rds_ib_recv_path,
.conn_alloc = rds_ib_conn_alloc,
.conn_free = rds_ib_conn_free,
- .conn_connect = rds_ib_conn_connect,
- .conn_shutdown = rds_ib_conn_shutdown,
+ .conn_path_connect = rds_ib_conn_path_connect,
+ .conn_path_shutdown = rds_ib_conn_path_shutdown,
.inc_copy_to_user = rds_ib_inc_copy_to_user,
.inc_free = rds_ib_inc_free,
.cm_initiate_connect = rds_ib_cm_initiate_connect,
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 627fb79aee65..046f7508c06b 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -328,8 +328,8 @@ extern struct list_head ib_nodev_conns;
/* ib_cm.c */
int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp);
void rds_ib_conn_free(void *arg);
-int rds_ib_conn_connect(struct rds_connection *conn);
-void rds_ib_conn_shutdown(struct rds_connection *conn);
+int rds_ib_conn_path_connect(struct rds_conn_path *cp);
+void rds_ib_conn_path_shutdown(struct rds_conn_path *cp);
void rds_ib_state_change(struct sock *sk);
int rds_ib_listen_init(void);
void rds_ib_listen_stop(void);
@@ -354,7 +354,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
/* ib_recv.c */
int rds_ib_recv_init(void);
void rds_ib_recv_exit(void);
-int rds_ib_recv(struct rds_connection *conn);
+int rds_ib_recv_path(struct rds_conn_path *conn);
int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp);
@@ -384,7 +384,7 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
extern wait_queue_head_t rds_ib_ring_empty_wait;
/* ib_send.c */
-void rds_ib_xmit_complete(struct rds_connection *conn);
+void rds_ib_xmit_path_complete(struct rds_conn_path *cp);
int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
unsigned int hdr_off, unsigned int sg, unsigned int off);
void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index e48bb1ba3dfc..5b2ab95afa07 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -685,8 +685,9 @@ out:
return ret;
}
-int rds_ib_conn_connect(struct rds_connection *conn)
+int rds_ib_conn_path_connect(struct rds_conn_path *cp)
{
+ struct rds_connection *conn = cp->cp_conn;
struct rds_ib_connection *ic = conn->c_transport_data;
struct sockaddr_in src, dest;
int ret;
@@ -731,8 +732,9 @@ out:
* so that it can be called at any point during startup. In fact it
* can be called multiple times for a given connection.
*/
-void rds_ib_conn_shutdown(struct rds_connection *conn)
+void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
{
+ struct rds_connection *conn = cp->cp_conn;
struct rds_ib_connection *ic = conn->c_transport_data;
int err = 0;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 4ea8cb17cc7a..606a11f681d2 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -1009,8 +1009,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
}
-int rds_ib_recv(struct rds_connection *conn)
+int rds_ib_recv_path(struct rds_conn_path *cp)
{
+ struct rds_connection *conn = cp->cp_conn;
struct rds_ib_connection *ic = conn->c_transport_data;
int ret = 0;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 6e4110aa5135..84d90c97332f 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -980,8 +980,9 @@ out:
return ret;
}
-void rds_ib_xmit_complete(struct rds_connection *conn)
+void rds_ib_xmit_path_complete(struct rds_conn_path *cp)
{
+ struct rds_connection *conn = cp->cp_conn;
struct rds_ib_connection *ic = conn->c_transport_data;
/* We may have a pending ACK or window update we were unable
diff --git a/net/rds/loop.c b/net/rds/loop.c
index 15f83db78f0c..f2bf78de5688 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -102,7 +102,7 @@ static void rds_loop_inc_free(struct rds_incoming *inc)
}
/* we need to at least give the thread something to succeed */
-static int rds_loop_recv(struct rds_connection *conn)
+static int rds_loop_recv_path(struct rds_conn_path *cp)
{
return 0;
}
@@ -150,13 +150,13 @@ static void rds_loop_conn_free(void *arg)
kfree(lc);
}
-static int rds_loop_conn_connect(struct rds_connection *conn)
+static int rds_loop_conn_path_connect(struct rds_conn_path *cp)
{
- rds_connect_complete(conn);
+ rds_connect_complete(cp->cp_conn);
return 0;
}
-static void rds_loop_conn_shutdown(struct rds_connection *conn)
+static void rds_loop_conn_path_shutdown(struct rds_conn_path *cp)
{
}
@@ -185,11 +185,11 @@ void rds_loop_exit(void)
*/
struct rds_transport rds_loop_transport = {
.xmit = rds_loop_xmit,
- .recv = rds_loop_recv,
+ .recv_path = rds_loop_recv_path,
.conn_alloc = rds_loop_conn_alloc,
.conn_free = rds_loop_conn_free,
- .conn_connect = rds_loop_conn_connect,
- .conn_shutdown = rds_loop_conn_shutdown,
+ .conn_path_connect = rds_loop_conn_path_connect,
+ .conn_path_shutdown = rds_loop_conn_path_shutdown,
.inc_copy_to_user = rds_message_inc_copy_to_user,
.inc_free = rds_loop_inc_free,
.t_name = "loopback",
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 2e35b738176f..6ef07bd27227 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -454,18 +454,15 @@ struct rds_transport {
int (*laddr_check)(struct net *net, __be32 addr);
int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
void (*conn_free)(void *data);
- int (*conn_connect)(struct rds_connection *conn);
- void (*conn_shutdown)(struct rds_connection *conn);
+ int (*conn_path_connect)(struct rds_conn_path *cp);
void (*conn_path_shutdown)(struct rds_conn_path *conn);
- void (*xmit_prepare)(struct rds_connection *conn);
void (*xmit_path_prepare)(struct rds_conn_path *cp);
- void (*xmit_complete)(struct rds_connection *conn);
void (*xmit_path_complete)(struct rds_conn_path *cp);
int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
unsigned int hdr_off, unsigned int sg, unsigned int off);
int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
- int (*recv)(struct rds_connection *conn);
+ int (*recv_path)(struct rds_conn_path *cp);
int (*inc_copy_to_user)(struct rds_incoming *inc, struct iov_iter *to);
void (*inc_free)(struct rds_incoming *inc);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index b58f50571782..fed53a6c2890 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -226,6 +226,10 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
+ if (inc->i_hdr.h_sport == 0) {
+ rdsdebug("ignore ping with 0 sport from 0x%x\n", saddr);
+ goto out;
+ }
rds_stats_inc(s_recv_ping);
rds_send_pong(cp, inc->i_hdr.h_sport);
goto out;
diff --git a/net/rds/send.c b/net/rds/send.c
index ee43d6b2ea8f..5a9caf1da896 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -183,12 +183,8 @@ restart:
goto out;
}
- if (conn->c_trans->t_mp_capable) {
- if (conn->c_trans->xmit_path_prepare)
- conn->c_trans->xmit_path_prepare(cp);
- } else if (conn->c_trans->xmit_prepare) {
- conn->c_trans->xmit_prepare(conn);
- }
+ if (conn->c_trans->xmit_path_prepare)
+ conn->c_trans->xmit_path_prepare(cp);
/*
* spin trying to push headers and data down the connection until
@@ -403,12 +399,8 @@ restart:
}
over_batch:
- if (conn->c_trans->t_mp_capable) {
- if (conn->c_trans->xmit_path_complete)
- conn->c_trans->xmit_path_complete(cp);
- } else if (conn->c_trans->xmit_complete) {
- conn->c_trans->xmit_complete(conn);
- }
+ if (conn->c_trans->xmit_path_complete)
+ conn->c_trans->xmit_path_complete(cp);
release_in_xmit(cp);
/* Nuke any messages we decided not to retransmit. */
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 5217d49ce6d6..d24f6c142d03 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -136,9 +136,9 @@ void rds_tcp_restore_callbacks(struct socket *sock,
* from being called while it isn't set.
*/
void rds_tcp_reset_callbacks(struct socket *sock,
- struct rds_connection *conn)
+ struct rds_conn_path *cp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
struct socket *osock = tc->t_sock;
if (!osock)
@@ -148,8 +148,8 @@ void rds_tcp_reset_callbacks(struct socket *sock,
* We have an outstanding SYN to this peer, which may
* potentially have transitioned to the RDS_CONN_UP state,
* so we must quiesce any send threads before resetting
- * c_transport_data. We quiesce these threads by setting
- * c_state to something other than RDS_CONN_UP, and then
+ * cp_transport_data. We quiesce these threads by setting
+ * cp_state to something other than RDS_CONN_UP, and then
* waiting for any existing threads in rds_send_xmit to
* complete release_in_xmit(). (Subsequent threads entering
* rds_send_xmit() will bail on !rds_conn_up().
@@ -164,8 +164,8 @@ void rds_tcp_reset_callbacks(struct socket *sock,
* RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change
* cannot mark rds_conn_path_up() in the window before lock_sock()
*/
- atomic_set(&conn->c_state, RDS_CONN_RESETTING);
- wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags));
+ atomic_set(&cp->cp_state, RDS_CONN_RESETTING);
+ wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags));
lock_sock(osock->sk);
/* reset receive side state for rds_tcp_data_recv() for osock */
if (tc->t_tinc) {
@@ -186,11 +186,12 @@ void rds_tcp_reset_callbacks(struct socket *sock,
release_sock(osock->sk);
sock_release(osock);
newsock:
- rds_send_path_reset(&conn->c_path[0]);
+ rds_send_path_reset(cp);
lock_sock(sock->sk);
write_lock_bh(&sock->sk->sk_callback_lock);
tc->t_sock = sock;
- sock->sk->sk_user_data = conn;
+ tc->t_cpath = cp;
+ sock->sk->sk_user_data = cp;
sock->sk->sk_data_ready = rds_tcp_data_ready;
sock->sk->sk_write_space = rds_tcp_write_space;
sock->sk->sk_state_change = rds_tcp_state_change;
@@ -203,9 +204,9 @@ newsock:
* above rds_tcp_reset_callbacks for notes about synchronization
* with data path
*/
-void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
+void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);
write_lock_bh(&sock->sk->sk_callback_lock);
@@ -221,12 +222,12 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)
sock->sk->sk_data_ready = sock->sk->sk_user_data;
tc->t_sock = sock;
- tc->conn = conn;
+ tc->t_cpath = cp;
tc->t_orig_data_ready = sock->sk->sk_data_ready;
tc->t_orig_write_space = sock->sk->sk_write_space;
tc->t_orig_state_change = sock->sk->sk_state_change;
- sock->sk->sk_user_data = conn;
+ sock->sk->sk_user_data = cp;
sock->sk->sk_data_ready = rds_tcp_data_ready;
sock->sk->sk_write_space = rds_tcp_write_space;
sock->sk->sk_state_change = rds_tcp_state_change;
@@ -284,24 +285,29 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
{
struct rds_tcp_connection *tc;
+ int i;
- tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
- if (!tc)
- return -ENOMEM;
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
+ if (!tc)
+ return -ENOMEM;
- mutex_init(&tc->t_conn_lock);
- tc->t_sock = NULL;
- tc->t_tinc = NULL;
- tc->t_tinc_hdr_rem = sizeof(struct rds_header);
- tc->t_tinc_data_rem = 0;
+ mutex_init(&tc->t_conn_path_lock);
+ tc->t_sock = NULL;
+ tc->t_tinc = NULL;
+ tc->t_tinc_hdr_rem = sizeof(struct rds_header);
+ tc->t_tinc_data_rem = 0;
- conn->c_transport_data = tc;
+ conn->c_path[i].cp_transport_data = tc;
+ tc->t_cpath = &conn->c_path[i];
- spin_lock_irq(&rds_tcp_conn_lock);
- list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
- spin_unlock_irq(&rds_tcp_conn_lock);
+ spin_lock_irq(&rds_tcp_conn_lock);
+ list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
+ spin_unlock_irq(&rds_tcp_conn_lock);
+ rdsdebug("rds_conn_path [%d] tc %p\n", i,
+ conn->c_path[i].cp_transport_data);
+ }
- rdsdebug("alloced tc %p\n", conn->c_transport_data);
return 0;
}
@@ -318,6 +324,17 @@ static void rds_tcp_conn_free(void *arg)
kmem_cache_free(rds_tcp_conn_slab, tc);
}
+static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
+{
+ struct rds_tcp_connection *tc, *_tc;
+
+ list_for_each_entry_safe(tc, _tc, list, t_tcp_node) {
+ if (tc->t_cpath->cp_conn == conn)
+ return true;
+ }
+ return false;
+}
+
static void rds_tcp_destroy_conns(void)
{
struct rds_tcp_connection *tc, *_tc;
@@ -325,29 +342,28 @@ static void rds_tcp_destroy_conns(void)
/* avoid calling conn_destroy with irqs off */
spin_lock_irq(&rds_tcp_conn_lock);
- list_splice(&rds_tcp_conn_list, &tmp_list);
- INIT_LIST_HEAD(&rds_tcp_conn_list);
+ list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
+ if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+ list_move_tail(&tc->t_tcp_node, &tmp_list);
+ }
spin_unlock_irq(&rds_tcp_conn_lock);
- list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
- if (tc->conn->c_passive)
- rds_conn_destroy(tc->conn->c_passive);
- rds_conn_destroy(tc->conn);
- }
+ list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
+ rds_conn_destroy(tc->t_cpath->cp_conn);
}
static void rds_tcp_exit(void);
struct rds_transport rds_tcp_transport = {
.laddr_check = rds_tcp_laddr_check,
- .xmit_prepare = rds_tcp_xmit_prepare,
- .xmit_complete = rds_tcp_xmit_complete,
+ .xmit_path_prepare = rds_tcp_xmit_path_prepare,
+ .xmit_path_complete = rds_tcp_xmit_path_complete,
.xmit = rds_tcp_xmit,
- .recv = rds_tcp_recv,
+ .recv_path = rds_tcp_recv_path,
.conn_alloc = rds_tcp_conn_alloc,
.conn_free = rds_tcp_conn_free,
- .conn_connect = rds_tcp_conn_connect,
- .conn_shutdown = rds_tcp_conn_shutdown,
+ .conn_path_connect = rds_tcp_conn_path_connect,
+ .conn_path_shutdown = rds_tcp_conn_path_shutdown,
.inc_copy_to_user = rds_tcp_inc_copy_to_user,
.inc_free = rds_tcp_inc_free,
.stats_info_copy = rds_tcp_stats_info_copy,
@@ -489,10 +505,30 @@ static struct pernet_operations rds_tcp_net_ops = {
.size = sizeof(struct rds_tcp_net),
};
+/* explicitly send a RST on each socket, thereby releasing any socket refcnts
+ * that may otherwise hold up netns deletion.
+ */
+static void rds_tcp_conn_paths_destroy(struct rds_connection *conn)
+{
+ struct rds_conn_path *cp;
+ struct rds_tcp_connection *tc;
+ int i;
+ struct sock *sk;
+
+ for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ cp = &conn->c_path[i];
+ tc = cp->cp_transport_data;
+ if (!tc->t_sock)
+ continue;
+ sk = tc->t_sock->sk;
+ sk->sk_prot->disconnect(sk, 0);
+ tcp_done(sk);
+ }
+}
+
static void rds_tcp_kill_sock(struct net *net)
{
struct rds_tcp_connection *tc, *_tc;
- struct sock *sk;
LIST_HEAD(tmp_list);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
@@ -501,20 +537,17 @@ static void rds_tcp_kill_sock(struct net *net)
flush_work(&rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = read_pnet(&tc->conn->c_net);
+ struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock)
continue;
- list_move_tail(&tc->t_tcp_node, &tmp_list);
+ if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+ list_move_tail(&tc->t_tcp_node, &tmp_list);
}
spin_unlock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
- sk = tc->t_sock->sk;
- sk->sk_prot->disconnect(sk, 0);
- tcp_done(sk);
- if (tc->conn->c_passive)
- rds_conn_destroy(tc->conn->c_passive);
- rds_conn_destroy(tc->conn);
+ rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn);
+ rds_conn_destroy(tc->t_cpath->cp_conn);
}
}
@@ -552,12 +585,13 @@ static void rds_tcp_sysctl_reset(struct net *net)
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = read_pnet(&tc->conn->c_net);
+ struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
if (net != c_net || !tc->t_sock)
continue;
- rds_conn_drop(tc->conn); /* reconnect with new parameters */
+ /* reconnect with new parameters */
+ rds_conn_path_drop(tc->t_cpath);
}
spin_unlock_irq(&rds_tcp_conn_lock);
}
@@ -617,7 +651,7 @@ static int rds_tcp_init(void)
ret = rds_tcp_recv_init();
if (ret)
- goto out_slab;
+ goto out_pernet;
ret = rds_trans_register(&rds_tcp_transport);
if (ret)
@@ -629,8 +663,9 @@ static int rds_tcp_init(void)
out_recv:
rds_tcp_recv_exit();
-out_slab:
+out_pernet:
unregister_pernet_subsys(&rds_tcp_net_ops);
+out_slab:
kmem_cache_destroy(rds_tcp_conn_slab);
out:
return ret;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 7940babf6c71..1c3160faa963 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -11,11 +11,11 @@ struct rds_tcp_incoming {
struct rds_tcp_connection {
struct list_head t_tcp_node;
- struct rds_connection *conn;
- /* t_conn_lock synchronizes the connection establishment between
- * rds_tcp_accept_one and rds_tcp_conn_connect
+ struct rds_conn_path *t_cpath;
+ /* t_conn_path_lock synchronizes the connection establishment between
+ * rds_tcp_accept_one and rds_tcp_conn_path_connect
*/
- struct mutex t_conn_lock;
+ struct mutex t_conn_path_lock;
struct socket *t_sock;
void *t_orig_write_space;
void *t_orig_data_ready;
@@ -49,8 +49,8 @@ struct rds_tcp_statistics {
/* tcp.c */
void rds_tcp_tune(struct socket *sock);
void rds_tcp_nonagle(struct socket *sock);
-void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn);
-void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn);
+void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
+void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc);
u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);
@@ -60,8 +60,8 @@ extern struct rds_transport rds_tcp_transport;
void rds_tcp_accept_work(struct sock *sk);
/* tcp_connect.c */
-int rds_tcp_conn_connect(struct rds_connection *conn);
-void rds_tcp_conn_shutdown(struct rds_connection *conn);
+int rds_tcp_conn_path_connect(struct rds_conn_path *cp);
+void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn);
void rds_tcp_state_change(struct sock *sk);
/* tcp_listen.c */
@@ -75,13 +75,13 @@ int rds_tcp_keepalive(struct socket *sock);
int rds_tcp_recv_init(void);
void rds_tcp_recv_exit(void);
void rds_tcp_data_ready(struct sock *sk);
-int rds_tcp_recv(struct rds_connection *conn);
+int rds_tcp_recv_path(struct rds_conn_path *cp);
void rds_tcp_inc_free(struct rds_incoming *inc);
int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
/* tcp_send.c */
-void rds_tcp_xmit_prepare(struct rds_connection *conn);
-void rds_tcp_xmit_complete(struct rds_connection *conn);
+void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp);
+void rds_tcp_xmit_path_complete(struct rds_conn_path *cp);
int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
unsigned int hdr_off, unsigned int sg, unsigned int off);
void rds_tcp_write_space(struct sock *sk);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 96c2c4d17909..c916715fbe61 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -41,16 +41,16 @@
void rds_tcp_state_change(struct sock *sk)
{
void (*state_change)(struct sock *sk);
- struct rds_connection *conn;
+ struct rds_conn_path *cp;
struct rds_tcp_connection *tc;
read_lock_bh(&sk->sk_callback_lock);
- conn = sk->sk_user_data;
- if (!conn) {
+ cp = sk->sk_user_data;
+ if (!cp) {
state_change = sk->sk_state_change;
goto out;
}
- tc = conn->c_transport_data;
+ tc = cp->cp_transport_data;
state_change = tc->t_orig_state_change;
rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state);
@@ -61,12 +61,11 @@ void rds_tcp_state_change(struct sock *sk)
case TCP_SYN_RECV:
break;
case TCP_ESTABLISHED:
- rds_connect_path_complete(&conn->c_path[0],
- RDS_CONN_CONNECTING);
+ rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
break;
case TCP_CLOSE_WAIT:
case TCP_CLOSE:
- rds_conn_drop(conn);
+ rds_conn_path_drop(cp);
default:
break;
}
@@ -75,17 +74,18 @@ out:
state_change(sk);
}
-int rds_tcp_conn_connect(struct rds_connection *conn)
+int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
{
struct socket *sock = NULL;
struct sockaddr_in src, dest;
int ret;
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_connection *conn = cp->cp_conn;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
- mutex_lock(&tc->t_conn_lock);
+ mutex_lock(&tc->t_conn_path_lock);
- if (rds_conn_up(conn)) {
- mutex_unlock(&tc->t_conn_lock);
+ if (rds_conn_path_up(cp)) {
+ mutex_unlock(&tc->t_conn_path_lock);
return 0;
}
ret = sock_create_kern(rds_conn_net(conn), PF_INET,
@@ -114,10 +114,11 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
* once we call connect() we can start getting callbacks and they
* own the socket
*/
- rds_tcp_set_callbacks(sock, conn);
+ rds_tcp_set_callbacks(sock, cp);
ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest),
O_NONBLOCK);
+ cp->cp_outgoing = 1;
rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
if (ret == -EINPROGRESS)
ret = 0;
@@ -125,11 +126,11 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
rds_tcp_keepalive(sock);
sock = NULL;
} else {
- rds_tcp_restore_callbacks(sock, conn->c_transport_data);
+ rds_tcp_restore_callbacks(sock, cp->cp_transport_data);
}
out:
- mutex_unlock(&tc->t_conn_lock);
+ mutex_unlock(&tc->t_conn_path_lock);
if (sock)
sock_release(sock);
return ret;
@@ -144,12 +145,13 @@ out:
* callbacks to those set by TCP. Our callbacks won't execute again once we
* hold the sock lock.
*/
-void rds_tcp_conn_shutdown(struct rds_connection *conn)
+void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
struct socket *sock = tc->t_sock;
- rdsdebug("shutting down conn %p tc %p sock %p\n", conn, tc, sock);
+ rdsdebug("shutting down conn %p tc %p sock %p\n",
+ cp->cp_conn, tc, sock);
if (sock) {
sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index f9cc945a77b3..ca975a217a49 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -79,6 +79,7 @@ int rds_tcp_accept_one(struct socket *sock)
struct inet_sock *inet;
struct rds_tcp_connection *rs_tcp = NULL;
int conn_state;
+ struct rds_conn_path *cp;
if (!sock) /* module unload or netns delete in progress */
return -ENETUNREACH;
@@ -120,8 +121,9 @@ int rds_tcp_accept_one(struct socket *sock)
* rds_tcp_state_change() will do that cleanup
*/
rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
+ cp = &conn->c_path[0];
rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
- mutex_lock(&rs_tcp->t_conn_lock);
+ mutex_lock(&rs_tcp->t_conn_path_lock);
conn_state = rds_conn_state(conn);
if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP)
goto rst_nsk;
@@ -136,16 +138,14 @@ int rds_tcp_accept_one(struct socket *sock)
!conn->c_path[0].cp_outgoing) {
goto rst_nsk;
} else {
- rds_tcp_reset_callbacks(new_sock, conn);
+ rds_tcp_reset_callbacks(new_sock, cp);
conn->c_path[0].cp_outgoing = 0;
/* rds_connect_path_complete() marks RDS_CONN_UP */
- rds_connect_path_complete(&conn->c_path[0],
- RDS_CONN_RESETTING);
+ rds_connect_path_complete(cp, RDS_CONN_RESETTING);
}
} else {
- rds_tcp_set_callbacks(new_sock, conn);
- rds_connect_path_complete(&conn->c_path[0],
- RDS_CONN_CONNECTING);
+ rds_tcp_set_callbacks(new_sock, cp);
+ rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
}
new_sock = NULL;
ret = 0;
@@ -156,7 +156,7 @@ rst_nsk:
ret = 0;
out:
if (rs_tcp)
- mutex_unlock(&rs_tcp->t_conn_lock);
+ mutex_unlock(&rs_tcp->t_conn_path_lock);
if (new_sock)
sock_release(new_sock);
return ret;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 4a87d9ef3084..ad4892e97f91 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -34,7 +34,6 @@
#include <linux/slab.h>
#include <net/tcp.h>
-#include "rds_single_path.h"
#include "rds.h"
#include "tcp.h"
@@ -148,7 +147,7 @@ static void rds_tcp_cong_recv(struct rds_connection *conn,
}
struct rds_tcp_desc_arg {
- struct rds_connection *conn;
+ struct rds_conn_path *conn_path;
gfp_t gfp;
};
@@ -156,8 +155,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
unsigned int offset, size_t len)
{
struct rds_tcp_desc_arg *arg = desc->arg.data;
- struct rds_connection *conn = arg->conn;
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_conn_path *cp = arg->conn_path;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
struct rds_tcp_incoming *tinc = tc->t_tinc;
struct sk_buff *clone;
size_t left = len, to_copy;
@@ -179,7 +178,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
}
tc->t_tinc = tinc;
rdsdebug("alloced tinc %p\n", tinc);
- rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr);
+ rds_inc_path_init(&tinc->ti_inc, cp,
+ cp->cp_conn->c_faddr);
/*
* XXX * we might be able to use the __ variants when
* we've already serialized at a higher level.
@@ -229,6 +229,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
}
if (tc->t_tinc_hdr_rem == 0 && tc->t_tinc_data_rem == 0) {
+ struct rds_connection *conn = cp->cp_conn;
+
if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
rds_tcp_cong_recv(conn, tinc);
else
@@ -251,15 +253,15 @@ out:
}
/* the caller has to hold the sock lock */
-static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp)
+static int rds_tcp_read_sock(struct rds_conn_path *cp, gfp_t gfp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
struct socket *sock = tc->t_sock;
read_descriptor_t desc;
struct rds_tcp_desc_arg arg;
/* It's like glib in the kernel! */
- arg.conn = conn;
+ arg.conn_path = cp;
arg.gfp = gfp;
desc.arg.data = &arg;
desc.error = 0;
@@ -279,16 +281,17 @@ static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp)
* if we fail to allocate we're in trouble.. blindly wait some time before
* trying again to see if the VM can free up something for us.
*/
-int rds_tcp_recv(struct rds_connection *conn)
+int rds_tcp_recv_path(struct rds_conn_path *cp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
struct socket *sock = tc->t_sock;
int ret = 0;
- rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock);
+ rdsdebug("recv worker path [%d] tc %p sock %p\n",
+ cp->cp_index, tc, sock);
lock_sock(sock->sk);
- ret = rds_tcp_read_sock(conn, GFP_KERNEL);
+ ret = rds_tcp_read_sock(cp, GFP_KERNEL);
release_sock(sock->sk);
return ret;
@@ -297,24 +300,24 @@ int rds_tcp_recv(struct rds_connection *conn)
void rds_tcp_data_ready(struct sock *sk)
{
void (*ready)(struct sock *sk);
- struct rds_connection *conn;
+ struct rds_conn_path *cp;
struct rds_tcp_connection *tc;
rdsdebug("data ready sk %p\n", sk);
read_lock_bh(&sk->sk_callback_lock);
- conn = sk->sk_user_data;
- if (!conn) { /* check for teardown race */
+ cp = sk->sk_user_data;
+ if (!cp) { /* check for teardown race */
ready = sk->sk_data_ready;
goto out;
}
- tc = conn->c_transport_data;
+ tc = cp->cp_transport_data;
ready = tc->t_orig_data_ready;
rds_tcp_stats_inc(s_tcp_data_ready_calls);
- if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM)
- queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
+ if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM)
+ queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
out:
read_unlock_bh(&sk->sk_callback_lock);
ready(sk);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 710f1aae97ad..57e0f5826406 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -49,16 +49,16 @@ static void rds_tcp_cork(struct socket *sock, int val)
set_fs(oldfs);
}
-void rds_tcp_xmit_prepare(struct rds_connection *conn)
+void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
rds_tcp_cork(tc->t_sock, 1);
}
-void rds_tcp_xmit_complete(struct rds_connection *conn)
+void rds_tcp_xmit_path_complete(struct rds_conn_path *cp)
{
- struct rds_tcp_connection *tc = conn->c_transport_data;
+ struct rds_tcp_connection *tc = cp->cp_transport_data;
rds_tcp_cork(tc->t_sock, 0);
}
@@ -178,27 +178,27 @@ static int rds_tcp_is_acked(struct rds_message *rm, uint64_t ack)
void rds_tcp_write_space(struct sock *sk)
{
void (*write_space)(struct sock *sk);
- struct rds_connection *conn;
+ struct rds_conn_path *cp;
struct rds_tcp_connection *tc;
read_lock_bh(&sk->sk_callback_lock);
- conn = sk->sk_user_data;
- if (!conn) {
+ cp = sk->sk_user_data;
+ if (!cp) {
write_space = sk->sk_write_space;
goto out;
}
- tc = conn->c_transport_data;
+ tc = cp->cp_transport_data;
rdsdebug("write_space for tc %p\n", tc);
write_space = tc->t_orig_write_space;
rds_tcp_stats_inc(s_tcp_write_space_calls);
rdsdebug("tcp una %u\n", rds_tcp_snd_una(tc));
tc->t_last_seen_una = rds_tcp_snd_una(tc);
- rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked);
+ rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
- queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+ queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
out:
read_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 9fbe95bb14a9..bc97d67f29cc 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -125,6 +125,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
conn, &conn->c_laddr, &conn->c_faddr,
cp->cp_reconnect_jiffies);
+ /* let peer with smaller addr initiate reconnect, to avoid duels */
+ if (conn->c_trans->t_type == RDS_TRANS_TCP &&
+ conn->c_laddr > conn->c_faddr)
+ return;
+
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
if (cp->cp_reconnect_jiffies == 0) {
cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
@@ -152,8 +157,9 @@ void rds_connect_worker(struct work_struct *work)
int ret;
clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
- if (rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
- ret = conn->c_trans->conn_connect(conn);
+ ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
+ if (ret) {
+ ret = conn->c_trans->conn_path_connect(cp);
rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
conn, &conn->c_laddr, &conn->c_faddr, ret);
@@ -203,7 +209,7 @@ void rds_recv_worker(struct work_struct *work)
int ret;
if (rds_conn_path_state(cp) == RDS_CONN_UP) {
- ret = cp->cp_conn->c_trans->recv(cp->cp_conn);
+ ret = cp->cp_conn->c_trans->recv_path(cp);
rdsdebug("conn %p ret %d\n", cp->cp_conn, ret);
switch (ret) {
case -EAGAIN:
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index f7b6cf49ea6f..ef74bffa6101 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -223,15 +223,10 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);
- fp = bpf_prog_get(bpf_fd);
+ fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_ACT);
if (IS_ERR(fp))
return PTR_ERR(fp);
- if (fp->type != BPF_PROG_TYPE_SCHED_ACT) {
- bpf_prog_put(fp);
- return -EINVAL;
- }
-
if (tb[TCA_ACT_BPF_NAME]) {
name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
nla_len(tb[TCA_ACT_BPF_NAME]),
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5b135d357e1e..70cfbbf96af2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -181,7 +181,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
if (!(at & AT_EGRESS)) {
if (m->tcfm_ok_push)
- skb_push(skb2, skb->mac_len);
+ skb_push_rcsum(skb2, skb->mac_len);
}
/* mirror is always swallowed */
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 53d1486cddf7..8e573c0f8742 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -47,6 +47,8 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
skb_set_queue_mapping(skb, d->queue_mapping);
if (d->flags & SKBEDIT_F_MARK)
skb->mark = d->mark;
+ if (d->flags & SKBEDIT_F_PTYPE)
+ skb->pkt_type = d->ptype;
spin_unlock(&d->tcf_lock);
return d->tcf_action;
@@ -57,6 +59,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
[TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) },
[TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) },
[TCA_SKBEDIT_MARK] = { .len = sizeof(u32) },
+ [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) },
};
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -68,7 +71,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct tc_skbedit *parm;
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL;
- u16 *queue_mapping = NULL;
+ u16 *queue_mapping = NULL, *ptype = NULL;
bool exists = false;
int ret = 0, err;
@@ -92,6 +95,13 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
}
+ if (tb[TCA_SKBEDIT_PTYPE] != NULL) {
+ ptype = nla_data(tb[TCA_SKBEDIT_PTYPE]);
+ if (!skb_pkt_type_ok(*ptype))
+ return -EINVAL;
+ flags |= SKBEDIT_F_PTYPE;
+ }
+
if (tb[TCA_SKBEDIT_MARK] != NULL) {
flags |= SKBEDIT_F_MARK;
mark = nla_data(tb[TCA_SKBEDIT_MARK]);
@@ -132,6 +142,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
d->queue_mapping = *queue_mapping;
if (flags & SKBEDIT_F_MARK)
d->mark = *mark;
+ if (flags & SKBEDIT_F_PTYPE)
+ d->ptype = *ptype;
d->tcf_action = parm->action;
@@ -158,16 +170,16 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
if ((d->flags & SKBEDIT_F_PRIORITY) &&
- nla_put(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
- &d->priority))
+ nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, d->priority))
goto nla_put_failure;
if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) &&
- nla_put(skb, TCA_SKBEDIT_QUEUE_MAPPING,
- sizeof(d->queue_mapping), &d->queue_mapping))
+ nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, d->queue_mapping))
goto nla_put_failure;
if ((d->flags & SKBEDIT_F_MARK) &&
- nla_put(skb, TCA_SKBEDIT_MARK, sizeof(d->mark),
- &d->mark))
+ nla_put_u32(skb, TCA_SKBEDIT_MARK, d->mark))
+ goto nla_put_failure;
+ if ((d->flags & SKBEDIT_F_PTYPE) &&
+ nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
goto nla_put_failure;
tcf_tm_dump(&t, &d->tcf_tm);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 7b342c779da7..c3002c2c68bb 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -272,15 +272,10 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
- fp = bpf_prog_get(bpf_fd);
+ fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
if (IS_ERR(fp))
return PTR_ERR(fp);
- if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
- bpf_prog_put(fp);
- return -EINVAL;
- }
-
if (tb[TCA_BPF_NAME]) {
name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
nla_len(tb[TCA_BPF_NAME]),
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 8cb5eff7b79c..dff92ea772fe 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -130,7 +130,6 @@ struct hfsc_class {
struct rb_node vt_node; /* parent's vt_tree member */
struct rb_root cf_tree; /* active children sorted by cl_f */
struct rb_node cf_node; /* parent's cf_heap member */
- struct list_head dlist; /* drop list member */
u64 cl_total; /* total work in bytes */
u64 cl_cumul; /* cumulative work in bytes done by
@@ -177,8 +176,6 @@ struct hfsc_sched {
struct hfsc_class root; /* root class */
struct Qdisc_class_hash clhash; /* class hash */
struct rb_root eligible; /* eligible tree */
- struct list_head droplist; /* active leaf class list (for
- dropping) */
struct qdisc_watchdog watchdog; /* watchdog timer */
};
@@ -781,6 +778,20 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
else
go_passive = 0;
+ /* update vt */
+ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+ - cl->cl_vtoff + cl->cl_vtadj;
+
+ /*
+ * if vt of the class is smaller than cvtmin,
+ * the class was skipped in the past due to non-fit.
+ * if so, we need to adjust vtadj.
+ */
+ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+ cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+ cl->cl_vt = cl->cl_parent->cl_cvtmin;
+ }
+
if (go_passive) {
/* no more active child, going passive */
@@ -797,25 +808,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
continue;
}
- /*
- * update vt and f
- */
- cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
- - cl->cl_vtoff + cl->cl_vtadj;
-
- /*
- * if vt of the class is smaller than cvtmin,
- * the class was skipped in the past due to non-fit.
- * if so, we need to adjust vtadj.
- */
- if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
- cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
- cl->cl_vt = cl->cl_parent->cl_cvtmin;
- }
-
/* update the vt tree */
vttree_update(cl);
+ /* update f */
if (cl->cl_flags & HFSC_USC) {
cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
cl->cl_total);
@@ -858,7 +854,6 @@ set_active(struct hfsc_class *cl, unsigned int len)
if (cl->cl_flags & HFSC_FSC)
init_vf(cl, len);
- list_add_tail(&cl->dlist, &cl->sched->droplist);
}
static void
@@ -867,8 +862,6 @@ set_passive(struct hfsc_class *cl)
if (cl->cl_flags & HFSC_RSC)
eltree_remove(cl);
- list_del(&cl->dlist);
-
/*
* vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
* needs to be called explicitly to remove a class from vttree.
@@ -882,7 +875,7 @@ qdisc_peek_len(struct Qdisc *sch)
unsigned int len;
skb = sch->ops->peek(sch);
- if (skb == NULL) {
+ if (unlikely(skb == NULL)) {
qdisc_warn_nonwc("qdisc_peek_len", sch);
return 0;
}
@@ -947,7 +940,7 @@ static void
hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
{
sc2isc(fsc, &cl->cl_fsc);
- rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
+ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total);
cl->cl_flags |= HFSC_FSC;
}
@@ -1443,7 +1436,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
if (err < 0)
return err;
q->eligible = RB_ROOT;
- INIT_LIST_HEAD(&q->droplist);
q->root.cl_common.classid = sch->handle;
q->root.refcnt = 1;
@@ -1527,7 +1519,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
hfsc_reset_class(cl);
}
q->eligible = RB_ROOT;
- INIT_LIST_HEAD(&q->droplist);
qdisc_watchdog_cancel(&q->watchdog);
sch->qstats.backlog = 0;
sch->q.qlen = 0;
@@ -1594,8 +1585,17 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
return err;
}
- if (cl->qdisc->q.qlen == 1)
+ if (cl->qdisc->q.qlen == 1) {
set_active(cl, qdisc_pkt_len(skb));
+ /*
+ * If this is the first packet, isolate the head so an eventual
+ * head drop before the first dequeue operation has no chance
+ * to invalidate the deadline.
+ */
+ if (cl->cl_flags & HFSC_RSC)
+ cl->qdisc->ops->peek(cl->qdisc);
+
+ }
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 3ad9fab1985f..1fd464764765 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -604,7 +604,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
- nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]),
+ nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME],
TIPC_MAX_LINK_NAME);
return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO,