49 files changed, 5436 insertions, 1307 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f4c042be0216..77c37fb0b6a6 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1834,6 +1834,16 @@ stable_secret - IPv6 address
 
 	By default the stable secret is unset.
 
+addr_gen_mode - INTEGER
+	Defines how link-local and autoconf addresses are generated.
+
+	0: generate address based on EUI64 (default)
+	1: do no generate a link-local address, use EUI64 for addresses generated
+	   from autoconf
+	2: generate stable privacy addresses, using the secret from
+	   stable_secret (RFC7217)
+	3: generate stable privacy addresses, using a random secret if unset
+
 drop_unicast_in_l2_multicast - BOOLEAN
 	Drop any unicast IPv6 packets that are received in link-layer
 	multicast (or broadcast) frames.
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 2369b2f0d95a..d83233ae4a15 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -4088,7 +4088,7 @@ static void cnic_cm_free_mem(struct cnic_dev *dev)
 {
 	struct cnic_local *cp = dev->cnic_priv;
 
-	kfree(cp->csk_tbl);
+	kvfree(cp->csk_tbl);
 	cp->csk_tbl = NULL;
 	cnic_free_id_tbl(&cp->csk_port_tbl);
 }
@@ -4098,8 +4098,8 @@ static int cnic_cm_alloc_mem(struct cnic_dev *dev)
 	struct cnic_local *cp = dev->cnic_priv;
 	u32 port_id;
 
-	cp->csk_tbl = kcalloc(MAX_CM_SK_TBL_SZ, sizeof(struct cnic_sock),
-			      GFP_KERNEL);
+	cp->csk_tbl = kvcalloc(MAX_CM_SK_TBL_SZ, sizeof(struct cnic_sock),
+			       GFP_KERNEL);
 	if (!cp->csk_tbl)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 516c883e1613..511606fd1b20 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2962,6 +2962,169 @@ static const struct file_operations chcr_stats_debugfs_fops = {
         .llseek  = seq_lseek,
         .release = single_release,
 };
+
+#define PRINT_ADAP_STATS(string, value) \
+	seq_printf(seq, "%-25s %-20llu\n", (string), \
+		   (unsigned long long)(value))
+
+#define PRINT_CH_STATS(string, value) \
+do { \
+	seq_printf(seq, "%-25s ", (string)); \
+	for (i = 0; i < adap->params.arch.nchan; i++) \
+		seq_printf(seq, "%-20llu ", \
+			   (unsigned long long)stats.value[i]); \
+	seq_printf(seq, "\n"); \
+} while (0)
+
+#define PRINT_CH_STATS2(string, value) \
+do { \
+	seq_printf(seq, "%-25s ", (string)); \
+	for (i = 0; i < adap->params.arch.nchan; i++) \
+		seq_printf(seq, "%-20llu ", \
+			   (unsigned long long)stats[i].value); \
+	seq_printf(seq, "\n"); \
+} while (0)
+
+static void show_tcp_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_tcp_stats v4, v6;
+
+	spin_lock(&adap->stats_lock);
+	t4_tp_get_tcp_stats(adap, &v4, &v6, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_ADAP_STATS("tcp_ipv4_out_rsts:", v4.tcp_out_rsts);
+	PRINT_ADAP_STATS("tcp_ipv4_in_segs:", v4.tcp_in_segs);
+	PRINT_ADAP_STATS("tcp_ipv4_out_segs:", v4.tcp_out_segs);
+	PRINT_ADAP_STATS("tcp_ipv4_retrans_segs:", v4.tcp_retrans_segs);
+	PRINT_ADAP_STATS("tcp_ipv6_out_rsts:", v6.tcp_out_rsts);
+	PRINT_ADAP_STATS("tcp_ipv6_in_segs:", v6.tcp_in_segs);
+	PRINT_ADAP_STATS("tcp_ipv6_out_segs:", v6.tcp_out_segs);
+	PRINT_ADAP_STATS("tcp_ipv6_retrans_segs:", v6.tcp_retrans_segs);
+}
+
+static void show_ddp_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_usm_stats stats;
+
+	spin_lock(&adap->stats_lock);
+	t4_get_usm_stats(adap, &stats, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_ADAP_STATS("usm_ddp_frames:", stats.frames);
+	PRINT_ADAP_STATS("usm_ddp_octets:", stats.octets);
+	PRINT_ADAP_STATS("usm_ddp_drops:", stats.drops);
+}
+
+static void show_rdma_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_rdma_stats stats;
+
+	spin_lock(&adap->stats_lock);
+	t4_tp_get_rdma_stats(adap, &stats, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_ADAP_STATS("rdma_no_rqe_mod_defer:", stats.rqe_dfr_mod);
+	PRINT_ADAP_STATS("rdma_no_rqe_pkt_defer:", stats.rqe_dfr_pkt);
+}
+
+static void show_tp_err_adapter_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_err_stats stats;
+
+	spin_lock(&adap->stats_lock);
+	t4_tp_get_err_stats(adap, &stats, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_ADAP_STATS("tp_err_ofld_no_neigh:", stats.ofld_no_neigh);
+	PRINT_ADAP_STATS("tp_err_ofld_cong_defer:", stats.ofld_cong_defer);
+}
+
+static void show_cpl_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_cpl_stats stats;
+	u8 i;
+
+	spin_lock(&adap->stats_lock);
+	t4_tp_get_cpl_stats(adap, &stats, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_CH_STATS("tp_cpl_requests:", req);
+	PRINT_CH_STATS("tp_cpl_responses:", rsp);
+}
+
+static void show_tp_err_channel_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_err_stats stats;
+	u8 i;
+
+	spin_lock(&adap->stats_lock);
+	t4_tp_get_err_stats(adap, &stats, false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_CH_STATS("tp_mac_in_errs:", mac_in_errs);
+	PRINT_CH_STATS("tp_hdr_in_errs:", hdr_in_errs);
+	PRINT_CH_STATS("tp_tcp_in_errs:", tcp_in_errs);
+	PRINT_CH_STATS("tp_tcp6_in_errs:", tcp6_in_errs);
+	PRINT_CH_STATS("tp_tnl_cong_drops:", tnl_cong_drops);
+	PRINT_CH_STATS("tp_tnl_tx_drops:", tnl_tx_drops);
+	PRINT_CH_STATS("tp_ofld_vlan_drops:", ofld_vlan_drops);
+	PRINT_CH_STATS("tp_ofld_chan_drops:", ofld_chan_drops);
+}
+
+static void show_fcoe_stats(struct seq_file *seq)
+{
+	struct adapter *adap = seq->private;
+	struct tp_fcoe_stats stats[NCHAN];
+	u8 i;
+
+	spin_lock(&adap->stats_lock);
+	for (i = 0; i < adap->params.arch.nchan; i++)
+		t4_get_fcoe_stats(adap, i, &stats[i], false);
+	spin_unlock(&adap->stats_lock);
+
+	PRINT_CH_STATS2("fcoe_octets_ddp", octets_ddp);
+	PRINT_CH_STATS2("fcoe_frames_ddp", frames_ddp);
+	PRINT_CH_STATS2("fcoe_frames_drop", frames_drop);
+}
+
+#undef PRINT_CH_STATS2
+#undef PRINT_CH_STATS
+#undef PRINT_ADAP_STATS
+
+static int tp_stats_show(struct seq_file *seq, void *v)
+{
+	struct adapter *adap = seq->private;
+
+	seq_puts(seq, "\n--------Adapter Stats--------\n");
+	show_tcp_stats(seq);
+	show_ddp_stats(seq);
+	show_rdma_stats(seq);
+	show_tp_err_adapter_stats(seq);
+
+	seq_puts(seq, "\n-------- Channel Stats --------\n");
+	if (adap->params.arch.nchan == NCHAN)
+		seq_printf(seq, "%-25s %-20s %-20s %-20s %-20s\n",
+			   " ", "channel 0", "channel 1",
+			   "channel 2", "channel 3");
+	else
+		seq_printf(seq, "%-25s %-20s %-20s\n",
+			   " ", "channel 0", "channel 1");
+	show_cpl_stats(seq);
+	show_tp_err_channel_stats(seq);
+	show_fcoe_stats(seq);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_DEBUGFS_FILE(tp_stats);
+
 /* Add an array of Debug FS files.
  */
 void add_debugfs_files(struct adapter *adap,
@@ -3038,6 +3201,7 @@ int t4_setup_debugfs(struct adapter *adap)
 		{ "blocked_fl", &blocked_fl_fops, 0600, 0 },
 		{ "meminfo", &meminfo_fops, 0400, 0 },
 		{ "crypto", &chcr_stats_debugfs_fops, 0400, 0 },
+		{ "tp_stats", &tp_stats_debugfs_fops, 0400, 0 },
 	};
 
 	/* Debug FS nodes common to all T5 and later adapters.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index ddb8b9eba6bf..a14a290a56ee 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -115,42 +115,10 @@ static char adapter_stats_strings[][ETH_GSTRING_LEN] = {
 	"db_drop                ",
 	"db_full                ",
 	"db_empty               ",
-	"tcp_ipv4_out_rsts      ",
-	"tcp_ipv4_in_segs       ",
-	"tcp_ipv4_out_segs      ",
-	"tcp_ipv4_retrans_segs  ",
-	"tcp_ipv6_out_rsts      ",
-	"tcp_ipv6_in_segs       ",
-	"tcp_ipv6_out_segs      ",
-	"tcp_ipv6_retrans_segs  ",
-	"usm_ddp_frames         ",
-	"usm_ddp_octets         ",
-	"usm_ddp_drops          ",
-	"rdma_no_rqe_mod_defer  ",
-	"rdma_no_rqe_pkt_defer  ",
-	"tp_err_ofld_no_neigh   ",
-	"tp_err_ofld_cong_defer ",
 	"write_coal_success     ",
 	"write_coal_fail        ",
 };
 
-static char channel_stats_strings[][ETH_GSTRING_LEN] = {
-	"--------Channel--------- ",
-	"tp_cpl_requests        ",
-	"tp_cpl_responses       ",
-	"tp_mac_in_errs         ",
-	"tp_hdr_in_errs         ",
-	"tp_tcp_in_errs         ",
-	"tp_tcp6_in_errs        ",
-	"tp_tnl_cong_drops      ",
-	"tp_tnl_tx_drops        ",
-	"tp_ofld_vlan_drops     ",
-	"tp_ofld_chan_drops     ",
-	"fcoe_octets_ddp        ",
-	"fcoe_frames_ddp        ",
-	"fcoe_frames_drop       ",
-};
-
 static char loopback_stats_strings[][ETH_GSTRING_LEN] = {
 	"-------Loopback----------- ",
 	"octets_ok              ",
@@ -187,7 +155,6 @@ static int get_sset_count(struct net_device *dev, int sset)
 	case ETH_SS_STATS:
 		return ARRAY_SIZE(stats_strings) +
 		       ARRAY_SIZE(adapter_stats_strings) +
-		       ARRAY_SIZE(channel_stats_strings) +
 		       ARRAY_SIZE(loopback_stats_strings);
 	case ETH_SS_PRIV_FLAGS:
 		return ARRAY_SIZE(cxgb4_priv_flags_strings);
@@ -252,9 +219,6 @@ static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
 		memcpy(data, adapter_stats_strings,
 		       sizeof(adapter_stats_strings));
 		data += sizeof(adapter_stats_strings);
-		memcpy(data, channel_stats_strings,
-		       sizeof(channel_stats_strings));
-		data += sizeof(channel_stats_strings);
 		memcpy(data, loopback_stats_strings,
 		       sizeof(loopback_stats_strings));
 	} else if (stringset == ETH_SS_PRIV_FLAGS) {
@@ -280,41 +244,10 @@ struct adapter_stats {
 	u64 db_drop;
 	u64 db_full;
 	u64 db_empty;
-	u64 tcp_v4_out_rsts;
-	u64 tcp_v4_in_segs;
-	u64 tcp_v4_out_segs;
-	u64 tcp_v4_retrans_segs;
-	u64 tcp_v6_out_rsts;
-	u64 tcp_v6_in_segs;
-	u64 tcp_v6_out_segs;
-	u64 tcp_v6_retrans_segs;
-	u64 frames;
-	u64 octets;
-	u64 drops;
-	u64 rqe_dfr_mod;
-	u64 rqe_dfr_pkt;
-	u64 ofld_no_neigh;
-	u64 ofld_cong_defer;
 	u64 wc_success;
 	u64 wc_fail;
 };
 
-struct channel_stats {
-	u64 cpl_req;
-	u64 cpl_rsp;
-	u64 mac_in_errs;
-	u64 hdr_in_errs;
-	u64 tcp_in_errs;
-	u64 tcp6_in_errs;
-	u64 tnl_cong_drops;
-	u64 tnl_tx_drops;
-	u64 ofld_vlan_drops;
-	u64 ofld_chan_drops;
-	u64 octets_ddp;
-	u64 frames_ddp;
-	u64 frames_drop;
-};
-
 static void collect_sge_port_stats(const struct adapter *adap,
 				   const struct port_info *p,
 				   struct queue_port_stats *s)
@@ -337,45 +270,14 @@ static void collect_sge_port_stats(const struct adapter *adap,
 
 static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s)
 {
-	struct tp_tcp_stats v4, v6;
-	struct tp_rdma_stats rdma_stats;
-	struct tp_err_stats err_stats;
-	struct tp_usm_stats usm_stats;
 	u64 val1, val2;
 
 	memset(s, 0, sizeof(*s));
 
-	spin_lock(&adap->stats_lock);
-	t4_tp_get_tcp_stats(adap, &v4, &v6, false);
-	t4_tp_get_rdma_stats(adap, &rdma_stats, false);
-	t4_get_usm_stats(adap, &usm_stats, false);
-	t4_tp_get_err_stats(adap, &err_stats, false);
-	spin_unlock(&adap->stats_lock);
-
 	s->db_drop = adap->db_stats.db_drop;
 	s->db_full = adap->db_stats.db_full;
 	s->db_empty = adap->db_stats.db_empty;
 
-	s->tcp_v4_out_rsts = v4.tcp_out_rsts;
-	s->tcp_v4_in_segs = v4.tcp_in_segs;
-	s->tcp_v4_out_segs = v4.tcp_out_segs;
-	s->tcp_v4_retrans_segs = v4.tcp_retrans_segs;
-	s->tcp_v6_out_rsts = v6.tcp_out_rsts;
-	s->tcp_v6_in_segs = v6.tcp_in_segs;
-	s->tcp_v6_out_segs = v6.tcp_out_segs;
-	s->tcp_v6_retrans_segs = v6.tcp_retrans_segs;
-
-	if (is_offload(adap)) {
-		s->frames = usm_stats.frames;
-		s->octets = usm_stats.octets;
-		s->drops = usm_stats.drops;
-		s->rqe_dfr_mod = rdma_stats.rqe_dfr_mod;
-		s->rqe_dfr_pkt = rdma_stats.rqe_dfr_pkt;
-	}
-
-	s->ofld_no_neigh = err_stats.ofld_no_neigh;
-	s->ofld_cong_defer = err_stats.ofld_cong_defer;
-
 	if (!is_t4(adap->params.chip)) {
 		int v;
 
@@ -389,36 +291,6 @@ static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s)
 	}
 }
 
-static void collect_channel_stats(struct adapter *adap, struct channel_stats *s,
-				  u8 i)
-{
-	struct tp_cpl_stats cpl_stats;
-	struct tp_err_stats err_stats;
-	struct tp_fcoe_stats fcoe_stats;
-
-	memset(s, 0, sizeof(*s));
-
-	spin_lock(&adap->stats_lock);
-	t4_tp_get_cpl_stats(adap, &cpl_stats, false);
-	t4_tp_get_err_stats(adap, &err_stats, false);
-	t4_get_fcoe_stats(adap, i, &fcoe_stats, false);
-	spin_unlock(&adap->stats_lock);
-
-	s->cpl_req = cpl_stats.req[i];
-	s->cpl_rsp = cpl_stats.rsp[i];
-	s->mac_in_errs = err_stats.mac_in_errs[i];
-	s->hdr_in_errs = err_stats.hdr_in_errs[i];
-	s->tcp_in_errs = err_stats.tcp_in_errs[i];
-	s->tcp6_in_errs = err_stats.tcp6_in_errs[i];
-	s->tnl_cong_drops = err_stats.tnl_cong_drops[i];
-	s->tnl_tx_drops = err_stats.tnl_tx_drops[i];
-	s->ofld_vlan_drops = err_stats.ofld_vlan_drops[i];
-	s->ofld_chan_drops = err_stats.ofld_chan_drops[i];
-	s->octets_ddp = fcoe_stats.octets_ddp;
-	s->frames_ddp = fcoe_stats.frames_ddp;
-	s->frames_drop = fcoe_stats.frames_drop;
-}
-
 static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
 		      u64 *data)
 {
@@ -439,11 +311,6 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
 	data += sizeof(struct adapter_stats) / sizeof(u64);
 
 	*data++ = (u64)pi->port_id;
-	collect_channel_stats(adapter, (struct channel_stats *)data,
-			      pi->port_id);
-	data += sizeof(struct channel_stats) / sizeof(u64);
-
-	*data++ = (u64)pi->port_id;
 	memset(&s, 0, sizeof(s));
 	t4_get_lb_stats(adapter, pi->port_id, &s);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index ebb46c472d52..6807bc3a44fb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -3412,7 +3412,9 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 	c.iqsize = htons(iq->size);
 	c.iqaddr = cpu_to_be64(iq->phys_addr);
 	if (cong >= 0)
-		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F);
+		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F |
+				FW_IQ_CMD_IQTYPE_V(cong ? FW_IQ_IQTYPE_NIC
+							:  FW_IQ_IQTYPE_OFLD));
 
 	if (fl) {
 		enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index f1967cf6d43c..5dc6c4154af8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1472,6 +1472,12 @@ enum fw_iq_type {
 	FW_IQ_TYPE_NO_FL_INT_CAP
 };
 
+enum fw_iq_iqtype {
+	FW_IQ_IQTYPE_OTHER,
+	FW_IQ_IQTYPE_NIC,
+	FW_IQ_IQTYPE_OFLD,
+};
+
 struct fw_iq_cmd {
 	__be32 op_to_vfn;
 	__be32 alloc_to_len16;
@@ -1586,6 +1592,12 @@ struct fw_iq_cmd {
 #define FW_IQ_CMD_IQFLINTISCSIC_S	26
 #define FW_IQ_CMD_IQFLINTISCSIC_V(x)	((x) << FW_IQ_CMD_IQFLINTISCSIC_S)
 
+#define FW_IQ_CMD_IQTYPE_S		24
+#define FW_IQ_CMD_IQTYPE_M		0x3
+#define FW_IQ_CMD_IQTYPE_V(x)		((x) << FW_IQ_CMD_IQTYPE_S)
+#define FW_IQ_CMD_IQTYPE_G(x)		\
+	(((x) >> FW_IQ_CMD_IQTYPE_S) & FW_IQ_CMD_IQTYPE_M)
+
 #define FW_IQ_CMD_FL0CNGCHMAP_S		20
 #define FW_IQ_CMD_FL0CNGCHMAP_V(x)	((x) << FW_IQ_CMD_FL0CNGCHMAP_S)
 
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 81a66cce7fa8..18834619bb3a 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -12,6 +12,7 @@
 #ifndef _MVPP2_H_
 #define _MVPP2_H_
 
+#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/phy.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bbd2fd0b2e06..c7791d036e9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -349,7 +349,8 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 {
 	int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
 
-	rq->mpwqe.info = kvzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
+	rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
+						  sizeof(*rq->mpwqe.info)),
 				       GFP_KERNEL, cpu_to_node(c->cpu));
 	if (!rq->mpwqe.info)
 		return -ENOMEM;
@@ -969,7 +970,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
 {
 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 
-	sq->db.di = kvzalloc_node(sizeof(*sq->db.di) * wq_sz,
+	sq->db.di = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.di)),
 				  GFP_KERNEL, numa);
 	if (!sq->db.di) {
 		mlx5e_free_xdpsq_db(sq);
@@ -1028,7 +1029,8 @@ static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
 {
 	u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 
-	sq->db.ico_wqe = kvzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz,
+	sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz,
+						  sizeof(*sq->db.ico_wqe)),
 				       GFP_KERNEL, numa);
 	if (!sq->db.ico_wqe)
 		return -ENOMEM;
@@ -1083,9 +1085,11 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 	int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
 
-	sq->db.dma_fifo = kvzalloc_node(df_sz * sizeof(*sq->db.dma_fifo),
+	sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
+						   sizeof(*sq->db.dma_fifo)),
 					GFP_KERNEL, numa);
-	sq->db.wqe_info = kvzalloc_node(wq_sz * sizeof(*sq->db.wqe_info),
+	sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
+						   sizeof(*sq->db.wqe_info)),
 					GFP_KERNEL, numa);
 	if (!sq->db.dma_fifo || !sq->db.wqe_info) {
 		mlx5e_free_txqsq_db(sq);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 0cadcabfe86f..981e621ef9c2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -15,11 +15,16 @@ mlxsw_switchx2-objs		:= switchx2.o
 obj-$(CONFIG_MLXSW_SPECTRUM)	+= mlxsw_spectrum.o
 mlxsw_spectrum-objs		:= spectrum.o spectrum_buffers.o \
 				   spectrum_switchdev.o spectrum_router.o \
-				   spectrum_kvdl.o spectrum_acl_tcam.o \
-				   spectrum_acl.o spectrum_flower.o \
-				   spectrum_cnt.o spectrum_fid.o \
-				   spectrum_ipip.o spectrum_acl_flex_actions.o \
-				   spectrum_mr.o spectrum_mr_tcam.o \
+				   spectrum1_kvdl.o spectrum_kvdl.o \
+				   spectrum_acl_tcam.o spectrum_acl_ctcam.o \
+				   spectrum1_acl_tcam.o \
+				   spectrum_acl.o \
+				   spectrum_flower.o spectrum_cnt.o \
+				   spectrum_fid.o spectrum_ipip.o \
+				   spectrum_acl_flex_actions.o \
+				   spectrum_acl_flex_keys.o \
+				   spectrum1_mr_tcam.o \
+				   spectrum_mr_tcam.o spectrum_mr.o \
 				   spectrum_qdisc.o spectrum_span.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)	+= spectrum_dcb.o
 mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
index b32a00972e83..bf645215f514 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
@@ -43,6 +43,7 @@
 struct mlxsw_afk {
 	struct list_head key_info_list;
 	unsigned int max_blocks;
+	const struct mlxsw_afk_ops *ops;
 	const struct mlxsw_afk_block *blocks;
 	unsigned int blocks_count;
 };
@@ -69,8 +70,7 @@ static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk)
 }
 
 struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
-				   const struct mlxsw_afk_block *blocks,
-				   unsigned int blocks_count)
+				   const struct mlxsw_afk_ops *ops)
 {
 	struct mlxsw_afk *mlxsw_afk;
 
@@ -79,8 +79,9 @@ struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
 		return NULL;
 	INIT_LIST_HEAD(&mlxsw_afk->key_info_list);
 	mlxsw_afk->max_blocks = max_blocks;
-	mlxsw_afk->blocks = blocks;
-	mlxsw_afk->blocks_count = blocks_count;
+	mlxsw_afk->ops = ops;
+	mlxsw_afk->blocks = ops->blocks;
+	mlxsw_afk->blocks_count = ops->blocks_count;
 	WARN_ON(!mlxsw_afk_blocks_check(mlxsw_afk));
 	return mlxsw_afk;
 }
@@ -415,45 +416,8 @@ void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values,
 }
 EXPORT_SYMBOL(mlxsw_afk_values_add_buf);
 
-static void mlxsw_afk_encode_u32(const struct mlxsw_item *storage_item,
-				 const struct mlxsw_item *output_item,
-				 char *storage, char *output_indexed)
-{
-	u32 value;
-
-	value = __mlxsw_item_get32(storage, storage_item, 0);
-	__mlxsw_item_set32(output_indexed, output_item, 0, value);
-}
-
-static void mlxsw_afk_encode_buf(const struct mlxsw_item *storage_item,
-				 const struct mlxsw_item *output_item,
-				 char *storage, char *output_indexed)
-{
-	char *storage_data = __mlxsw_item_data(storage, storage_item, 0);
-	char *output_data = __mlxsw_item_data(output_indexed, output_item, 0);
-	size_t len = output_item->size.bytes;
-
-	memcpy(output_data, storage_data, len);
-}
-
-#define MLXSW_AFK_KEY_BLOCK_SIZE 16
-
-static void mlxsw_afk_encode_one(const struct mlxsw_afk_element_inst *elinst,
-				 int block_index, char *storage, char *output)
-{
-	char *output_indexed = output + block_index * MLXSW_AFK_KEY_BLOCK_SIZE;
-	const struct mlxsw_item *storage_item = &elinst->info->item;
-	const struct mlxsw_item *output_item = &elinst->item;
-
-	if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32)
-		mlxsw_afk_encode_u32(storage_item, output_item,
-				     storage, output_indexed);
-	else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF)
-		mlxsw_afk_encode_buf(storage_item, output_item,
-				     storage, output_indexed);
-}
-
-void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info,
+void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
+		      struct mlxsw_afk_key_info *key_info,
 		      struct mlxsw_afk_element_values *values,
 		      char *key, char *mask)
 {
@@ -466,10 +430,10 @@ void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info,
 						       &block_index);
 		if (!elinst)
 			continue;
-		mlxsw_afk_encode_one(elinst, block_index,
-				     values->storage.key, key);
-		mlxsw_afk_encode_one(elinst, block_index,
-				     values->storage.mask, mask);
+		mlxsw_afk->ops->encode_one(elinst, block_index,
+					   values->storage.key, key);
+		mlxsw_afk->ops->encode_one(elinst, block_index,
+					   values->storage.mask, mask);
 	}
 }
 EXPORT_SYMBOL(mlxsw_afk_encode);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
index 154eccd64019..441636cd13d8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
@@ -216,9 +216,15 @@ mlxsw_afk_element_usage_subset(struct mlxsw_afk_element_usage *elusage_small,
 
 struct mlxsw_afk;
 
+struct mlxsw_afk_ops {
+	const struct mlxsw_afk_block *blocks;
+	unsigned int blocks_count;
+	void (*encode_one)(const struct mlxsw_afk_element_inst *elinst,
+			   int block_index, char *storage, char *output);
+};
+
 struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks,
-				   const struct mlxsw_afk_block *blocks,
-				   unsigned int blocks_count);
+				   const struct mlxsw_afk_ops *ops);
 void mlxsw_afk_destroy(struct mlxsw_afk *mlxsw_afk);
 
 struct mlxsw_afk_key_info;
@@ -251,7 +257,8 @@ void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values,
 			      enum mlxsw_afk_element element,
 			      const char *key_value, const char *mask_value,
 			      unsigned int len);
-void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info,
+void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk,
+		      struct mlxsw_afk_key_info *key_info,
 		      struct mlxsw_afk_element_values *values,
 		      char *key, char *mask);
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 29c51ae91241..ccf4aae91630 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -2402,6 +2402,15 @@ MLXSW_ITEM32(reg, ptce2, op, 0x00, 20, 3);
  */
 MLXSW_ITEM32(reg, ptce2, offset, 0x00, 0, 16);
 
+/* reg_ptce2_priority
+ * Priority of the rule, higher values win. The range is 1..cap_kvd_size-1.
+ * Note: priority does not have to be unique per rule.
+ * Within a region, higher priority should have lower offset (no limitation
+ * between regions in a multi-region).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptce2, priority, 0x04, 0, 24);
+
 /* reg_ptce2_tcam_region_info
  * Opaque object that represents the TCAM region.
  * Access: Index
@@ -2437,12 +2446,13 @@ MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0,
 static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid,
 					enum mlxsw_reg_ptce2_op op,
 					const char *tcam_region_info,
-					u16 offset)
+					u16 offset, u32 priority)
 {
 	MLXSW_REG_ZERO(ptce2, payload);
 	mlxsw_reg_ptce2_v_set(payload, valid);
 	mlxsw_reg_ptce2_op_set(payload, op);
 	mlxsw_reg_ptce2_offset_set(payload, offset);
+	mlxsw_reg_ptce2_priority_set(payload, priority);
 	mlxsw_reg_ptce2_tcam_region_info_memcpy_to(payload, tcam_region_info);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index fd9299ccec72..f672a7b71de7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -42,6 +42,8 @@ enum mlxsw_res_id {
 	MLXSW_RES_ID_KVD_SIZE,
 	MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE,
 	MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE,
+	MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE,
+	MLXSW_RES_ID_MAX_KVD_ACTION_SETS,
 	MLXSW_RES_ID_MAX_TRAP_GROUPS,
 	MLXSW_RES_ID_CQE_V0,
 	MLXSW_RES_ID_CQE_V1,
@@ -83,6 +85,8 @@ static u16 mlxsw_res_ids[] = {
 	[MLXSW_RES_ID_KVD_SIZE] = 0x1001,
 	[MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002,
 	[MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003,
+	[MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE] = 0x1005,
+	[MLXSW_RES_ID_MAX_KVD_ACTION_SETS] = 0x1007,
 	[MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201,
 	[MLXSW_RES_ID_CQE_V0] = 0x2210,
 	[MLXSW_RES_ID_CQE_V1] = 0x2211,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index cba0e9ba7dea..5b2f4c6f5e57 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -74,15 +74,22 @@
 #include "spectrum_span.h"
 #include "../mlxfw/mlxfw.h"
 
-#define MLXSW_FWREV_MAJOR 13
-#define MLXSW_FWREV_MINOR 1620
-#define MLXSW_FWREV_SUBMINOR 192
-#define MLXSW_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
+#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
 
-#define MLXSW_SP_FW_FILENAME \
-	"mellanox/mlxsw_spectrum-" __stringify(MLXSW_FWREV_MAJOR) \
-	"." __stringify(MLXSW_FWREV_MINOR) \
-	"." __stringify(MLXSW_FWREV_SUBMINOR) ".mfa2"
+#define MLXSW_SP1_FWREV_MAJOR 13
+#define MLXSW_SP1_FWREV_MINOR 1620
+#define MLXSW_SP1_FWREV_SUBMINOR 192
+
+static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
+	.major = MLXSW_SP1_FWREV_MAJOR,
+	.minor = MLXSW_SP1_FWREV_MINOR,
+	.subminor = MLXSW_SP1_FWREV_SUBMINOR,
+};
+
+#define MLXSW_SP1_FW_FILENAME \
+	"mellanox/mlxsw_spectrum-" __stringify(MLXSW_SP1_FWREV_MAJOR) \
+	"." __stringify(MLXSW_SP1_FWREV_MINOR) \
+	"." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2"
 
 static const char mlxsw_sp_driver_name[] = "mlxsw_spectrum";
 static const char mlxsw_sp_driver_version[] = "1.0";
@@ -338,29 +345,35 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
 {
 	const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev;
+	const struct mlxsw_fw_rev *req_rev = mlxsw_sp->req_rev;
+	const char *fw_filename = mlxsw_sp->fw_filename;
 	const struct firmware *firmware;
 	int err;
 
+	/* Don't check if driver does not require it */
+	if (!req_rev || !fw_filename)
+		return 0;
+
 	/* Validate driver & FW are compatible */
-	if (rev->major != MLXSW_FWREV_MAJOR) {
+	if (rev->major != req_rev->major) {
 		WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n",
-		     rev->major, MLXSW_FWREV_MAJOR);
+		     rev->major, req_rev->major);
 		return -EINVAL;
 	}
-	if (MLXSW_FWREV_MINOR_TO_BRANCH(rev->minor) ==
-	    MLXSW_FWREV_MINOR_TO_BRANCH(MLXSW_FWREV_MINOR))
+	if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) ==
+	    MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor))
 		return 0;
 
 	dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n",
 		 rev->major, rev->minor, rev->subminor);
 	dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n",
-		 MLXSW_SP_FW_FILENAME);
+		 fw_filename);
 
-	err = request_firmware_direct(&firmware, MLXSW_SP_FW_FILENAME,
+	err = request_firmware_direct(&firmware, fw_filename,
 				      mlxsw_sp->bus_info->dev);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Could not request firmware file %s\n",
-			MLXSW_SP_FW_FILENAME);
+			fw_filename);
 		return err;
 	}
 
@@ -3621,7 +3634,13 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	int err;
 
+	mlxsw_sp->req_rev = &mlxsw_sp1_fw_rev;
+	mlxsw_sp->fw_filename = MLXSW_SP1_FW_FILENAME;
+	mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops;
 	mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops;
+	mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
+	mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops;
+	mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
 
 	mlxsw_sp->core = mlxsw_core;
 	mlxsw_sp->bus_info = mlxsw_bus_info;
@@ -3880,7 +3899,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 	if (err)
 		return err;
 
-	err = mlxsw_sp_kvdl_resources_register(mlxsw_core);
+	err = mlxsw_sp1_kvdl_resources_register(mlxsw_core);
 	if  (err)
 		return err;
 
@@ -4741,4 +4760,4 @@ MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <[email protected]>");
 MODULE_DESCRIPTION("Mellanox Spectrum driver");
 MODULE_DEVICE_TABLE(pci, mlxsw_sp_pci_id_table);
-MODULE_FIRMWARE(MLXSW_SP_FW_FILENAME);
+MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index f5294c9a4c92..eb3d3c0bdb03 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -145,6 +145,9 @@ struct mlxsw_sp_acl;
 struct mlxsw_sp_counter_pool;
 struct mlxsw_sp_fid_core;
 struct mlxsw_sp_kvdl;
+struct mlxsw_sp_kvdl_ops;
+struct mlxsw_sp_mr_tcam_ops;
+struct mlxsw_sp_acl_tcam_ops;
 
 struct mlxsw_sp {
 	struct mlxsw_sp_port **ports;
@@ -168,7 +171,13 @@ struct mlxsw_sp {
 		struct mlxsw_sp_span_entry *entries;
 		int entries_count;
 	} span;
+	const struct mlxsw_fw_rev *req_rev;
+	const char *fw_filename;
+	const struct mlxsw_sp_kvdl_ops *kvdl_ops;
 	const struct mlxsw_afa_ops *afa_ops;
+	const struct mlxsw_afk_ops *afk_ops;
+	const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
+	const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
 };
 
 static inline struct mlxsw_sp_upper *
@@ -436,15 +445,59 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
 
 /* spectrum_kvdl.c */
+enum mlxsw_sp_kvdl_entry_type {
+	MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+	MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+	MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+	MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+};
+
+static inline unsigned int
+mlxsw_sp_kvdl_entry_size(enum mlxsw_sp_kvdl_entry_type type)
+{
+	switch (type) {
+	case MLXSW_SP_KVDL_ENTRY_TYPE_ADJ: /* fall through */
+	case MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET: /* fall through */
+	case MLXSW_SP_KVDL_ENTRY_TYPE_PBS: /* fall through */
+	case MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR: /* fall through */
+	default:
+		return 1;
+	}
+}
+
+struct mlxsw_sp_kvdl_ops {
+	size_t priv_size;
+	int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+	void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
+	int (*alloc)(struct mlxsw_sp *mlxsw_sp, void *priv,
+		     enum mlxsw_sp_kvdl_entry_type type,
+		     unsigned int entry_count, u32 *p_entry_index);
+	void (*free)(struct mlxsw_sp *mlxsw_sp, void *priv,
+		     enum mlxsw_sp_kvdl_entry_type type,
+		     unsigned int entry_count, int entry_index);
+	int (*alloc_size_query)(struct mlxsw_sp *mlxsw_sp, void *priv,
+				enum mlxsw_sp_kvdl_entry_type type,
+				unsigned int entry_count,
+				unsigned int *p_alloc_count);
+	int (*resources_register)(struct mlxsw_sp *mlxsw_sp, void *priv);
+};
+
 int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp);
-int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
-			u32 *p_entry_index);
-void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
-int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
-				   unsigned int entry_count,
-				   unsigned int *p_alloc_size);
-int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp,
+			enum mlxsw_sp_kvdl_entry_type type,
+			unsigned int entry_count, u32 *p_entry_index);
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp,
+			enum mlxsw_sp_kvdl_entry_type type,
+			unsigned int entry_count, int entry_index);
+int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp,
+				    enum mlxsw_sp_kvdl_entry_type type,
+				    unsigned int entry_count,
+				    unsigned int *p_alloc_count);
+
+/* spectrum1_kvdl.c */
+extern const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops;
+int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
 
 struct mlxsw_sp_acl_rule_info {
 	unsigned int priority;
@@ -453,44 +506,14 @@ struct mlxsw_sp_acl_rule_info {
 	unsigned int counter_index;
 };
 
-enum mlxsw_sp_acl_profile {
-	MLXSW_SP_ACL_PROFILE_FLOWER,
-};
-
-struct mlxsw_sp_acl_profile_ops {
-	size_t ruleset_priv_size;
-	int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp,
-			   void *priv, void *ruleset_priv);
-	void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv);
-	int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
-			    struct mlxsw_sp_port *mlxsw_sp_port,
-			    bool ingress);
-	void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
-			       struct mlxsw_sp_port *mlxsw_sp_port,
-			       bool ingress);
-	u16 (*ruleset_group_id)(void *ruleset_priv);
-	size_t rule_priv_size;
-	int (*rule_add)(struct mlxsw_sp *mlxsw_sp,
-			void *ruleset_priv, void *rule_priv,
-			struct mlxsw_sp_acl_rule_info *rulei);
-	void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv);
-	int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv,
-				 bool *activity);
-};
-
-struct mlxsw_sp_acl_ops {
-	size_t priv_size;
-	int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
-	void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
-	const struct mlxsw_sp_acl_profile_ops *
-			(*profile_ops)(struct mlxsw_sp *mlxsw_sp,
-				       enum mlxsw_sp_acl_profile profile);
-};
-
 struct mlxsw_sp_acl_block;
 struct mlxsw_sp_acl_ruleset;
 
 /* spectrum_acl.c */
+enum mlxsw_sp_acl_profile {
+	MLXSW_SP_ACL_PROFILE_FLOWER,
+};
+
 struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
 struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block);
 unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block);
@@ -583,11 +606,45 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp);
 
 /* spectrum_acl_tcam.c */
-extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops;
+struct mlxsw_sp_acl_tcam;
+struct mlxsw_sp_acl_tcam_region;
+
+struct mlxsw_sp_acl_tcam_ops {
+	enum mlxsw_reg_ptar_key_type key_type;
+	size_t priv_size;
+	int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv,
+		    struct mlxsw_sp_acl_tcam *tcam);
+	void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
+	size_t region_priv_size;
+	int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv,
+			   struct mlxsw_sp_acl_tcam_region *region);
+	void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv);
+	size_t chunk_priv_size;
+	void (*chunk_init)(void *region_priv, void *chunk_priv,
+			   unsigned int priority);
+	void (*chunk_fini)(void *chunk_priv);
+	size_t entry_priv_size;
+	int (*entry_add)(struct mlxsw_sp *mlxsw_sp,
+			 void *region_priv, void *chunk_priv,
+			 void *entry_priv,
+			 struct mlxsw_sp_acl_rule_info *rulei);
+	void (*entry_del)(struct mlxsw_sp *mlxsw_sp,
+			  void *region_priv, void *chunk_priv,
+			  void *entry_priv);
+	int (*entry_activity_get)(struct mlxsw_sp *mlxsw_sp,
+				  void *region_priv, void *entry_priv,
+				  bool *activity);
+};
+
+/* spectrum1_acl_tcam.c */
+extern const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops;
 
 /* spectrum_acl_flex_actions.c */
 extern const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops;
 
+/* spectrum_acl_flex_keys.c */
+extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
+
 /* spectrum_flower.c */
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_block *block,
@@ -635,4 +692,37 @@ void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp);
 
+/* spectrum_mr.c */
+enum mlxsw_sp_mr_route_prio {
+	MLXSW_SP_MR_ROUTE_PRIO_SG,
+	MLXSW_SP_MR_ROUTE_PRIO_STARG,
+	MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
+	__MLXSW_SP_MR_ROUTE_PRIO_MAX
+};
+
+#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
+
+struct mlxsw_sp_mr_route_key;
+
+struct mlxsw_sp_mr_tcam_ops {
+	size_t priv_size;
+	int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+	void (*fini)(void *priv);
+	size_t route_priv_size;
+	int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv,
+			    void *route_priv,
+			    struct mlxsw_sp_mr_route_key *key,
+			    struct mlxsw_afa_block *afa_block,
+			    enum mlxsw_sp_mr_route_prio prio);
+	void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
+			      void *route_priv,
+			      struct mlxsw_sp_mr_route_key *key);
+	int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+			    struct mlxsw_sp_mr_route_key *key,
+			    struct mlxsw_afa_block *afa_block);
+};
+
+/* spectrum1_mr_tcam.c */
+extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops;
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
new file mode 100644
index 000000000000..04f0c9cfae24
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
@@ -0,0 +1,245 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "reg.h"
+#include "core.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+struct mlxsw_sp1_acl_tcam_region {
+	struct mlxsw_sp_acl_ctcam_region cregion;
+	struct mlxsw_sp_acl_tcam_region *region;
+	struct {
+		struct mlxsw_sp_acl_ctcam_chunk cchunk;
+		struct mlxsw_sp_acl_ctcam_entry centry;
+		struct mlxsw_sp_acl_rule_info *rulei;
+	} catchall;
+};
+
+struct mlxsw_sp1_acl_tcam_chunk {
+	struct mlxsw_sp_acl_ctcam_chunk cchunk;
+};
+
+struct mlxsw_sp1_acl_tcam_entry {
+	struct mlxsw_sp_acl_ctcam_entry centry;
+};
+
+static int mlxsw_sp1_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv,
+				   struct mlxsw_sp_acl_tcam *tcam)
+{
+	return 0;
+}
+
+static void mlxsw_sp1_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+}
+
+static int
+mlxsw_sp1_acl_ctcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp1_acl_tcam_region *region)
+{
+	struct mlxsw_sp_acl_rule_info *rulei;
+	int err;
+
+	mlxsw_sp_acl_ctcam_chunk_init(&region->cregion,
+				      &region->catchall.cchunk,
+				      MLXSW_SP_ACL_TCAM_CATCHALL_PRIO);
+	rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl);
+	if (IS_ERR(rulei)) {
+		err = PTR_ERR(rulei);
+		goto err_rulei_create;
+	}
+	err = mlxsw_sp_acl_rulei_act_continue(rulei);
+	if (WARN_ON(err))
+		goto err_rulei_act_continue;
+	err = mlxsw_sp_acl_rulei_commit(rulei);
+	if (err)
+		goto err_rulei_commit;
+	err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &region->cregion,
+					   &region->catchall.cchunk,
+					   &region->catchall.centry,
+					   rulei, false);
+	if (err)
+		goto err_entry_add;
+	region->catchall.rulei = rulei;
+	return 0;
+
+err_entry_add:
+err_rulei_commit:
+err_rulei_act_continue:
+	mlxsw_sp_acl_rulei_destroy(rulei);
+err_rulei_create:
+	mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk);
+	return err;
+}
+
+static void
+mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp,
+					struct mlxsw_sp1_acl_tcam_region *region)
+{
+	struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei;
+
+	mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion,
+				     &region->catchall.cchunk,
+				     &region->catchall.centry);
+	mlxsw_sp_acl_rulei_destroy(rulei);
+	mlxsw_sp_acl_ctcam_chunk_fini(&region->catchall.cchunk);
+}
+
+static int
+mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv,
+			       struct mlxsw_sp_acl_tcam_region *_region)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+	int err;
+
+	err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &region->cregion,
+					     _region);
+	if (err)
+		return err;
+	err = mlxsw_sp1_acl_ctcam_region_catchall_add(mlxsw_sp, region);
+	if (err)
+		goto err_catchall_add;
+	region->region = _region;
+	return 0;
+
+err_catchall_add:
+	mlxsw_sp_acl_ctcam_region_fini(&region->cregion);
+	return err;
+}
+
+static void
+mlxsw_sp1_acl_tcam_region_fini(struct mlxsw_sp *mlxsw_sp, void *region_priv)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+
+	mlxsw_sp1_acl_ctcam_region_catchall_del(mlxsw_sp, region);
+	mlxsw_sp_acl_ctcam_region_fini(&region->cregion);
+}
+
+static void mlxsw_sp1_acl_tcam_chunk_init(void *region_priv, void *chunk_priv,
+					  unsigned int priority)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+	struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+
+	mlxsw_sp_acl_ctcam_chunk_init(&region->cregion, &chunk->cchunk,
+				      priority);
+}
+
+static void mlxsw_sp1_acl_tcam_chunk_fini(void *chunk_priv)
+{
+	struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+
+	mlxsw_sp_acl_ctcam_chunk_fini(&chunk->cchunk);
+}
+
+static int mlxsw_sp1_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+					void *region_priv, void *chunk_priv,
+					void *entry_priv,
+					struct mlxsw_sp_acl_rule_info *rulei)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+	struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+	struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+
+	return mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &region->cregion,
+					    &chunk->cchunk, &entry->centry,
+					    rulei, false);
+}
+
+static void mlxsw_sp1_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+					 void *region_priv, void *chunk_priv,
+					 void *entry_priv)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+	struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv;
+	struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+
+	mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, &region->cregion,
+				     &chunk->cchunk, &entry->centry);
+}
+
+static int
+mlxsw_sp1_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+					     struct mlxsw_sp_acl_tcam_region *_region,
+					     unsigned int offset,
+					     bool *activity)
+{
+	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+	int err;
+
+	mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ,
+			     _region->tcam_region_info, offset, 0);
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+	if (err)
+		return err;
+	*activity = mlxsw_reg_ptce2_a_get(ptce2_pl);
+	return 0;
+}
+
+static int
+mlxsw_sp1_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
+				      void *region_priv, void *entry_priv,
+				      bool *activity)
+{
+	struct mlxsw_sp1_acl_tcam_region *region = region_priv;
+	struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv;
+	unsigned int offset;
+
+	offset = mlxsw_sp_acl_ctcam_entry_offset(&entry->centry);
+	return mlxsw_sp1_acl_tcam_region_entry_activity_get(mlxsw_sp,
+							    region->region,
+							    offset, activity);
+}
+
+const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops = {
+	.key_type		= MLXSW_REG_PTAR_KEY_TYPE_FLEX,
+	.priv_size		= 0,
+	.init			= mlxsw_sp1_acl_tcam_init,
+	.fini			= mlxsw_sp1_acl_tcam_fini,
+	.region_priv_size	= sizeof(struct mlxsw_sp1_acl_tcam_region),
+	.region_init		= mlxsw_sp1_acl_tcam_region_init,
+	.region_fini		= mlxsw_sp1_acl_tcam_region_fini,
+	.chunk_priv_size	= sizeof(struct mlxsw_sp1_acl_tcam_chunk),
+	.chunk_init		= mlxsw_sp1_acl_tcam_chunk_init,
+	.chunk_fini		= mlxsw_sp1_acl_tcam_chunk_fini,
+	.entry_priv_size	= sizeof(struct mlxsw_sp1_acl_tcam_entry),
+	.entry_add		= mlxsw_sp1_acl_tcam_entry_add,
+	.entry_del		= mlxsw_sp1_acl_tcam_entry_del,
+	.entry_activity_get	= mlxsw_sp1_acl_tcam_entry_activity_get,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
new file mode 100644
index 000000000000..0d4583894a97
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
@@ -0,0 +1,459 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2018 Jiri Pirko <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "spectrum.h"
+
+#define MLXSW_SP1_KVDL_SINGLE_BASE 0
+#define MLXSW_SP1_KVDL_SINGLE_SIZE 16384
+#define MLXSW_SP1_KVDL_SINGLE_END \
+	(MLXSW_SP1_KVDL_SINGLE_SIZE + MLXSW_SP1_KVDL_SINGLE_BASE - 1)
+
+#define MLXSW_SP1_KVDL_CHUNKS_BASE \
+	(MLXSW_SP1_KVDL_SINGLE_BASE + MLXSW_SP1_KVDL_SINGLE_SIZE)
+#define MLXSW_SP1_KVDL_CHUNKS_SIZE 49152
+#define MLXSW_SP1_KVDL_CHUNKS_END \
+	(MLXSW_SP1_KVDL_CHUNKS_SIZE + MLXSW_SP1_KVDL_CHUNKS_BASE - 1)
+
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE \
+	(MLXSW_SP1_KVDL_CHUNKS_BASE + MLXSW_SP1_KVDL_CHUNKS_SIZE)
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE \
+	(MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE)
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_END \
+	(MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE - 1)
+
+#define MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE 1
+#define MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE 32
+#define MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512
+
+struct mlxsw_sp1_kvdl_part_info {
+	unsigned int part_index;
+	unsigned int start_index;
+	unsigned int end_index;
+	unsigned int alloc_size;
+	enum mlxsw_sp_resource_id resource_id;
+};
+
+enum mlxsw_sp1_kvdl_part_id {
+	MLXSW_SP1_KVDL_PART_ID_SINGLE,
+	MLXSW_SP1_KVDL_PART_ID_CHUNKS,
+	MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS,
+};
+
+#define MLXSW_SP1_KVDL_PART_INFO(id)				\
+[MLXSW_SP1_KVDL_PART_ID_##id] = {				\
+	.start_index = MLXSW_SP1_KVDL_##id##_BASE,		\
+	.end_index = MLXSW_SP1_KVDL_##id##_END,			\
+	.alloc_size = MLXSW_SP1_KVDL_##id##_ALLOC_SIZE,		\
+	.resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id,	\
+}
+
+static const struct mlxsw_sp1_kvdl_part_info mlxsw_sp1_kvdl_parts_info[] = {
+	MLXSW_SP1_KVDL_PART_INFO(SINGLE),
+	MLXSW_SP1_KVDL_PART_INFO(CHUNKS),
+	MLXSW_SP1_KVDL_PART_INFO(LARGE_CHUNKS),
+};
+
+#define MLXSW_SP1_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp1_kvdl_parts_info)
+
+struct mlxsw_sp1_kvdl_part {
+	struct mlxsw_sp1_kvdl_part_info info;
+	unsigned long usage[0];	/* Entries */
+};
+
+struct mlxsw_sp1_kvdl {
+	struct mlxsw_sp1_kvdl_part *parts[MLXSW_SP1_KVDL_PARTS_INFO_LEN];
+};
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_alloc_size_part(struct mlxsw_sp1_kvdl *kvdl,
+			       unsigned int alloc_size)
+{
+	struct mlxsw_sp1_kvdl_part *part, *min_part = NULL;
+	int i;
+
+	for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+		part = kvdl->parts[i];
+		if (alloc_size <= part->info.alloc_size &&
+		    (!min_part ||
+		     part->info.alloc_size <= min_part->info.alloc_size))
+			min_part = part;
+	}
+
+	return min_part ?: ERR_PTR(-ENOBUFS);
+}
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_index_part(struct mlxsw_sp1_kvdl *kvdl, u32 kvdl_index)
+{
+	struct mlxsw_sp1_kvdl_part *part;
+	int i;
+
+	for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+		part = kvdl->parts[i];
+		if (kvdl_index >= part->info.start_index &&
+		    kvdl_index <= part->info.end_index)
+			return part;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static u32
+mlxsw_sp1_kvdl_to_kvdl_index(const struct mlxsw_sp1_kvdl_part_info *info,
+			     unsigned int entry_index)
+{
+	return info->start_index + entry_index * info->alloc_size;
+}
+
+static unsigned int
+mlxsw_sp1_kvdl_to_entry_index(const struct mlxsw_sp1_kvdl_part_info *info,
+			      u32 kvdl_index)
+{
+	return (kvdl_index - info->start_index) / info->alloc_size;
+}
+
+static int mlxsw_sp1_kvdl_part_alloc(struct mlxsw_sp1_kvdl_part *part,
+				     u32 *p_kvdl_index)
+{
+	const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+	unsigned int entry_index, nr_entries;
+
+	nr_entries = (info->end_index - info->start_index + 1) /
+		     info->alloc_size;
+	entry_index = find_first_zero_bit(part->usage, nr_entries);
+	if (entry_index == nr_entries)
+		return -ENOBUFS;
+	__set_bit(entry_index, part->usage);
+
+	*p_kvdl_index = mlxsw_sp1_kvdl_to_kvdl_index(info, entry_index);
+
+	return 0;
+}
+
+static void mlxsw_sp1_kvdl_part_free(struct mlxsw_sp1_kvdl_part *part,
+				     u32 kvdl_index)
+{
+	const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+	unsigned int entry_index;
+
+	entry_index = mlxsw_sp1_kvdl_to_entry_index(info, kvdl_index);
+	__clear_bit(entry_index, part->usage);
+}
+
+static int mlxsw_sp1_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, void *priv,
+				enum mlxsw_sp_kvdl_entry_type type,
+				unsigned int entry_count,
+				u32 *p_entry_index)
+{
+	struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	/* Find partition with smallest allocation size satisfying the
+	 * requested size.
+	 */
+	part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count);
+	if (IS_ERR(part))
+		return PTR_ERR(part);
+
+	return mlxsw_sp1_kvdl_part_alloc(part, p_entry_index);
+}
+
+static void mlxsw_sp1_kvdl_free(struct mlxsw_sp *mlxsw_sp, void *priv,
+				enum mlxsw_sp_kvdl_entry_type type,
+				unsigned int entry_count, int entry_index)
+{
+	struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	part = mlxsw_sp1_kvdl_index_part(kvdl, entry_index);
+	if (IS_ERR(part))
+		return;
+	mlxsw_sp1_kvdl_part_free(part, entry_index);
+}
+
+static int mlxsw_sp1_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
+					   void *priv,
+					   enum mlxsw_sp_kvdl_entry_type type,
+					   unsigned int entry_count,
+					   unsigned int *p_alloc_size)
+{
+	struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count);
+	if (IS_ERR(part))
+		return PTR_ERR(part);
+
+	*p_alloc_size = part->info.alloc_size;
+
+	return 0;
+}
+
+static void mlxsw_sp1_kvdl_part_update(struct mlxsw_sp1_kvdl_part *part,
+				       struct mlxsw_sp1_kvdl_part *part_prev,
+				       unsigned int size)
+{
+	if (!part_prev) {
+		part->info.end_index = size - 1;
+	} else {
+		part->info.start_index = part_prev->info.end_index + 1;
+		part->info.end_index = part->info.start_index + size - 1;
+	}
+}
+
+static struct mlxsw_sp1_kvdl_part *
+mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
+			 const struct mlxsw_sp1_kvdl_part_info *info,
+			 struct mlxsw_sp1_kvdl_part *part_prev)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp1_kvdl_part *part;
+	bool need_update = true;
+	unsigned int nr_entries;
+	size_t usage_size;
+	u64 resource_size;
+	int err;
+
+	err = devlink_resource_size_get(devlink, info->resource_id,
+					&resource_size);
+	if (err) {
+		need_update = false;
+		resource_size = info->end_index - info->start_index + 1;
+	}
+
+	nr_entries = div_u64(resource_size, info->alloc_size);
+	usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
+	part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
+	if (!part)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&part->info, info, sizeof(part->info));
+
+	if (need_update)
+		mlxsw_sp1_kvdl_part_update(part, part_prev, resource_size);
+	return part;
+}
+
+static void mlxsw_sp1_kvdl_part_fini(struct mlxsw_sp1_kvdl_part *part)
+{
+	kfree(part);
+}
+
+static int mlxsw_sp1_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp1_kvdl *kvdl)
+{
+	const struct mlxsw_sp1_kvdl_part_info *info;
+	struct mlxsw_sp1_kvdl_part *part_prev = NULL;
+	int err, i;
+
+	for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) {
+		info = &mlxsw_sp1_kvdl_parts_info[i];
+		kvdl->parts[i] = mlxsw_sp1_kvdl_part_init(mlxsw_sp, info,
+							  part_prev);
+		if (IS_ERR(kvdl->parts[i])) {
+			err = PTR_ERR(kvdl->parts[i]);
+			goto err_kvdl_part_init;
+		}
+		part_prev = kvdl->parts[i];
+	}
+	return 0;
+
+err_kvdl_part_init:
+	for (i--; i >= 0; i--)
+		mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]);
+	return err;
+}
+
+static void mlxsw_sp1_kvdl_parts_fini(struct mlxsw_sp1_kvdl *kvdl)
+{
+	int i;
+
+	for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++)
+		mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]);
+}
+
+static u64 mlxsw_sp1_kvdl_part_occ(struct mlxsw_sp1_kvdl_part *part)
+{
+	const struct mlxsw_sp1_kvdl_part_info *info = &part->info;
+	unsigned int nr_entries;
+	int bit = -1;
+	u64 occ = 0;
+
+	nr_entries = (info->end_index -
+		      info->start_index + 1) /
+		      info->alloc_size;
+	while ((bit = find_next_bit(part->usage, nr_entries, bit + 1))
+		< nr_entries)
+		occ += info->alloc_size;
+	return occ;
+}
+
+static u64 mlxsw_sp1_kvdl_occ_get(void *priv)
+{
+	const struct mlxsw_sp1_kvdl *kvdl = priv;
+	u64 occ = 0;
+	int i;
+
+	for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++)
+		occ += mlxsw_sp1_kvdl_part_occ(kvdl->parts[i]);
+
+	return occ;
+}
+
+static u64 mlxsw_sp1_kvdl_single_occ_get(void *priv)
+{
+	const struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_SINGLE];
+	return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp1_kvdl_chunks_occ_get(void *priv)
+{
+	const struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_CHUNKS];
+	return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static u64 mlxsw_sp1_kvdl_large_chunks_occ_get(void *priv)
+{
+	const struct mlxsw_sp1_kvdl *kvdl = priv;
+	struct mlxsw_sp1_kvdl_part *part;
+
+	part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS];
+	return mlxsw_sp1_kvdl_part_occ(part);
+}
+
+static int mlxsw_sp1_kvdl_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp1_kvdl *kvdl = priv;
+	int err;
+
+	err = mlxsw_sp1_kvdl_parts_init(mlxsw_sp, kvdl);
+	if (err)
+		return err;
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR,
+					  mlxsw_sp1_kvdl_occ_get,
+					  kvdl);
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+					  mlxsw_sp1_kvdl_single_occ_get,
+					  kvdl);
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+					  mlxsw_sp1_kvdl_chunks_occ_get,
+					  kvdl);
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+					  mlxsw_sp1_kvdl_large_chunks_occ_get,
+					  kvdl);
+	return 0;
+}
+
+static void mlxsw_sp1_kvdl_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp1_kvdl *kvdl = priv;
+
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS);
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS);
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE);
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR);
+	mlxsw_sp1_kvdl_parts_fini(kvdl);
+}
+
+const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops = {
+	.priv_size = sizeof(struct mlxsw_sp1_kvdl),
+	.init = mlxsw_sp1_kvdl_init,
+	.fini = mlxsw_sp1_kvdl_fini,
+	.alloc = mlxsw_sp1_kvdl_alloc,
+	.free = mlxsw_sp1_kvdl_free,
+	.alloc_size_query = mlxsw_sp1_kvdl_alloc_size_query,
+};
+
+int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_core);
+	static struct devlink_resource_size_params size_params;
+	u32 kvdl_max_size;
+	int err;
+
+	kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
+			MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
+			MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
+
+	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+					  MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
+	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
+					MLXSW_SP1_KVDL_SINGLE_SIZE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR,
+					&size_params);
+	if (err)
+		return err;
+
+	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+					  MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
+	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
+					MLXSW_SP1_KVDL_CHUNKS_SIZE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+					MLXSW_SP_RESOURCE_KVD_LINEAR,
+					&size_params);
+	if (err)
+		return err;
+
+	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
+					  MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE,
+					  DEVLINK_RESOURCE_UNIT_ENTRY);
+	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
+					MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE,
+					MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+					MLXSW_SP_RESOURCE_KVD_LINEAR,
+					&size_params);
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c
new file mode 100644
index 000000000000..fc649fe7134e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c
@@ -0,0 +1,374 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <[email protected]>
+ * Copyright (c) 2018 Jiri Pirko <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_actions.h"
+#include "spectrum_mr.h"
+
+struct mlxsw_sp1_mr_tcam_region {
+	struct mlxsw_sp *mlxsw_sp;
+	enum mlxsw_reg_rtar_key_type rtar_key_type;
+	struct parman *parman;
+	struct parman_prio *parman_prios;
+};
+
+struct mlxsw_sp1_mr_tcam {
+	struct mlxsw_sp1_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX];
+};
+
+struct mlxsw_sp1_mr_tcam_route {
+	struct parman_item parman_item;
+	struct parman_prio *parman_prio;
+};
+
+static int mlxsw_sp1_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
+					   struct parman_item *parman_item,
+					   struct mlxsw_sp_mr_route_key *key,
+					   struct mlxsw_afa_block *afa_block)
+{
+	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+	switch (key->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index,
+					  key->vrid,
+					  MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+					  ntohl(key->group.addr4),
+					  ntohl(key->group_mask.addr4),
+					  ntohl(key->source.addr4),
+					  ntohl(key->source_mask.addr4),
+					  mlxsw_afa_block_first_set(afa_block));
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index,
+					  key->vrid,
+					  MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+					  key->group.addr6,
+					  key->group_mask.addr6,
+					  key->source.addr6,
+					  key->source_mask.addr6,
+					  mlxsw_afa_block_first_set(afa_block));
+	}
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static int mlxsw_sp1_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp,
+					  struct parman_item *parman_item,
+					  struct mlxsw_sp_mr_route_key *key)
+{
+	struct in6_addr zero_addr = IN6ADDR_ANY_INIT;
+	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+	switch (key->proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index,
+					  key->vrid, 0, 0, 0, 0, 0, 0, NULL);
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index,
+					  key->vrid, 0, 0, zero_addr, zero_addr,
+					  zero_addr, zero_addr, NULL);
+		break;
+	}
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static struct mlxsw_sp1_mr_tcam_region *
+mlxsw_sp1_mr_tcam_protocol_region(struct mlxsw_sp1_mr_tcam *mr_tcam,
+				  enum mlxsw_sp_l3proto proto)
+{
+	return &mr_tcam->tcam_regions[proto];
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_parman_item_add(struct mlxsw_sp1_mr_tcam *mr_tcam,
+					struct mlxsw_sp1_mr_tcam_route *route,
+					struct mlxsw_sp_mr_route_key *key,
+					enum mlxsw_sp_mr_route_prio prio)
+{
+	struct mlxsw_sp1_mr_tcam_region *tcam_region;
+	int err;
+
+	tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto);
+	err = parman_item_add(tcam_region->parman,
+			      &tcam_region->parman_prios[prio],
+			      &route->parman_item);
+	if (err)
+		return err;
+
+	route->parman_prio = &tcam_region->parman_prios[prio];
+	return 0;
+}
+
+static void
+mlxsw_sp1_mr_tcam_route_parman_item_remove(struct mlxsw_sp1_mr_tcam *mr_tcam,
+					   struct mlxsw_sp1_mr_tcam_route *route,
+					   struct mlxsw_sp_mr_route_key *key)
+{
+	struct mlxsw_sp1_mr_tcam_region *tcam_region;
+
+	tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto);
+	parman_item_remove(tcam_region->parman,
+			   route->parman_prio, &route->parman_item);
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
+			       void *route_priv,
+			       struct mlxsw_sp_mr_route_key *key,
+			       struct mlxsw_afa_block *afa_block,
+			       enum mlxsw_sp_mr_route_prio prio)
+{
+	struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+	struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+	int err;
+
+	err = mlxsw_sp1_mr_tcam_route_parman_item_add(mr_tcam, route,
+						      key, prio);
+	if (err)
+		return err;
+
+	err = mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+					      key, afa_block);
+	if (err)
+		goto err_route_replace;
+	return 0;
+
+err_route_replace:
+	mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key);
+	return err;
+}
+
+static void
+mlxsw_sp1_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv,
+				void *route_priv,
+				struct mlxsw_sp_mr_route_key *key)
+{
+	struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+	struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+
+	mlxsw_sp1_mr_tcam_route_remove(mlxsw_sp, &route->parman_item, key);
+	mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key);
+}
+
+static int
+mlxsw_sp1_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp,
+			       void *route_priv,
+			       struct mlxsw_sp_mr_route_key *key,
+			       struct mlxsw_afa_block *afa_block)
+{
+	struct mlxsw_sp1_mr_tcam_route *route = route_priv;
+
+	return mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+					       key, afa_block);
+}
+
+#define MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT 16
+#define MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP 16
+
+static int
+mlxsw_sp1_mr_tcam_region_alloc(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE,
+			    mr_tcam_region->rtar_key_type,
+			    MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void
+mlxsw_sp1_mr_tcam_region_free(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE,
+			    mr_tcam_region->rtar_key_type, 0);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static int mlxsw_sp1_mr_tcam_region_parman_resize(void *priv,
+						  unsigned long new_count)
+{
+	struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv;
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rtar_pl[MLXSW_REG_RTAR_LEN];
+	u64 max_tcam_rules;
+
+	max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
+	if (new_count > max_tcam_rules)
+		return -EINVAL;
+	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE,
+			    mr_tcam_region->rtar_key_type, new_count);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void mlxsw_sp1_mr_tcam_region_parman_move(void *priv,
+						 unsigned long from_index,
+						 unsigned long to_index,
+						 unsigned long count)
+{
+	struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv;
+	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+	char rrcr_pl[MLXSW_REG_RRCR_LEN];
+
+	mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE,
+			    from_index, count,
+			    mr_tcam_region->rtar_key_type, to_index);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl);
+}
+
+static const struct parman_ops mlxsw_sp1_mr_tcam_region_parman_ops = {
+	.base_count	= MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT,
+	.resize_step	= MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP,
+	.resize		= mlxsw_sp1_mr_tcam_region_parman_resize,
+	.move		= mlxsw_sp1_mr_tcam_region_parman_move,
+	.algo		= PARMAN_ALGO_TYPE_LSORT,
+};
+
+static int
+mlxsw_sp1_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp1_mr_tcam_region *mr_tcam_region,
+			      enum mlxsw_reg_rtar_key_type rtar_key_type)
+{
+	struct parman_prio *parman_prios;
+	struct parman *parman;
+	int err;
+	int i;
+
+	mr_tcam_region->rtar_key_type = rtar_key_type;
+	mr_tcam_region->mlxsw_sp = mlxsw_sp;
+
+	err = mlxsw_sp1_mr_tcam_region_alloc(mr_tcam_region);
+	if (err)
+		return err;
+
+	parman = parman_create(&mlxsw_sp1_mr_tcam_region_parman_ops,
+			       mr_tcam_region);
+	if (!parman) {
+		err = -ENOMEM;
+		goto err_parman_create;
+	}
+	mr_tcam_region->parman = parman;
+
+	parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1,
+				     sizeof(*parman_prios), GFP_KERNEL);
+	if (!parman_prios) {
+		err = -ENOMEM;
+		goto err_parman_prios_alloc;
+	}
+	mr_tcam_region->parman_prios = parman_prios;
+
+	for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+		parman_prio_init(mr_tcam_region->parman,
+				 &mr_tcam_region->parman_prios[i], i);
+	return 0;
+
+err_parman_prios_alloc:
+	parman_destroy(parman);
+err_parman_create:
+	mlxsw_sp1_mr_tcam_region_free(mr_tcam_region);
+	return err;
+}
+
+static void
+mlxsw_sp1_mr_tcam_region_fini(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region)
+{
+	int i;
+
+	for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+		parman_prio_fini(&mr_tcam_region->parman_prios[i]);
+	kfree(mr_tcam_region->parman_prios);
+	parman_destroy(mr_tcam_region->parman);
+	mlxsw_sp1_mr_tcam_region_free(mr_tcam_region);
+}
+
+static int mlxsw_sp1_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+	struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+	struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
+	u32 rtar_key;
+	int err;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
+		return -EIO;
+
+	rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST;
+	err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp,
+					    &region[MLXSW_SP_L3_PROTO_IPV4],
+					    rtar_key);
+	if (err)
+		return err;
+
+	rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST;
+	err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp,
+					    &region[MLXSW_SP_L3_PROTO_IPV6],
+					    rtar_key);
+	if (err)
+		goto err_ipv6_region_init;
+
+	return 0;
+
+err_ipv6_region_init:
+	mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+	return err;
+}
+
+static void mlxsw_sp1_mr_tcam_fini(void *priv)
+{
+	struct mlxsw_sp1_mr_tcam *mr_tcam = priv;
+	struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
+
+	mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV6]);
+	mlxsw_sp1_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+}
+
+const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops = {
+	.priv_size = sizeof(struct mlxsw_sp1_mr_tcam),
+	.init = mlxsw_sp1_mr_tcam_init,
+	.fini = mlxsw_sp1_mr_tcam_fini,
+	.route_priv_size = sizeof(struct mlxsw_sp1_mr_tcam_route),
+	.route_create = mlxsw_sp1_mr_tcam_route_create,
+	.route_destroy = mlxsw_sp1_mr_tcam_route_destroy,
+	.route_update = mlxsw_sp1_mr_tcam_route_update,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index b40569c67ddf..217621d79e26 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -48,13 +48,12 @@
 #include "spectrum.h"
 #include "core_acl_flex_keys.h"
 #include "core_acl_flex_actions.h"
-#include "spectrum_acl_flex_keys.h"
+#include "spectrum_acl_tcam.h"
 
 struct mlxsw_sp_acl {
 	struct mlxsw_sp *mlxsw_sp;
 	struct mlxsw_afk *afk;
 	struct mlxsw_sp_fid *dummy_fid;
-	const struct mlxsw_sp_acl_ops *ops;
 	struct rhashtable ruleset_ht;
 	struct list_head rules;
 	struct {
@@ -62,8 +61,7 @@ struct mlxsw_sp_acl {
 		unsigned long interval;	/* ms */
 #define MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS 1000
 	} rule_activity_update;
-	unsigned long priv[0];
-	/* priv has to be always the last item */
+	struct mlxsw_sp_acl_tcam tcam;
 };
 
 struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl)
@@ -339,7 +337,7 @@ mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_rhashtable_init;
 
-	err = ops->ruleset_add(mlxsw_sp, acl->priv, ruleset->priv);
+	err = ops->ruleset_add(mlxsw_sp, &acl->tcam, ruleset->priv);
 	if (err)
 		goto err_ops_ruleset_add;
 
@@ -409,7 +407,7 @@ mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
 	struct mlxsw_sp_acl_ruleset *ruleset;
 
-	ops = acl->ops->profile_ops(mlxsw_sp, profile);
+	ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile);
 	if (!ops)
 		return ERR_PTR(-EINVAL);
 	ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, block, chain_index, ops);
@@ -427,7 +425,7 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
 	struct mlxsw_sp_acl_ruleset *ruleset;
 
-	ops = acl->ops->profile_ops(mlxsw_sp, profile);
+	ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile);
 	if (!ops)
 		return ERR_PTR(-EINVAL);
 
@@ -634,7 +632,8 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
 	int err;
 
 	mlxsw_sp_acl_ruleset_ref_inc(ruleset);
-	rule = kzalloc(sizeof(*rule) + ops->rule_priv_size, GFP_KERNEL);
+	rule = kzalloc(sizeof(*rule) + ops->rule_priv_size(mlxsw_sp),
+		       GFP_KERNEL);
 	if (!rule) {
 		err = -ENOMEM;
 		goto err_alloc;
@@ -825,20 +824,20 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
 
 int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
 {
-	const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops;
 	struct mlxsw_sp_fid *fid;
 	struct mlxsw_sp_acl *acl;
+	size_t alloc_size;
 	int err;
 
-	acl = kzalloc(sizeof(*acl) + acl_ops->priv_size, GFP_KERNEL);
+	alloc_size = sizeof(*acl) + mlxsw_sp_acl_tcam_priv_size(mlxsw_sp);
+	acl = kzalloc(alloc_size, GFP_KERNEL);
 	if (!acl)
 		return -ENOMEM;
 	mlxsw_sp->acl = acl;
 	acl->mlxsw_sp = mlxsw_sp;
 	acl->afk = mlxsw_afk_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
 						       ACL_FLEX_KEYS),
-				    mlxsw_sp1_afk_blocks,
-				    MLXSW_SP1_AFK_BLOCKS_COUNT);
+				    mlxsw_sp->afk_ops);
 	if (!acl->afk) {
 		err = -ENOMEM;
 		goto err_afk_create;
@@ -857,12 +856,10 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
 	acl->dummy_fid = fid;
 
 	INIT_LIST_HEAD(&acl->rules);
-	err = acl_ops->init(mlxsw_sp, acl->priv);
+	err = mlxsw_sp_acl_tcam_init(mlxsw_sp, &acl->tcam);
 	if (err)
 		goto err_acl_ops_init;
 
-	acl->ops = acl_ops;
-
 	/* Create the delayed work for the rule activity_update */
 	INIT_DELAYED_WORK(&acl->rule_activity_update.dw,
 			  mlxsw_sp_acl_rul_activity_update_work);
@@ -884,10 +881,9 @@ err_afk_create:
 void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
-	const struct mlxsw_sp_acl_ops *acl_ops = acl->ops;
 
 	cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw);
-	acl_ops->fini(mlxsw_sp, acl->priv);
+	mlxsw_sp_acl_tcam_fini(mlxsw_sp, &acl->tcam);
 	WARN_ON(!list_empty(&acl->rules));
 	mlxsw_sp_fid_put(acl->dummy_fid);
 	rhashtable_destroy(&acl->ruleset_ht);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
new file mode 100644
index 000000000000..ef0d4c0a5a1f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
@@ -0,0 +1,215 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "core.h"
+#include "spectrum.h"
+#include "spectrum_acl_tcam.h"
+
+static int
+mlxsw_sp_acl_ctcam_region_resize(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_tcam_region *region,
+				 u16 new_size)
+{
+	char ptar_pl[MLXSW_REG_PTAR_LEN];
+
+	mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE,
+			    region->key_type, new_size, region->id,
+			    region->tcam_region_info);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
+}
+
+static void
+mlxsw_sp_acl_ctcam_region_move(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_acl_tcam_region *region,
+			       u16 src_offset, u16 dst_offset, u16 size)
+{
+	char prcr_pl[MLXSW_REG_PRCR_LEN];
+
+	mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE,
+			    region->tcam_region_info, src_offset,
+			    region->tcam_region_info, dst_offset, size);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl);
+}
+
+static int
+mlxsw_sp_acl_ctcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_acl_tcam_region *region,
+				       unsigned int offset,
+				       struct mlxsw_sp_acl_rule_info *rulei,
+				       bool fillup_priority)
+{
+	struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
+	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+	char *act_set;
+	u32 priority;
+	char *mask;
+	char *key;
+	int err;
+
+	err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority,
+					     fillup_priority);
+	if (err)
+		return err;
+
+	mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
+			     region->tcam_region_info, offset, priority);
+	key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl);
+	mask = mlxsw_reg_ptce2_mask_data(ptce2_pl);
+	mlxsw_afk_encode(afk, region->key_info, &rulei->values, key, mask);
+
+	/* Only the first action set belongs here, the rest is in KVD */
+	act_set = mlxsw_afa_block_first_set(rulei->act_block);
+	mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+}
+
+static void
+mlxsw_sp_acl_ctcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_acl_tcam_region *region,
+				       unsigned int offset)
+{
+	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
+
+	mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
+			     region->tcam_region_info, offset, 0);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
+}
+
+static int mlxsw_sp_acl_ctcam_region_parman_resize(void *priv,
+						   unsigned long new_count)
+{
+	struct mlxsw_sp_acl_ctcam_region *cregion = priv;
+	struct mlxsw_sp_acl_tcam_region *region = cregion->region;
+	struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+	u64 max_tcam_rules;
+
+	max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
+	if (new_count > max_tcam_rules)
+		return -EINVAL;
+	return mlxsw_sp_acl_ctcam_region_resize(mlxsw_sp, region, new_count);
+}
+
+static void mlxsw_sp_acl_ctcam_region_parman_move(void *priv,
+						  unsigned long from_index,
+						  unsigned long to_index,
+						  unsigned long count)
+{
+	struct mlxsw_sp_acl_ctcam_region *cregion = priv;
+	struct mlxsw_sp_acl_tcam_region *region = cregion->region;
+	struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
+
+	mlxsw_sp_acl_ctcam_region_move(mlxsw_sp, region,
+				       from_index, to_index, count);
+}
+
+static const struct parman_ops mlxsw_sp_acl_ctcam_region_parman_ops = {
+	.base_count	= MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT,
+	.resize_step	= MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP,
+	.resize		= mlxsw_sp_acl_ctcam_region_parman_resize,
+	.move		= mlxsw_sp_acl_ctcam_region_parman_move,
+	.algo		= PARMAN_ALGO_TYPE_LSORT,
+};
+
+int mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_acl_ctcam_region *cregion,
+				   struct mlxsw_sp_acl_tcam_region *region)
+{
+	cregion->region = region;
+	cregion->parman = parman_create(&mlxsw_sp_acl_ctcam_region_parman_ops,
+					cregion);
+	if (!cregion->parman)
+		return -ENOMEM;
+	return 0;
+}
+
+void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion)
+{
+	parman_destroy(cregion->parman);
+}
+
+void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion,
+				   struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				   unsigned int priority)
+{
+	parman_prio_init(cregion->parman, &cchunk->parman_prio, priority);
+}
+
+void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk)
+{
+	parman_prio_fini(&cchunk->parman_prio);
+}
+
+int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_ctcam_region *cregion,
+				 struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				 struct mlxsw_sp_acl_ctcam_entry *centry,
+				 struct mlxsw_sp_acl_rule_info *rulei,
+				 bool fillup_priority)
+{
+	int err;
+
+	err = parman_item_add(cregion->parman, &cchunk->parman_prio,
+			      &centry->parman_item);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_acl_ctcam_region_entry_insert(mlxsw_sp, cregion->region,
+						     centry->parman_item.index,
+						     rulei, fillup_priority);
+	if (err)
+		goto err_rule_insert;
+	return 0;
+
+err_rule_insert:
+	parman_item_remove(cregion->parman, &cchunk->parman_prio,
+			   &centry->parman_item);
+	return err;
+}
+
+void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_acl_ctcam_region *cregion,
+				  struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				  struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+	mlxsw_sp_acl_ctcam_region_entry_remove(mlxsw_sp, cregion->region,
+					       centry->parman_item.index);
+	parman_item_remove(cregion->parman, &cchunk->parman_prio,
+			   &centry->parman_item);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index 5ab070916949..6a7c3406b724 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -37,8 +37,6 @@
 #include "core_acl_flex_actions.h"
 #include "spectrum_span.h"
 
-#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
-
 static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
 				     char *enc_actions, bool is_first)
 {
@@ -53,8 +51,8 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
 	if (is_first)
 		return 0;
 
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ACT_EXT_SIZE,
-				  &kvdl_index);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+				  1, &kvdl_index);
 	if (err)
 		return err;
 	mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions);
@@ -65,7 +63,8 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
 	return 0;
 
 err_pefa_write:
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+			   1, kvdl_index);
 	return err;
 }
 
@@ -76,7 +75,8 @@ static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index,
 
 	if (is_first)
 		return;
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
+			   1, kvdl_index);
 }
 
 static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
@@ -87,7 +87,8 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
 	u32 kvdl_index;
 	int err;
 
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+				  1, &kvdl_index);
 	if (err)
 		return err;
 	mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port);
@@ -98,7 +99,8 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
 	return 0;
 
 err_ppbs_write:
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+			   1, kvdl_index);
 	return err;
 }
 
@@ -106,7 +108,8 @@ static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index)
 {
 	struct mlxsw_sp *mlxsw_sp = priv;
 
-	mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
+			   1, kvdl_index);
 }
 
 static int
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
index a120c0dccd78..80f22b7c21da 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
@@ -1,7 +1,7 @@
 /*
- * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Jiri Pirko <[email protected]>
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <[email protected]>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -32,9 +32,10 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
-#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
-
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include "spectrum.h"
+#include "item.h"
 #include "core_acl_flex_keys.h"
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = {
@@ -126,6 +127,48 @@ static const struct mlxsw_afk_block mlxsw_sp1_afk_blocks[] = {
 	MLXSW_AFK_BLOCK(0xB0, mlxsw_sp_afk_element_info_packet_type),
 };
 
-#define MLXSW_SP1_AFK_BLOCKS_COUNT ARRAY_SIZE(mlxsw_sp1_afk_blocks)
-
-#endif
+static void mlxsw_sp1_afk_encode_u32(const struct mlxsw_item *storage_item,
+				     const struct mlxsw_item *output_item,
+				     char *storage, char *output_indexed)
+{
+	u32 value;
+
+	value = __mlxsw_item_get32(storage, storage_item, 0);
+	__mlxsw_item_set32(output_indexed, output_item, 0, value);
+}
+
+static void mlxsw_sp1_afk_encode_buf(const struct mlxsw_item *storage_item,
+				     const struct mlxsw_item *output_item,
+				     char *storage, char *output_indexed)
+{
+	char *storage_data = __mlxsw_item_data(storage, storage_item, 0);
+	char *output_data = __mlxsw_item_data(output_indexed, output_item, 0);
+	size_t len = output_item->size.bytes;
+
+	memcpy(output_data, storage_data, len);
+}
+
+#define MLXSW_SP1_AFK_KEY_BLOCK_SIZE 16
+
+static void
+mlxsw_sp1_afk_encode_one(const struct mlxsw_afk_element_inst *elinst,
+			 int block_index, char *storage, char *output)
+{
+	unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE;
+	char *output_indexed = output + offset;
+	const struct mlxsw_item *storage_item = &elinst->info->item;
+	const struct mlxsw_item *output_item = &elinst->item;
+
+	if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32)
+		mlxsw_sp1_afk_encode_u32(storage_item, output_item,
+					 storage, output_indexed);
+	else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF)
+		mlxsw_sp1_afk_encode_buf(storage_item, output_item,
+					 storage, output_indexed);
+}
+
+const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = {
+	.blocks		= mlxsw_sp1_afk_blocks,
+	.blocks_count	= ARRAY_SIZE(mlxsw_sp1_afk_blocks),
+	.encode_one	= mlxsw_sp1_afk_encode_one,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index efbd2062b6ec..53fe51a8d720 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2017 Jiri Pirko <[email protected]>
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <[email protected]>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -39,25 +39,25 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
-#include <linux/parman.h>
 
 #include "reg.h"
 #include "core.h"
 #include "resources.h"
 #include "spectrum.h"
+#include "spectrum_acl_tcam.h"
 #include "core_acl_flex_keys.h"
 
-struct mlxsw_sp_acl_tcam {
-	unsigned long *used_regions; /* bit array */
-	unsigned int max_regions;
-	unsigned long *used_groups;  /* bit array */
-	unsigned int max_groups;
-	unsigned int max_group_size;
-};
+size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+	return ops->priv_size;
+}
 
-static int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
+			   struct mlxsw_sp_acl_tcam *tcam)
 {
-	struct mlxsw_sp_acl_tcam *tcam = priv;
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	u64 max_tcam_regions;
 	u64 max_regions;
 	u64 max_groups;
@@ -88,21 +88,53 @@ static int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
 	tcam->max_groups = max_groups;
 	tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
 						 ACL_MAX_GROUP_SIZE);
+
+	err = ops->init(mlxsw_sp, tcam->priv, tcam);
+	if (err)
+		goto err_tcam_init;
+
 	return 0;
 
+err_tcam_init:
+	kfree(tcam->used_groups);
 err_alloc_used_groups:
 	kfree(tcam->used_regions);
 	return err;
 }
 
-static void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
+void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
+			    struct mlxsw_sp_acl_tcam *tcam)
 {
-	struct mlxsw_sp_acl_tcam *tcam = priv;
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 
+	ops->fini(mlxsw_sp, tcam->priv);
 	kfree(tcam->used_groups);
 	kfree(tcam->used_regions);
 }
 
+int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_acl_rule_info *rulei,
+				   u32 *priority, bool fillup_priority)
+{
+	u64 max_priority;
+
+	if (!fillup_priority) {
+		*priority = 0;
+		return 0;
+	}
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, KVD_SIZE))
+		return -EIO;
+
+	max_priority = MLXSW_CORE_RES_GET(mlxsw_sp->core, KVD_SIZE);
+	if (rulei->priority > max_priority)
+		return -EINVAL;
+
+	/* Unlike in TC, in HW, higher number means higher priority. */
+	*priority = max_priority - rulei->priority;
+	return 0;
+}
+
 static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam,
 					   u16 *p_id)
 {
@@ -159,36 +191,21 @@ struct mlxsw_sp_acl_tcam_group {
 	unsigned int patterns_count;
 };
 
-struct mlxsw_sp_acl_tcam_region {
-	struct list_head list; /* Member of a TCAM group */
-	struct list_head chunk_list; /* List of chunks under this region */
-	struct parman *parman;
-	struct mlxsw_sp *mlxsw_sp;
-	struct mlxsw_sp_acl_tcam_group *group;
-	enum mlxsw_reg_ptar_key_type key_type;
-	u16 id; /* ACL ID and region ID - they are same */
-	char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN];
-	struct mlxsw_afk_key_info *key_info;
-	struct {
-		struct parman_prio parman_prio;
-		struct parman_item parman_item;
-		struct mlxsw_sp_acl_rule_info *rulei;
-	} catchall;
-};
-
 struct mlxsw_sp_acl_tcam_chunk {
 	struct list_head list; /* Member of a TCAM region */
 	struct rhash_head ht_node; /* Member of a chunk HT */
 	unsigned int priority; /* Priority within the region and group */
-	struct parman_prio parman_prio;
 	struct mlxsw_sp_acl_tcam_group *group;
 	struct mlxsw_sp_acl_tcam_region *region;
 	unsigned int ref_count;
+	unsigned long priv[0];
+	/* priv has to be always the last item */
 };
 
 struct mlxsw_sp_acl_tcam_entry {
-	struct parman_item parman_item;
 	struct mlxsw_sp_acl_tcam_chunk *chunk;
+	unsigned long priv[0];
+	/* priv has to be always the last item */
 };
 
 static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = {
@@ -442,9 +459,6 @@ mlxsw_sp_acl_tcam_group_use_patterns(struct mlxsw_sp_acl_tcam_group *group,
 	memcpy(out, elusage, sizeof(*out));
 }
 
-#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16
-#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16
-
 static int
 mlxsw_sp_acl_tcam_region_alloc(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_acl_tcam_region *region)
@@ -486,19 +500,6 @@ mlxsw_sp_acl_tcam_region_free(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int
-mlxsw_sp_acl_tcam_region_resize(struct mlxsw_sp *mlxsw_sp,
-				struct mlxsw_sp_acl_tcam_region *region,
-				u16 new_size)
-{
-	char ptar_pl[MLXSW_REG_PTAR_LEN];
-
-	mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE,
-			    region->key_type, new_size, region->id,
-			    region->tcam_region_info);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl);
-}
-
-static int
 mlxsw_sp_acl_tcam_region_enable(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_acl_tcam_region *region)
 {
@@ -520,193 +521,22 @@ mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pacl), pacl_pl);
 }
 
-static int
-mlxsw_sp_acl_tcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_acl_tcam_region *region,
-				      unsigned int offset,
-				      struct mlxsw_sp_acl_rule_info *rulei)
-{
-	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
-	char *act_set;
-	char *mask;
-	char *key;
-
-	mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
-			     region->tcam_region_info, offset);
-	key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl);
-	mask = mlxsw_reg_ptce2_mask_data(ptce2_pl);
-	mlxsw_afk_encode(region->key_info, &rulei->values, key, mask);
-
-	/* Only the first action set belongs here, the rest is in KVD */
-	act_set = mlxsw_afa_block_first_set(rulei->act_block);
-	mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set);
-
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
-}
-
-static void
-mlxsw_sp_acl_tcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_acl_tcam_region *region,
-				      unsigned int offset)
-{
-	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
-
-	mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE,
-			     region->tcam_region_info, offset);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
-}
-
-static int
-mlxsw_sp_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
-					    struct mlxsw_sp_acl_tcam_region *region,
-					    unsigned int offset,
-					    bool *activity)
-{
-	char ptce2_pl[MLXSW_REG_PTCE2_LEN];
-	int err;
-
-	mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ,
-			     region->tcam_region_info, offset);
-	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl);
-	if (err)
-		return err;
-	*activity = mlxsw_reg_ptce2_a_get(ptce2_pl);
-	return 0;
-}
-
-#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U)
-
-static int
-mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_acl_tcam_region *region)
-{
-	struct parman_prio *parman_prio = &region->catchall.parman_prio;
-	struct parman_item *parman_item = &region->catchall.parman_item;
-	struct mlxsw_sp_acl_rule_info *rulei;
-	int err;
-
-	parman_prio_init(region->parman, parman_prio,
-			 MLXSW_SP_ACL_TCAM_CATCHALL_PRIO);
-	err = parman_item_add(region->parman, parman_prio, parman_item);
-	if (err)
-		goto err_parman_item_add;
-
-	rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl);
-	if (IS_ERR(rulei)) {
-		err = PTR_ERR(rulei);
-		goto err_rulei_create;
-	}
-
-	err = mlxsw_sp_acl_rulei_act_continue(rulei);
-	if (WARN_ON(err))
-		goto err_rulei_act_continue;
-
-	err = mlxsw_sp_acl_rulei_commit(rulei);
-	if (err)
-		goto err_rulei_commit;
-
-	err = mlxsw_sp_acl_tcam_region_entry_insert(mlxsw_sp, region,
-						    parman_item->index, rulei);
-	region->catchall.rulei = rulei;
-	if (err)
-		goto err_rule_insert;
-
-	return 0;
-
-err_rule_insert:
-err_rulei_commit:
-err_rulei_act_continue:
-	mlxsw_sp_acl_rulei_destroy(rulei);
-err_rulei_create:
-	parman_item_remove(region->parman, parman_prio, parman_item);
-err_parman_item_add:
-	parman_prio_fini(parman_prio);
-	return err;
-}
-
-static void
-mlxsw_sp_acl_tcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_acl_tcam_region *region)
-{
-	struct parman_prio *parman_prio = &region->catchall.parman_prio;
-	struct parman_item *parman_item = &region->catchall.parman_item;
-	struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei;
-
-	mlxsw_sp_acl_tcam_region_entry_remove(mlxsw_sp, region,
-					      parman_item->index);
-	mlxsw_sp_acl_rulei_destroy(rulei);
-	parman_item_remove(region->parman, parman_prio, parman_item);
-	parman_prio_fini(parman_prio);
-}
-
-static void
-mlxsw_sp_acl_tcam_region_move(struct mlxsw_sp *mlxsw_sp,
-			      struct mlxsw_sp_acl_tcam_region *region,
-			      u16 src_offset, u16 dst_offset, u16 size)
-{
-	char prcr_pl[MLXSW_REG_PRCR_LEN];
-
-	mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE,
-			    region->tcam_region_info, src_offset,
-			    region->tcam_region_info, dst_offset, size);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl);
-}
-
-static int mlxsw_sp_acl_tcam_region_parman_resize(void *priv,
-						  unsigned long new_count)
-{
-	struct mlxsw_sp_acl_tcam_region *region = priv;
-	struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
-	u64 max_tcam_rules;
-
-	max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
-	if (new_count > max_tcam_rules)
-		return -EINVAL;
-	return mlxsw_sp_acl_tcam_region_resize(mlxsw_sp, region, new_count);
-}
-
-static void mlxsw_sp_acl_tcam_region_parman_move(void *priv,
-						 unsigned long from_index,
-						 unsigned long to_index,
-						 unsigned long count)
-{
-	struct mlxsw_sp_acl_tcam_region *region = priv;
-	struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp;
-
-	mlxsw_sp_acl_tcam_region_move(mlxsw_sp, region,
-				      from_index, to_index, count);
-}
-
-static const struct parman_ops mlxsw_sp_acl_tcam_region_parman_ops = {
-	.base_count	= MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT,
-	.resize_step	= MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP,
-	.resize		= mlxsw_sp_acl_tcam_region_parman_resize,
-	.move		= mlxsw_sp_acl_tcam_region_parman_move,
-	.algo		= PARMAN_ALGO_TYPE_LSORT,
-};
-
 static struct mlxsw_sp_acl_tcam_region *
 mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_acl_tcam *tcam,
 				struct mlxsw_afk_element_usage *elusage)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl);
 	struct mlxsw_sp_acl_tcam_region *region;
 	int err;
 
-	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL);
 	if (!region)
 		return ERR_PTR(-ENOMEM);
 	INIT_LIST_HEAD(&region->chunk_list);
 	region->mlxsw_sp = mlxsw_sp;
 
-	region->parman = parman_create(&mlxsw_sp_acl_tcam_region_parman_ops,
-				       region);
-	if (!region->parman) {
-		err = -ENOMEM;
-		goto err_parman_create;
-	}
-
 	region->key_info = mlxsw_afk_key_info_get(afk, elusage);
 	if (IS_ERR(region->key_info)) {
 		err = PTR_ERR(region->key_info);
@@ -717,7 +547,7 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_region_id_get;
 
-	region->key_type = MLXSW_REG_PTAR_KEY_TYPE_FLEX;
+	region->key_type = ops->key_type;
 	err = mlxsw_sp_acl_tcam_region_alloc(mlxsw_sp, region);
 	if (err)
 		goto err_tcam_region_alloc;
@@ -726,13 +556,13 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_tcam_region_enable;
 
-	err = mlxsw_sp_acl_tcam_region_catchall_add(mlxsw_sp, region);
+	err = ops->region_init(mlxsw_sp, region->priv, region);
 	if (err)
-		goto err_tcam_region_catchall_add;
+		goto err_tcam_region_init;
 
 	return region;
 
-err_tcam_region_catchall_add:
+err_tcam_region_init:
 	mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region);
 err_tcam_region_enable:
 	mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region);
@@ -741,8 +571,6 @@ err_tcam_region_alloc:
 err_region_id_get:
 	mlxsw_afk_key_info_put(region->key_info);
 err_key_info_get:
-	parman_destroy(region->parman);
-err_parman_create:
 	kfree(region);
 	return ERR_PTR(err);
 }
@@ -751,12 +579,13 @@ static void
 mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_acl_tcam_region *region)
 {
-	mlxsw_sp_acl_tcam_region_catchall_del(mlxsw_sp, region);
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+	ops->region_fini(mlxsw_sp, region->priv);
 	mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region);
 	mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region);
 	mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id);
 	mlxsw_afk_key_info_put(region->key_info);
-	parman_destroy(region->parman);
 	kfree(region);
 }
 
@@ -831,13 +660,14 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
 			       unsigned int priority,
 			       struct mlxsw_afk_element_usage *elusage)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_chunk *chunk;
 	int err;
 
 	if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO)
 		return ERR_PTR(-EINVAL);
 
-	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+	chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL);
 	if (!chunk)
 		return ERR_PTR(-ENOMEM);
 	chunk->priority = priority;
@@ -849,7 +679,7 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_chunk_assoc;
 
-	parman_prio_init(chunk->region->parman, &chunk->parman_prio, priority);
+	ops->chunk_init(chunk->region->priv, chunk->priv, priority);
 
 	err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node,
 				     mlxsw_sp_acl_tcam_chunk_ht_params);
@@ -859,7 +689,7 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp,
 	return chunk;
 
 err_rhashtable_insert:
-	parman_prio_fini(&chunk->parman_prio);
+	ops->chunk_fini(chunk->priv);
 	mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
 err_chunk_assoc:
 	kfree(chunk);
@@ -870,11 +700,12 @@ static void
 mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_acl_tcam_chunk *chunk)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_group *group = chunk->group;
 
 	rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node,
 			       mlxsw_sp_acl_tcam_chunk_ht_params);
-	parman_prio_fini(&chunk->parman_prio);
+	ops->chunk_fini(chunk->priv);
 	mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk);
 	kfree(chunk);
 }
@@ -908,11 +739,19 @@ static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk);
 }
 
+static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
+
+	return ops->entry_priv_size;
+}
+
 static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
 				       struct mlxsw_sp_acl_tcam_group *group,
 				       struct mlxsw_sp_acl_tcam_entry *entry,
 				       struct mlxsw_sp_acl_rule_info *rulei)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_chunk *chunk;
 	struct mlxsw_sp_acl_tcam_region *region;
 	int err;
@@ -923,24 +762,16 @@ static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp,
 		return PTR_ERR(chunk);
 
 	region = chunk->region;
-	err = parman_item_add(region->parman, &chunk->parman_prio,
-			      &entry->parman_item);
-	if (err)
-		goto err_parman_item_add;
 
-	err = mlxsw_sp_acl_tcam_region_entry_insert(mlxsw_sp, region,
-						    entry->parman_item.index,
-						    rulei);
+	err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv,
+			     entry->priv, rulei);
 	if (err)
-		goto err_rule_insert;
+		goto err_entry_add;
 	entry->chunk = chunk;
 
 	return 0;
 
-err_rule_insert:
-	parman_item_remove(region->parman, &chunk->parman_prio,
-			   &entry->parman_item);
-err_parman_item_add:
+err_entry_add:
 	mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
 	return err;
 }
@@ -948,13 +779,11 @@ err_parman_item_add:
 static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp,
 					struct mlxsw_sp_acl_tcam_entry *entry)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
 	struct mlxsw_sp_acl_tcam_region *region = chunk->region;
 
-	mlxsw_sp_acl_tcam_region_entry_remove(mlxsw_sp, region,
-					      entry->parman_item.index);
-	parman_item_remove(region->parman, &chunk->parman_prio,
-			   &entry->parman_item);
+	ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv);
 	mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk);
 }
 
@@ -963,12 +792,12 @@ mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_acl_tcam_entry *entry,
 				     bool *activity)
 {
+	const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
 	struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk;
 	struct mlxsw_sp_acl_tcam_region *region = chunk->region;
 
-	return mlxsw_sp_acl_tcam_region_entry_activity_get(mlxsw_sp, region,
-							   entry->parman_item.index,
-							   activity);
+	return ops->entry_activity_get(mlxsw_sp, region->priv,
+				       entry->priv, activity);
 }
 
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
@@ -1030,10 +859,10 @@ struct mlxsw_sp_acl_tcam_flower_rule {
 
 static int
 mlxsw_sp_acl_tcam_flower_ruleset_add(struct mlxsw_sp *mlxsw_sp,
-				     void *priv, void *ruleset_priv)
+				     struct mlxsw_sp_acl_tcam *tcam,
+				     void *ruleset_priv)
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
-	struct mlxsw_sp_acl_tcam *tcam = priv;
 
 	return mlxsw_sp_acl_tcam_group_add(mlxsw_sp, tcam, &ruleset->group,
 					   mlxsw_sp_acl_tcam_patterns,
@@ -1081,6 +910,12 @@ mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv)
 	return mlxsw_sp_acl_tcam_group_id(&ruleset->group);
 }
 
+static size_t mlxsw_sp_acl_tcam_flower_rule_priv_size(struct mlxsw_sp *mlxsw_sp)
+{
+	return sizeof(struct mlxsw_sp_acl_tcam_flower_rule) +
+	       mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp);
+}
+
 static int
 mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp,
 				  void *ruleset_priv, void *rule_priv,
@@ -1118,7 +953,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
 	.ruleset_bind		= mlxsw_sp_acl_tcam_flower_ruleset_bind,
 	.ruleset_unbind		= mlxsw_sp_acl_tcam_flower_ruleset_unbind,
 	.ruleset_group_id	= mlxsw_sp_acl_tcam_flower_ruleset_group_id,
-	.rule_priv_size		= sizeof(struct mlxsw_sp_acl_tcam_flower_rule),
+	.rule_priv_size		= mlxsw_sp_acl_tcam_flower_rule_priv_size,
 	.rule_add		= mlxsw_sp_acl_tcam_flower_rule_add,
 	.rule_del		= mlxsw_sp_acl_tcam_flower_rule_del,
 	.rule_activity_get	= mlxsw_sp_acl_tcam_flower_rule_activity_get,
@@ -1129,7 +964,7 @@ mlxsw_sp_acl_tcam_profile_ops_arr[] = {
 	[MLXSW_SP_ACL_PROFILE_FLOWER] = &mlxsw_sp_acl_tcam_flower_ops,
 };
 
-static const struct mlxsw_sp_acl_profile_ops *
+const struct mlxsw_sp_acl_profile_ops *
 mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
 			      enum mlxsw_sp_acl_profile profile)
 {
@@ -1142,10 +977,3 @@ mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
 		return NULL;
 	return ops;
 }
-
-const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops = {
-	.priv_size		= sizeof(struct mlxsw_sp_acl_tcam),
-	.init			= mlxsw_sp_acl_tcam_init,
-	.fini			= mlxsw_sp_acl_tcam_fini,
-	.profile_ops		= mlxsw_sp_acl_tcam_profile_ops,
-};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
new file mode 100644
index 000000000000..cef769764505
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
@@ -0,0 +1,146 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Jiri Pirko <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_ACL_TCAM_H
+#define _MLXSW_SPECTRUM_ACL_TCAM_H
+
+#include <linux/list.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_keys.h"
+
+struct mlxsw_sp_acl_tcam {
+	unsigned long *used_regions; /* bit array */
+	unsigned int max_regions;
+	unsigned long *used_groups;  /* bit array */
+	unsigned int max_groups;
+	unsigned int max_group_size;
+	unsigned long priv[0];
+	/* priv has to be always the last item */
+};
+
+size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
+			   struct mlxsw_sp_acl_tcam *tcam);
+void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
+			    struct mlxsw_sp_acl_tcam *tcam);
+int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_acl_rule_info *rulei,
+				   u32 *priority, bool fillup_priority);
+
+struct mlxsw_sp_acl_profile_ops {
+	size_t ruleset_priv_size;
+	int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp,
+			   struct mlxsw_sp_acl_tcam *tcam, void *ruleset_priv);
+	void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv);
+	int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
+			    struct mlxsw_sp_port *mlxsw_sp_port,
+			    bool ingress);
+	void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
+			       struct mlxsw_sp_port *mlxsw_sp_port,
+			       bool ingress);
+	u16 (*ruleset_group_id)(void *ruleset_priv);
+	size_t (*rule_priv_size)(struct mlxsw_sp *mlxsw_sp);
+	int (*rule_add)(struct mlxsw_sp *mlxsw_sp,
+			void *ruleset_priv, void *rule_priv,
+			struct mlxsw_sp_acl_rule_info *rulei);
+	void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv);
+	int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv,
+				 bool *activity);
+};
+
+const struct mlxsw_sp_acl_profile_ops *
+mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp,
+			      enum mlxsw_sp_acl_profile profile);
+
+#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16
+#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16
+
+#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U)
+
+struct mlxsw_sp_acl_tcam_group;
+
+struct mlxsw_sp_acl_tcam_region {
+	struct list_head list; /* Member of a TCAM group */
+	struct list_head chunk_list; /* List of chunks under this region */
+	struct mlxsw_sp_acl_tcam_group *group;
+	enum mlxsw_reg_ptar_key_type key_type;
+	u16 id; /* ACL ID and region ID - they are same */
+	char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN];
+	struct mlxsw_afk_key_info *key_info;
+	struct mlxsw_sp *mlxsw_sp;
+	unsigned long priv[0];
+	/* priv has to be always the last item */
+};
+
+struct mlxsw_sp_acl_ctcam_region {
+	struct parman *parman;
+	struct mlxsw_sp_acl_tcam_region *region;
+};
+
+struct mlxsw_sp_acl_ctcam_chunk {
+	struct parman_prio parman_prio;
+};
+
+struct mlxsw_sp_acl_ctcam_entry {
+	struct parman_item parman_item;
+};
+
+int mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_acl_ctcam_region *cregion,
+				   struct mlxsw_sp_acl_tcam_region *region);
+void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion);
+void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion,
+				   struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				   unsigned int priority);
+void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk);
+int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_acl_ctcam_region *cregion,
+				 struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				 struct mlxsw_sp_acl_ctcam_entry *centry,
+				 struct mlxsw_sp_acl_rule_info *rulei,
+				 bool fillup_priority);
+void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_acl_ctcam_region *cregion,
+				  struct mlxsw_sp_acl_ctcam_chunk *cchunk,
+				  struct mlxsw_sp_acl_ctcam_entry *centry);
+static inline unsigned int
+mlxsw_sp_acl_ctcam_entry_offset(struct mlxsw_sp_acl_ctcam_entry *centry)
+{
+	return centry->parman_item.index;
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index fe4327f547d2..fd557585514d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2016 Jiri Pirko <[email protected]>
+ * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2018 Jiri Pirko <[email protected]>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -33,422 +33,74 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/bitops.h>
+#include <linux/slab.h>
 
 #include "spectrum.h"
 
-#define MLXSW_SP_KVDL_SINGLE_BASE 0
-#define MLXSW_SP_KVDL_SINGLE_SIZE 16384
-#define MLXSW_SP_KVDL_SINGLE_END \
-	(MLXSW_SP_KVDL_SINGLE_SIZE + MLXSW_SP_KVDL_SINGLE_BASE - 1)
-
-#define MLXSW_SP_KVDL_CHUNKS_BASE \
-	(MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE)
-#define MLXSW_SP_KVDL_CHUNKS_SIZE 49152
-#define MLXSW_SP_KVDL_CHUNKS_END \
-	(MLXSW_SP_KVDL_CHUNKS_SIZE + MLXSW_SP_KVDL_CHUNKS_BASE - 1)
-
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_BASE \
-	(MLXSW_SP_KVDL_CHUNKS_BASE + MLXSW_SP_KVDL_CHUNKS_SIZE)
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE \
-	(MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_LARGE_CHUNKS_BASE)
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_END \
-	(MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP_KVDL_LARGE_CHUNKS_BASE - 1)
-
-#define MLXSW_SP_KVDL_SINGLE_ALLOC_SIZE 1
-#define MLXSW_SP_KVDL_CHUNKS_ALLOC_SIZE 32
-#define MLXSW_SP_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512
-
-struct mlxsw_sp_kvdl_part_info {
-	unsigned int part_index;
-	unsigned int start_index;
-	unsigned int end_index;
-	unsigned int alloc_size;
-	enum mlxsw_sp_resource_id resource_id;
-};
-
-enum mlxsw_sp_kvdl_part_id {
-	MLXSW_SP_KVDL_PART_ID_SINGLE,
-	MLXSW_SP_KVDL_PART_ID_CHUNKS,
-	MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS,
-};
-
-#define MLXSW_SP_KVDL_PART_INFO(id)				\
-[MLXSW_SP_KVDL_PART_ID_##id] = {				\
-	.start_index = MLXSW_SP_KVDL_##id##_BASE,		\
-	.end_index = MLXSW_SP_KVDL_##id##_END,			\
-	.alloc_size = MLXSW_SP_KVDL_##id##_ALLOC_SIZE,		\
-	.resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id,	\
-}
-
-static const struct mlxsw_sp_kvdl_part_info mlxsw_sp_kvdl_parts_info[] = {
-	MLXSW_SP_KVDL_PART_INFO(SINGLE),
-	MLXSW_SP_KVDL_PART_INFO(CHUNKS),
-	MLXSW_SP_KVDL_PART_INFO(LARGE_CHUNKS),
-};
-
-#define MLXSW_SP_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp_kvdl_parts_info)
-
-struct mlxsw_sp_kvdl_part {
-	struct mlxsw_sp_kvdl_part_info info;
-	unsigned long usage[0];	/* Entries */
-};
-
 struct mlxsw_sp_kvdl {
-	struct mlxsw_sp_kvdl_part *parts[MLXSW_SP_KVDL_PARTS_INFO_LEN];
+	const struct mlxsw_sp_kvdl_ops *kvdl_ops;
+	unsigned long priv[0];
+	/* priv has to be always the last item */
 };
 
-static struct mlxsw_sp_kvdl_part *
-mlxsw_sp_kvdl_alloc_size_part(struct mlxsw_sp_kvdl *kvdl,
-			      unsigned int alloc_size)
-{
-	struct mlxsw_sp_kvdl_part *part, *min_part = NULL;
-	int i;
-
-	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
-		part = kvdl->parts[i];
-		if (alloc_size <= part->info.alloc_size &&
-		    (!min_part ||
-		     part->info.alloc_size <= min_part->info.alloc_size))
-			min_part = part;
-	}
-
-	return min_part ?: ERR_PTR(-ENOBUFS);
-}
-
-static struct mlxsw_sp_kvdl_part *
-mlxsw_sp_kvdl_index_part(struct mlxsw_sp_kvdl *kvdl, u32 kvdl_index)
-{
-	struct mlxsw_sp_kvdl_part *part;
-	int i;
-
-	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
-		part = kvdl->parts[i];
-		if (kvdl_index >= part->info.start_index &&
-		    kvdl_index <= part->info.end_index)
-			return part;
-	}
-
-	return ERR_PTR(-EINVAL);
-}
-
-static u32
-mlxsw_sp_entry_index_kvdl_index(const struct mlxsw_sp_kvdl_part_info *info,
-				unsigned int entry_index)
-{
-	return info->start_index + entry_index * info->alloc_size;
-}
-
-static unsigned int
-mlxsw_sp_kvdl_index_entry_index(const struct mlxsw_sp_kvdl_part_info *info,
-				u32 kvdl_index)
-{
-	return (kvdl_index - info->start_index) / info->alloc_size;
-}
-
-static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part,
-				    u32 *p_kvdl_index)
-{
-	const struct mlxsw_sp_kvdl_part_info *info = &part->info;
-	unsigned int entry_index, nr_entries;
-
-	nr_entries = (info->end_index - info->start_index + 1) /
-		     info->alloc_size;
-	entry_index = find_first_zero_bit(part->usage, nr_entries);
-	if (entry_index == nr_entries)
-		return -ENOBUFS;
-	__set_bit(entry_index, part->usage);
-
-	*p_kvdl_index = mlxsw_sp_entry_index_kvdl_index(info, entry_index);
-
-	return 0;
-}
-
-static void mlxsw_sp_kvdl_part_free(struct mlxsw_sp_kvdl_part *part,
-				    u32 kvdl_index)
-{
-	const struct mlxsw_sp_kvdl_part_info *info = &part->info;
-	unsigned int entry_index;
-
-	entry_index = mlxsw_sp_kvdl_index_entry_index(info, kvdl_index);
-	__clear_bit(entry_index, part->usage);
-}
-
-int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count,
-			u32 *p_entry_index)
-{
-	struct mlxsw_sp_kvdl_part *part;
-
-	/* Find partition with smallest allocation size satisfying the
-	 * requested size.
-	 */
-	part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count);
-	if (IS_ERR(part))
-		return PTR_ERR(part);
-
-	return mlxsw_sp_kvdl_part_alloc(part, p_entry_index);
-}
-
-void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index)
-{
-	struct mlxsw_sp_kvdl_part *part;
-
-	part = mlxsw_sp_kvdl_index_part(mlxsw_sp->kvdl, entry_index);
-	if (IS_ERR(part))
-		return;
-	mlxsw_sp_kvdl_part_free(part, entry_index);
-}
-
-int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
-				   unsigned int entry_count,
-				   unsigned int *p_alloc_size)
-{
-	struct mlxsw_sp_kvdl_part *part;
-
-	part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count);
-	if (IS_ERR(part))
-		return PTR_ERR(part);
-
-	*p_alloc_size = part->info.alloc_size;
-
-	return 0;
-}
-
-static void mlxsw_sp_kvdl_part_update(struct mlxsw_sp_kvdl_part *part,
-				      struct mlxsw_sp_kvdl_part *part_prev,
-				      unsigned int size)
-{
-
-	if (!part_prev) {
-		part->info.end_index = size - 1;
-	} else {
-		part->info.start_index = part_prev->info.end_index + 1;
-		part->info.end_index = part->info.start_index + size - 1;
-	}
-}
-
-static struct mlxsw_sp_kvdl_part *
-mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp,
-			const struct mlxsw_sp_kvdl_part_info *info,
-			struct mlxsw_sp_kvdl_part *part_prev)
+int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
 {
-	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
-	struct mlxsw_sp_kvdl_part *part;
-	bool need_update = true;
-	unsigned int nr_entries;
-	size_t usage_size;
-	u64 resource_size;
+	const struct mlxsw_sp_kvdl_ops *kvdl_ops = mlxsw_sp->kvdl_ops;
+	struct mlxsw_sp_kvdl *kvdl;
 	int err;
 
-	err = devlink_resource_size_get(devlink, info->resource_id,
-					&resource_size);
-	if (err) {
-		need_update = false;
-		resource_size = info->end_index - info->start_index + 1;
-	}
-
-	nr_entries = div_u64(resource_size, info->alloc_size);
-	usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long);
-	part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL);
-	if (!part)
-		return ERR_PTR(-ENOMEM);
-
-	memcpy(&part->info, info, sizeof(part->info));
-
-	if (need_update)
-		mlxsw_sp_kvdl_part_update(part, part_prev, resource_size);
-	return part;
-}
-
-static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp_kvdl_part *part)
-{
-	kfree(part);
-}
-
-static int mlxsw_sp_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp)
-{
-	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
-	const struct mlxsw_sp_kvdl_part_info *info;
-	struct mlxsw_sp_kvdl_part *part_prev = NULL;
-	int err, i;
+	kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl) + kvdl_ops->priv_size,
+		       GFP_KERNEL);
+	if (!kvdl)
+		return -ENOMEM;
+	kvdl->kvdl_ops = kvdl_ops;
+	mlxsw_sp->kvdl = kvdl;
 
-	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) {
-		info = &mlxsw_sp_kvdl_parts_info[i];
-		kvdl->parts[i] = mlxsw_sp_kvdl_part_init(mlxsw_sp, info,
-							 part_prev);
-		if (IS_ERR(kvdl->parts[i])) {
-			err = PTR_ERR(kvdl->parts[i]);
-			goto err_kvdl_part_init;
-		}
-		part_prev = kvdl->parts[i];
-	}
+	err = kvdl_ops->init(mlxsw_sp, kvdl->priv);
+	if (err)
+		goto err_init;
 	return 0;
 
-err_kvdl_part_init:
-	for (i--; i >= 0; i--)
-		mlxsw_sp_kvdl_part_fini(kvdl->parts[i]);
+err_init:
+	kfree(kvdl);
 	return err;
 }
 
-static void mlxsw_sp_kvdl_parts_fini(struct mlxsw_sp *mlxsw_sp)
+void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
-	int i;
-
-	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++)
-		mlxsw_sp_kvdl_part_fini(kvdl->parts[i]);
-}
-
-static u64 mlxsw_sp_kvdl_part_occ(struct mlxsw_sp_kvdl_part *part)
-{
-	const struct mlxsw_sp_kvdl_part_info *info = &part->info;
-	unsigned int nr_entries;
-	int bit = -1;
-	u64 occ = 0;
-
-	nr_entries = (info->end_index -
-		      info->start_index + 1) /
-		      info->alloc_size;
-	while ((bit = find_next_bit(part->usage, nr_entries, bit + 1))
-		< nr_entries)
-		occ += info->alloc_size;
-	return occ;
-}
-
-static u64 mlxsw_sp_kvdl_occ_get(void *priv)
-{
-	const struct mlxsw_sp *mlxsw_sp = priv;
-	u64 occ = 0;
-	int i;
-
-	for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++)
-		occ += mlxsw_sp_kvdl_part_occ(mlxsw_sp->kvdl->parts[i]);
-
-	return occ;
-}
-
-static u64 mlxsw_sp_kvdl_single_occ_get(void *priv)
-{
-	const struct mlxsw_sp *mlxsw_sp = priv;
-	struct mlxsw_sp_kvdl_part *part;
-
-	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_SINGLE];
-	return mlxsw_sp_kvdl_part_occ(part);
-}
-
-static u64 mlxsw_sp_kvdl_chunks_occ_get(void *priv)
-{
-	const struct mlxsw_sp *mlxsw_sp = priv;
-	struct mlxsw_sp_kvdl_part *part;
-
-	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_CHUNKS];
-	return mlxsw_sp_kvdl_part_occ(part);
-}
-
-static u64 mlxsw_sp_kvdl_large_chunks_occ_get(void *priv)
-{
-	const struct mlxsw_sp *mlxsw_sp = priv;
-	struct mlxsw_sp_kvdl_part *part;
 
-	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS];
-	return mlxsw_sp_kvdl_part_occ(part);
+	kvdl->kvdl_ops->fini(mlxsw_sp, kvdl->priv);
+	kfree(kvdl);
 }
 
-int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
+int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp,
+			enum mlxsw_sp_kvdl_entry_type type,
+			unsigned int entry_count, u32 *p_entry_index)
 {
-	struct devlink *devlink = priv_to_devlink(mlxsw_core);
-	static struct devlink_resource_size_params size_params;
-	u32 kvdl_max_size;
-	int err;
-
-	kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) -
-			MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) -
-			MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE);
-
-	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-					  MLXSW_SP_KVDL_SINGLE_ALLOC_SIZE,
-					  DEVLINK_RESOURCE_UNIT_ENTRY);
-	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES,
-					MLXSW_SP_KVDL_SINGLE_SIZE,
-					MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
-					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&size_params);
-	if (err)
-		return err;
-
-	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-					  MLXSW_SP_KVDL_CHUNKS_ALLOC_SIZE,
-					  DEVLINK_RESOURCE_UNIT_ENTRY);
-	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS,
-					MLXSW_SP_KVDL_CHUNKS_SIZE,
-					MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
-					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&size_params);
-	if (err)
-		return err;
+	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
 
-	devlink_resource_size_params_init(&size_params, 0, kvdl_max_size,
-					  MLXSW_SP_KVDL_LARGE_CHUNKS_ALLOC_SIZE,
-					  DEVLINK_RESOURCE_UNIT_ENTRY);
-	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS,
-					MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
-					MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
-					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&size_params);
-	return err;
+	return kvdl->kvdl_ops->alloc(mlxsw_sp, kvdl->priv, type,
+				     entry_count, p_entry_index);
 }
 
-int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
+void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp,
+			enum mlxsw_sp_kvdl_entry_type type,
+			unsigned int entry_count, int entry_index)
 {
-	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
-	struct mlxsw_sp_kvdl *kvdl;
-	int err;
-
-	kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl), GFP_KERNEL);
-	if (!kvdl)
-		return -ENOMEM;
-	mlxsw_sp->kvdl = kvdl;
-
-	err = mlxsw_sp_kvdl_parts_init(mlxsw_sp);
-	if (err)
-		goto err_kvdl_parts_init;
-
-	devlink_resource_occ_get_register(devlink,
-					  MLXSW_SP_RESOURCE_KVD_LINEAR,
-					  mlxsw_sp_kvdl_occ_get,
-					  mlxsw_sp);
-	devlink_resource_occ_get_register(devlink,
-					  MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
-					  mlxsw_sp_kvdl_single_occ_get,
-					  mlxsw_sp);
-	devlink_resource_occ_get_register(devlink,
-					  MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
-					  mlxsw_sp_kvdl_chunks_occ_get,
-					  mlxsw_sp);
-	devlink_resource_occ_get_register(devlink,
-					  MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
-					  mlxsw_sp_kvdl_large_chunks_occ_get,
-					  mlxsw_sp);
-
-	return 0;
+	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
 
-err_kvdl_parts_init:
-	kfree(mlxsw_sp->kvdl);
-	return err;
+	kvdl->kvdl_ops->free(mlxsw_sp, kvdl->priv, type,
+			     entry_count, entry_index);
 }
 
-void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp)
+int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp,
+				    enum mlxsw_sp_kvdl_entry_type type,
+				    unsigned int entry_count,
+				    unsigned int *p_alloc_count)
 {
-	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl;
 
-	devlink_resource_occ_get_unregister(devlink,
-					    MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS);
-	devlink_resource_occ_get_unregister(devlink,
-					    MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS);
-	devlink_resource_occ_get_unregister(devlink,
-					    MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE);
-	devlink_resource_occ_get_unregister(devlink,
-					    MLXSW_SP_RESOURCE_KVD_LINEAR);
-	mlxsw_sp_kvdl_parts_fini(mlxsw_sp);
-	kfree(mlxsw_sp->kvdl);
+	return kvdl->kvdl_ops->alloc_size_query(mlxsw_sp, kvdl->priv, type,
+						entry_count, p_alloc_count);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
index a82539609d49..98dcaf78365c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -1075,6 +1075,6 @@ void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)
 	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
 
 	cancel_delayed_work_sync(&mr->stats_update_dw);
-	mr->mr_ops->fini(mr->priv);
+	mr->mr_ops->fini(mlxsw_sp, mr->priv);
 	kfree(mr);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
index 7c864a86811d..c92fa90dca31 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
@@ -46,15 +46,6 @@ enum mlxsw_sp_mr_route_action {
 	MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD,
 };
 
-enum mlxsw_sp_mr_route_prio {
-	MLXSW_SP_MR_ROUTE_PRIO_SG,
-	MLXSW_SP_MR_ROUTE_PRIO_STARG,
-	MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
-	__MLXSW_SP_MR_ROUTE_PRIO_MAX
-};
-
-#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
-
 struct mlxsw_sp_mr_route_key {
 	int vrid;
 	enum mlxsw_sp_l3proto proto;
@@ -101,7 +92,7 @@ struct mlxsw_sp_mr_ops {
 			      u16 erif_index);
 	void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
 			      void *route_priv);
-	void (*fini)(void *priv);
+	void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv);
 };
 
 struct mlxsw_sp_mr;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
index 4f4c0d311883..e9c9f1f45b9d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
@@ -1,7 +1,8 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
- * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2017 Yotam Gigi <[email protected]>
+ * Copyright (c) 2018 Jiri Pirko <[email protected]>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,7 +36,6 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
-#include <linux/parman.h>
 
 #include "spectrum_mr_tcam.h"
 #include "reg.h"
@@ -43,15 +43,8 @@
 #include "core_acl_flex_actions.h"
 #include "spectrum_mr.h"
 
-struct mlxsw_sp_mr_tcam_region {
-	struct mlxsw_sp *mlxsw_sp;
-	enum mlxsw_reg_rtar_key_type rtar_key_type;
-	struct parman *parman;
-	struct parman_prio *parman_prios;
-};
-
 struct mlxsw_sp_mr_tcam {
-	struct mlxsw_sp_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX];
+	void *priv;
 };
 
 /* This struct maps to one RIGR2 register entry */
@@ -84,8 +77,6 @@ mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list)
 	INIT_LIST_HEAD(&erif_list->erif_sublists);
 }
 
-#define MLXSW_SP_KVDL_RIGR2_SIZE 1
-
 static struct mlxsw_sp_mr_erif_sublist *
 mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_mr_tcam_erif_list *erif_list)
@@ -96,8 +87,8 @@ mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp,
 	erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL);
 	if (!erif_sublist)
 		return ERR_PTR(-ENOMEM);
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_RIGR2_SIZE,
-				  &erif_sublist->rigr2_kvdl_index);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+				  1, &erif_sublist->rigr2_kvdl_index);
 	if (err) {
 		kfree(erif_sublist);
 		return ERR_PTR(err);
@@ -112,7 +103,8 @@ mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_mr_erif_sublist *erif_sublist)
 {
 	list_del(&erif_sublist->list);
-	mlxsw_sp_kvdl_free(mlxsw_sp, erif_sublist->rigr2_kvdl_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+			   1, erif_sublist->rigr2_kvdl_index);
 	kfree(erif_sublist);
 }
 
@@ -221,12 +213,11 @@ struct mlxsw_sp_mr_tcam_route {
 	struct mlxsw_sp_mr_tcam_erif_list erif_list;
 	struct mlxsw_afa_block *afa_block;
 	u32 counter_index;
-	struct parman_item parman_item;
-	struct parman_prio *parman_prio;
 	enum mlxsw_sp_mr_route_action action;
 	struct mlxsw_sp_mr_route_key key;
 	u16 irif_index;
 	u16 min_mtu;
+	void *priv;
 };
 
 static struct mlxsw_afa_block *
@@ -297,60 +288,6 @@ mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block)
 	mlxsw_afa_block_destroy(afa_block);
 }
 
-static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
-					  struct parman_item *parman_item,
-					  struct mlxsw_sp_mr_route_key *key,
-					  struct mlxsw_afa_block *afa_block)
-{
-	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
-
-	switch (key->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index,
-					  key->vrid,
-					  MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
-					  ntohl(key->group.addr4),
-					  ntohl(key->group_mask.addr4),
-					  ntohl(key->source.addr4),
-					  ntohl(key->source_mask.addr4),
-					  mlxsw_afa_block_first_set(afa_block));
-		break;
-	case MLXSW_SP_L3_PROTO_IPV6:
-		mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index,
-					  key->vrid,
-					  MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
-					  key->group.addr6,
-					  key->group_mask.addr6,
-					  key->source.addr6,
-					  key->source_mask.addr6,
-					  mlxsw_afa_block_first_set(afa_block));
-	}
-
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
-}
-
-static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid,
-					 struct mlxsw_sp_mr_route_key *key,
-					 struct parman_item *parman_item)
-{
-	struct in6_addr zero_addr = IN6ADDR_ANY_INIT;
-	char rmft2_pl[MLXSW_REG_RMFT2_LEN];
-
-	switch (key->proto) {
-	case MLXSW_SP_L3_PROTO_IPV4:
-		mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index,
-					  vrid, 0, 0, 0, 0, 0, 0, NULL);
-		break;
-	case MLXSW_SP_L3_PROTO_IPV6:
-		mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index,
-					  vrid, 0, 0, zero_addr, zero_addr,
-					  zero_addr, zero_addr, NULL);
-		break;
-	}
-
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
-}
-
 static int
 mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_mr_tcam_erif_list *erif_list,
@@ -370,51 +307,12 @@ mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
-static struct mlxsw_sp_mr_tcam_region *
-mlxsw_sp_mr_tcam_protocol_region(struct mlxsw_sp_mr_tcam *mr_tcam,
-				 enum mlxsw_sp_l3proto proto)
-{
-	return &mr_tcam->tcam_regions[proto];
-}
-
-static int
-mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam,
-				       struct mlxsw_sp_mr_tcam_route *route,
-				       enum mlxsw_sp_mr_route_prio prio)
-{
-	struct mlxsw_sp_mr_tcam_region *tcam_region;
-	int err;
-
-	tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam,
-						       route->key.proto);
-	err = parman_item_add(tcam_region->parman,
-			      &tcam_region->parman_prios[prio],
-			      &route->parman_item);
-	if (err)
-		return err;
-
-	route->parman_prio = &tcam_region->parman_prios[prio];
-	return 0;
-}
-
-static void
-mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam,
-					  struct mlxsw_sp_mr_tcam_route *route)
-{
-	struct mlxsw_sp_mr_tcam_region *tcam_region;
-
-	tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam,
-						       route->key.proto);
-
-	parman_item_remove(tcam_region->parman,
-			   route->parman_prio, &route->parman_item);
-}
-
 static int
 mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
 			      void *route_priv,
 			      struct mlxsw_sp_mr_route_params *route_params)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
 	int err;
@@ -448,22 +346,23 @@ mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
 		goto err_afa_block_create;
 	}
 
-	/* Allocate place in the TCAM */
-	err = mlxsw_sp_mr_tcam_route_parman_item_add(mr_tcam, route,
-						     route_params->prio);
-	if (err)
-		goto err_parman_item_add;
+	route->priv = kzalloc(ops->route_priv_size, GFP_KERNEL);
+	if (!route->priv) {
+		err = -ENOMEM;
+		goto err_route_priv_alloc;
+	}
 
 	/* Write the route to the TCAM */
-	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-					     &route->key, route->afa_block);
+	err = ops->route_create(mlxsw_sp, mr_tcam->priv, route->priv,
+				&route->key, route->afa_block,
+				route_params->prio);
 	if (err)
-		goto err_route_replace;
+		goto err_route_create;
 	return 0;
 
-err_route_replace:
-	mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
-err_parman_item_add:
+err_route_create:
+	kfree(route->priv);
+err_route_priv_alloc:
 	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
 err_afa_block_create:
 	mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
@@ -476,12 +375,12 @@ err_counter_alloc:
 static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp,
 					   void *priv, void *route_priv)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
 
-	mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid,
-				      &route->key, &route->parman_item);
-	mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
+	ops->route_destroy(mlxsw_sp, mr_tcam->priv, route->priv, &route->key);
+	kfree(route->priv);
 	mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
 	mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
 	mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
@@ -502,6 +401,7 @@ mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp,
 				     void *route_priv,
 				     enum mlxsw_sp_mr_route_action route_action)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_afa_block *afa_block;
 	int err;
@@ -516,8 +416,7 @@ mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp,
 		return PTR_ERR(afa_block);
 
 	/* Update the TCAM route entry */
-	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-					     &route->key, afa_block);
+	err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
 	if (err)
 		goto err;
 
@@ -534,6 +433,7 @@ err:
 static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp,
 						 void *route_priv, u16 min_mtu)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_afa_block *afa_block;
 	int err;
@@ -549,8 +449,7 @@ static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp,
 		return PTR_ERR(afa_block);
 
 	/* Update the TCAM route entry */
-	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-					     &route->key, afa_block);
+	err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
 	if (err)
 		goto err;
 
@@ -596,6 +495,7 @@ static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp,
 					   void *route_priv, u16 erif_index)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_sp_mr_erif_sublist *erif_sublist;
 	struct mlxsw_sp_mr_tcam_erif_list erif_list;
@@ -630,8 +530,7 @@ static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp,
 	}
 
 	/* Update the TCAM route entry */
-	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-					     &route->key, afa_block);
+	err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
 	if (err)
 		goto err_route_write;
 
@@ -653,6 +552,7 @@ static int
 mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv,
 			      struct mlxsw_sp_mr_route_info *route_info)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam_route *route = route_priv;
 	struct mlxsw_sp_mr_tcam_erif_list erif_list;
 	struct mlxsw_afa_block *afa_block;
@@ -677,8 +577,7 @@ mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv,
 	}
 
 	/* Update the TCAM route entry */
-	err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
-					     &route->key, afa_block);
+	err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block);
 	if (err)
 		goto err_route_write;
 
@@ -699,167 +598,36 @@ err_erif_populate:
 	return err;
 }
 
-#define MLXSW_SP_MR_TCAM_REGION_BASE_COUNT 16
-#define MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP 16
-
-static int
-mlxsw_sp_mr_tcam_region_alloc(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
-{
-	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-	char rtar_pl[MLXSW_REG_RTAR_LEN];
-
-	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE,
-			    mr_tcam_region->rtar_key_type,
-			    MLXSW_SP_MR_TCAM_REGION_BASE_COUNT);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
-}
-
-static void
-mlxsw_sp_mr_tcam_region_free(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
-{
-	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-	char rtar_pl[MLXSW_REG_RTAR_LEN];
-
-	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE,
-			    mr_tcam_region->rtar_key_type, 0);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
-}
-
-static int mlxsw_sp_mr_tcam_region_parman_resize(void *priv,
-						 unsigned long new_count)
-{
-	struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv;
-	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-	char rtar_pl[MLXSW_REG_RTAR_LEN];
-	u64 max_tcam_rules;
-
-	max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
-	if (new_count > max_tcam_rules)
-		return -EINVAL;
-	mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE,
-			    mr_tcam_region->rtar_key_type, new_count);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
-}
-
-static void mlxsw_sp_mr_tcam_region_parman_move(void *priv,
-						unsigned long from_index,
-						unsigned long to_index,
-						unsigned long count)
-{
-	struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv;
-	struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
-	char rrcr_pl[MLXSW_REG_RRCR_LEN];
-
-	mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE,
-			    from_index, count,
-			    mr_tcam_region->rtar_key_type, to_index);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl);
-}
-
-static const struct parman_ops mlxsw_sp_mr_tcam_region_parman_ops = {
-	.base_count	= MLXSW_SP_MR_TCAM_REGION_BASE_COUNT,
-	.resize_step	= MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP,
-	.resize		= mlxsw_sp_mr_tcam_region_parman_resize,
-	.move		= mlxsw_sp_mr_tcam_region_parman_move,
-	.algo		= PARMAN_ALGO_TYPE_LSORT,
-};
-
-static int
-mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp,
-			     struct mlxsw_sp_mr_tcam_region *mr_tcam_region,
-			     enum mlxsw_reg_rtar_key_type rtar_key_type)
-{
-	struct parman_prio *parman_prios;
-	struct parman *parman;
-	int err;
-	int i;
-
-	mr_tcam_region->rtar_key_type = rtar_key_type;
-	mr_tcam_region->mlxsw_sp = mlxsw_sp;
-
-	err = mlxsw_sp_mr_tcam_region_alloc(mr_tcam_region);
-	if (err)
-		return err;
-
-	parman = parman_create(&mlxsw_sp_mr_tcam_region_parman_ops,
-			       mr_tcam_region);
-	if (!parman) {
-		err = -ENOMEM;
-		goto err_parman_create;
-	}
-	mr_tcam_region->parman = parman;
-
-	parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1,
-				     sizeof(*parman_prios), GFP_KERNEL);
-	if (!parman_prios) {
-		err = -ENOMEM;
-		goto err_parman_prios_alloc;
-	}
-	mr_tcam_region->parman_prios = parman_prios;
-
-	for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
-		parman_prio_init(mr_tcam_region->parman,
-				 &mr_tcam_region->parman_prios[i], i);
-	return 0;
-
-err_parman_prios_alloc:
-	parman_destroy(parman);
-err_parman_create:
-	mlxsw_sp_mr_tcam_region_free(mr_tcam_region);
-	return err;
-}
-
-static void
-mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
-{
-	int i;
-
-	for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
-		parman_prio_fini(&mr_tcam_region->parman_prios[i]);
-	kfree(mr_tcam_region->parman_prios);
-	parman_destroy(mr_tcam_region->parman);
-	mlxsw_sp_mr_tcam_region_free(mr_tcam_region);
-}
-
 static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
-	struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
-	u32 rtar_key;
 	int err;
 
-	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) ||
-	    !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES))
 		return -EIO;
 
-	rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST;
-	err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
-					   &region[MLXSW_SP_L3_PROTO_IPV4],
-					   rtar_key);
-	if (err)
-		return err;
+	mr_tcam->priv = kzalloc(ops->priv_size, GFP_KERNEL);
+	if (!mr_tcam->priv)
+		return -ENOMEM;
 
-	rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST;
-	err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
-					   &region[MLXSW_SP_L3_PROTO_IPV6],
-					   rtar_key);
+	err = ops->init(mlxsw_sp, mr_tcam->priv);
 	if (err)
-		goto err_ipv6_region_init;
-
+		goto err_init;
 	return 0;
 
-err_ipv6_region_init:
-	mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+err_init:
+	kfree(mr_tcam->priv);
 	return err;
 }
 
-static void mlxsw_sp_mr_tcam_fini(void *priv)
+static void mlxsw_sp_mr_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv)
 {
+	const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops;
 	struct mlxsw_sp_mr_tcam *mr_tcam = priv;
-	struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0];
 
-	mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV6]);
-	mlxsw_sp_mr_tcam_region_fini(&region[MLXSW_SP_L3_PROTO_IPV4]);
+	ops->fini(mr_tcam->priv);
+	kfree(mr_tcam->priv);
 }
 
 const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 79dcadb40857..b4126db695dd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1106,7 +1106,8 @@ mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
 	u32 tunnel_index;
 	int err;
 
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+				  1, &tunnel_index);
 	if (err)
 		return err;
 
@@ -1122,7 +1123,8 @@ static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
 	/* Unlink this node from the IPIP entry that it's the decap entry of. */
 	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
 	fib_entry->decap.ipip_entry = NULL;
-	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+			   1, fib_entry->decap.tunnel_index);
 }
 
 static struct mlxsw_sp_fib_node *
@@ -3162,8 +3164,9 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
 	 * by the device and make sure the request can be satisfied.
 	 */
 	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
-	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
-					     &alloc_size);
+	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
+					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+					      *p_adj_grp_size, &alloc_size);
 	if (err)
 		return err;
 	/* It is possible the allocation results in more allocated
@@ -3275,7 +3278,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 		/* No valid allocation size available. */
 		goto set_trap;
 
-	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
+	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+				  ecmp_size, &adj_index);
 	if (err) {
 		/* We ran out of KVD linear space, just set the
 		 * trap and let everything flow through kernel.
@@ -3310,7 +3314,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 
 	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
 					     old_adj_index, old_ecmp_size);
-	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
+	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+			   old_ecmp_size, old_adj_index);
 	if (err) {
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
 		goto set_trap;
@@ -3332,7 +3337,8 @@ set_trap:
 	if (err)
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
 	if (old_adj_index_valid)
-		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
+		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+				   nh_grp->ecmp_size, nh_grp->adj_index);
 }
 
 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c
index 4c417903e9be..094cea775d0c 100644
--- a/drivers/net/wimax/i2400m/control.c
+++ b/drivers/net/wimax/i2400m/control.c
@@ -566,13 +566,12 @@ static void i2400m_msg_ack_hook(struct i2400m *i2400m,
 {
 	int result;
 	struct device *dev = i2400m_dev(i2400m);
-	unsigned ack_type, ack_status;
+	unsigned int ack_type;
 	char strerr[32];
 
 	/* Chew on the message, we might need some information from
 	 * here */
 	ack_type = le16_to_cpu(l3l4_hdr->type);
-	ack_status = le16_to_cpu(l3l4_hdr->status);
 	switch (ack_type) {
 	case I2400M_MT_CMD_ENTER_POWERSAVE:
 		/* This is just left here for the sake of example, as
diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c
index a89b5685e68b..e9fc168bb734 100644
--- a/drivers/net/wimax/i2400m/fw.c
+++ b/drivers/net/wimax/i2400m/fw.c
@@ -1552,7 +1552,6 @@ int i2400m_dev_bootstrap(struct i2400m *i2400m, enum i2400m_bri flags)
 	int ret, itr;
 	struct device *dev = i2400m_dev(i2400m);
 	struct i2400m_fw *i2400m_fw;
-	const struct i2400m_bcf_hdr *bcf;	/* Firmware data */
 	const struct firmware *fw;
 	const char *fw_name;
 
@@ -1574,7 +1573,7 @@ int i2400m_dev_bootstrap(struct i2400m *i2400m, enum i2400m_bri flags)
 	}
 
 	/* Load firmware files to memory. */
-	for (itr = 0, bcf = NULL, ret = -ENOENT; ; itr++) {
+	for (itr = 0, ret = -ENOENT; ; itr++) {
 		fw_name = i2400m->bus_fw_names[itr];
 		if (fw_name == NULL) {
 			dev_err(dev, "Could not find a usable firmware image\n");
diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c
index a654687b5fa2..9ab3f0fdfea4 100644
--- a/drivers/net/wimax/i2400m/netdev.c
+++ b/drivers/net/wimax/i2400m/netdev.c
@@ -535,14 +535,12 @@ void i2400m_net_erx(struct i2400m *i2400m, struct sk_buff *skb,
 {
 	struct net_device *net_dev = i2400m->wimax_dev.net_dev;
 	struct device *dev = i2400m_dev(i2400m);
-	int protocol;
 
 	d_fnstart(2, dev, "(i2400m %p skb %p [%u] cs %d)\n",
 		  i2400m, skb, skb->len, cs);
 	switch(cs) {
 	case I2400M_CS_IPV4_0:
 	case I2400M_CS_IPV4:
-		protocol = ETH_P_IP;
 		i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev,
 					  skb->data - ETH_HLEN,
 					  cpu_to_be16(ETH_P_IP));
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 5a5e0a2ab2a3..07efffd0c759 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -294,12 +294,16 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	     int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct sk_buff *skb, *next;
+	struct list_head sublist;
 
+	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
-		int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
-		if (ret != 1)
-			list_del(&skb->list);
+		list_del(&skb->list);
+		if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1)
+			list_add_tail(&skb->list, &sublist);
 	}
+	/* Put passed packets back on main list */
+	list_splice(&sublist, head);
 }
 
 /* Call setsockopt() */
@@ -410,8 +414,17 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 
 extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
 void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+struct nf_conntrack_tuple;
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+			 const struct sk_buff *skb);
 #else
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
+struct nf_conntrack_tuple;
+static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+				       const struct sk_buff *skb)
+{
+	return false;
+}
 #endif
 
 struct nf_conn;
@@ -420,6 +433,8 @@ enum ip_conntrack_info;
 struct nf_ct_hook {
 	int (*update)(struct net *net, struct sk_buff *skb);
 	void (*destroy)(struct nf_conntrack *);
+	bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
+			      const struct sk_buff *);
 };
 extern struct nf_ct_hook __rcu *nf_ct_hook;
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index cce37694776e..f6cb20e6e524 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -954,6 +954,8 @@ struct rate_sample {
 	u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
 	s32  delivered;		/* number of packets delivered over interval */
 	long interval_us;	/* time for tp->delivered to incr "delivered" */
+	u32 snd_interval_us;	/* snd interval for delivered packets */
+	u32 rcv_interval_us;	/* rcv interval for delivered packets */
 	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
 	int  losses;		/* number of packets marked lost upon ACK */
 	u32  acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index f8f4539f1135..97ff3c17ec4d 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -155,8 +155,8 @@ enum txtime_flags {
 };
 
 struct sock_txtime {
-	clockid_t       clockid;        /* reference clockid */
-	u32             flags;          /* flags defined by enum txtime_flags */
+	clockid_t	clockid;	/* reference clockid */
+	__u32		flags;		/* as defined by enum txtime_flags */
 };
 
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 949118461009..d9cc9dc4f547 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -955,4 +955,118 @@ enum {
 
 #define TCA_ETF_MAX (__TCA_ETF_MAX - 1)
 
+
+/* CAKE */
+enum {
+	TCA_CAKE_UNSPEC,
+	TCA_CAKE_PAD,
+	TCA_CAKE_BASE_RATE64,
+	TCA_CAKE_DIFFSERV_MODE,
+	TCA_CAKE_ATM,
+	TCA_CAKE_FLOW_MODE,
+	TCA_CAKE_OVERHEAD,
+	TCA_CAKE_RTT,
+	TCA_CAKE_TARGET,
+	TCA_CAKE_AUTORATE,
+	TCA_CAKE_MEMORY,
+	TCA_CAKE_NAT,
+	TCA_CAKE_RAW,
+	TCA_CAKE_WASH,
+	TCA_CAKE_MPU,
+	TCA_CAKE_INGRESS,
+	TCA_CAKE_ACK_FILTER,
+	TCA_CAKE_SPLIT_GSO,
+	__TCA_CAKE_MAX
+};
+#define TCA_CAKE_MAX	(__TCA_CAKE_MAX - 1)
+
+enum {
+	__TCA_CAKE_STATS_INVALID,
+	TCA_CAKE_STATS_PAD,
+	TCA_CAKE_STATS_CAPACITY_ESTIMATE64,
+	TCA_CAKE_STATS_MEMORY_LIMIT,
+	TCA_CAKE_STATS_MEMORY_USED,
+	TCA_CAKE_STATS_AVG_NETOFF,
+	TCA_CAKE_STATS_MIN_NETLEN,
+	TCA_CAKE_STATS_MAX_NETLEN,
+	TCA_CAKE_STATS_MIN_ADJLEN,
+	TCA_CAKE_STATS_MAX_ADJLEN,
+	TCA_CAKE_STATS_TIN_STATS,
+	TCA_CAKE_STATS_DEFICIT,
+	TCA_CAKE_STATS_COBALT_COUNT,
+	TCA_CAKE_STATS_DROPPING,
+	TCA_CAKE_STATS_DROP_NEXT_US,
+	TCA_CAKE_STATS_P_DROP,
+	TCA_CAKE_STATS_BLUE_TIMER_US,
+	__TCA_CAKE_STATS_MAX
+};
+#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
+
+enum {
+	__TCA_CAKE_TIN_STATS_INVALID,
+	TCA_CAKE_TIN_STATS_PAD,
+	TCA_CAKE_TIN_STATS_SENT_PACKETS,
+	TCA_CAKE_TIN_STATS_SENT_BYTES64,
+	TCA_CAKE_TIN_STATS_DROPPED_PACKETS,
+	TCA_CAKE_TIN_STATS_DROPPED_BYTES64,
+	TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS,
+	TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64,
+	TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS,
+	TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64,
+	TCA_CAKE_TIN_STATS_BACKLOG_PACKETS,
+	TCA_CAKE_TIN_STATS_BACKLOG_BYTES,
+	TCA_CAKE_TIN_STATS_THRESHOLD_RATE64,
+	TCA_CAKE_TIN_STATS_TARGET_US,
+	TCA_CAKE_TIN_STATS_INTERVAL_US,
+	TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS,
+	TCA_CAKE_TIN_STATS_WAY_MISSES,
+	TCA_CAKE_TIN_STATS_WAY_COLLISIONS,
+	TCA_CAKE_TIN_STATS_PEAK_DELAY_US,
+	TCA_CAKE_TIN_STATS_AVG_DELAY_US,
+	TCA_CAKE_TIN_STATS_BASE_DELAY_US,
+	TCA_CAKE_TIN_STATS_SPARSE_FLOWS,
+	TCA_CAKE_TIN_STATS_BULK_FLOWS,
+	TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS,
+	TCA_CAKE_TIN_STATS_MAX_SKBLEN,
+	TCA_CAKE_TIN_STATS_FLOW_QUANTUM,
+	__TCA_CAKE_TIN_STATS_MAX
+};
+#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1)
+#define TC_CAKE_MAX_TINS (8)
+
+enum {
+	CAKE_FLOW_NONE = 0,
+	CAKE_FLOW_SRC_IP,
+	CAKE_FLOW_DST_IP,
+	CAKE_FLOW_HOSTS,    /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */
+	CAKE_FLOW_FLOWS,
+	CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */
+	CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */
+	CAKE_FLOW_TRIPLE,   /* = CAKE_FLOW_HOSTS  | CAKE_FLOW_FLOWS */
+	CAKE_FLOW_MAX,
+};
+
+enum {
+	CAKE_DIFFSERV_DIFFSERV3 = 0,
+	CAKE_DIFFSERV_DIFFSERV4,
+	CAKE_DIFFSERV_DIFFSERV8,
+	CAKE_DIFFSERV_BESTEFFORT,
+	CAKE_DIFFSERV_PRECEDENCE,
+	CAKE_DIFFSERV_MAX
+};
+
+enum {
+	CAKE_ACK_NONE = 0,
+	CAKE_ACK_FILTER,
+	CAKE_ACK_AGGRESSIVE,
+	CAKE_ACK_MAX
+};
+
+enum {
+	CAKE_ATM_NONE = 0,
+	CAKE_ATM_ATM,
+	CAKE_ATM_PTM,
+	CAKE_ATM_MAX
+};
+
 #endif
diff --git a/net/core/dev.c b/net/core/dev.c
index e18d81837a6c..1c3f0997e857 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4941,23 +4941,28 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
 	struct list_head sublist;
 	struct sk_buff *skb, *next;
 
+	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
 		struct net_device *orig_dev = skb->dev;
 		struct packet_type *pt_prev = NULL;
 
+		list_del(&skb->list);
 		__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+		if (!pt_prev)
+			continue;
 		if (pt_curr != pt_prev || od_curr != orig_dev) {
 			/* dispatch old sublist */
-			list_cut_before(&sublist, head, &skb->list);
 			__netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
 			/* start new sublist */
+			INIT_LIST_HEAD(&sublist);
 			pt_curr = pt_prev;
 			od_curr = orig_dev;
 		}
+		list_add_tail(&skb->list, &sublist);
 	}
 
 	/* dispatch final sublist */
-	__netif_receive_skb_list_ptype(head, pt_curr, od_curr);
+	__netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
 }
 
 static int __netif_receive_skb(struct sk_buff *skb)
@@ -5093,25 +5098,30 @@ static void netif_receive_skb_list_internal(struct list_head *head)
 {
 	struct bpf_prog *xdp_prog = NULL;
 	struct sk_buff *skb, *next;
+	struct list_head sublist;
 
+	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
 		net_timestamp_check(netdev_tstamp_prequeue, skb);
-		if (skb_defer_rx_timestamp(skb))
-			/* Handled, remove from list */
-			list_del(&skb->list);
+		list_del(&skb->list);
+		if (!skb_defer_rx_timestamp(skb))
+			list_add_tail(&skb->list, &sublist);
 	}
+	list_splice_init(&sublist, head);
 
 	if (static_branch_unlikely(&generic_xdp_needed_key)) {
 		preempt_disable();
 		rcu_read_lock();
 		list_for_each_entry_safe(skb, next, head, list) {
 			xdp_prog = rcu_dereference(skb->dev->xdp_prog);
-			if (do_xdp_generic(xdp_prog, skb) != XDP_PASS)
-				/* Dropped, remove from list */
-				list_del(&skb->list);
+			list_del(&skb->list);
+			if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
+				list_add_tail(&skb->list, &sublist);
 		}
 		rcu_read_unlock();
 		preempt_enable();
+		/* Put passed packets back on main list */
+		list_splice_init(&sublist, head);
 	}
 
 	rcu_read_lock();
@@ -5122,9 +5132,9 @@ static void netif_receive_skb_list_internal(struct list_head *head)
 			int cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
 			if (cpu >= 0) {
-				enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
-				/* Handled, remove from list */
+				/* Will be handled, remove from list */
 				list_del(&skb->list);
+				enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 			}
 		}
 	}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c4082cd50257..e3704a49164b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1274,9 +1274,6 @@ restart:
 			int linear;
 
 new_segment:
-			/* Allocate new segment. If the interface is SG,
-			 * allocate skb fitting to single page.
-			 */
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index c61240e43923..4dff40dad4dc 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -146,6 +146,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 				    rs->prior_mstamp); /* ack phase */
 	rs->interval_us = max(snd_us, ack_us);
 
+	/* Record both segment send and ack receive intervals */
+	rs->snd_interval_us = snd_us;
+	rs->rcv_interval_us = ack_us;
+
 	/* Normally we expect interval_us >= min-rtt.
 	 * Note that rate may still be over-estimated when a spuriously
 	 * retransmistted skb was first (s)acked because "interval_us"
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 91580c62bb86..1659a6b3cf42 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -385,8 +385,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 
 	if (ndev->cnf.stable_secret.initialized)
 		ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
-	else
-		ndev->cnf.addr_gen_mode = ipv6_devconf_dflt.addr_gen_mode;
 
 	ndev->cnf.mtu6 = dev->mtu;
 	ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -5210,7 +5208,9 @@ static inline size_t inet6_ifla6_size(void)
 	     + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
 	     + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
 	     + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
-	     + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
+	     + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */
+	     + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */
+	     + 0;
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -5892,32 +5892,31 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
 					 loff_t *ppos)
 {
 	int ret = 0;
-	int new_val;
+	u32 new_val;
 	struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
 	struct net *net = (struct net *)ctl->extra2;
+	struct ctl_table tmp = {
+		.data = &new_val,
+		.maxlen = sizeof(new_val),
+		.mode = ctl->mode,
+	};
 
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+	new_val = *((u32 *)ctl->data);
 
-	if (write) {
-		new_val = *((int *)ctl->data);
+	ret = proc_douintvec(&tmp, write, buffer, lenp, ppos);
+	if (ret != 0)
+		goto out;
 
+	if (write) {
 		if (check_addr_gen_mode(new_val) < 0) {
 			ret = -EINVAL;
 			goto out;
 		}
 
-		/* request for default */
-		if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) {
-			ipv6_devconf_dflt.addr_gen_mode = new_val;
-
-		/* request for individual net device */
-		} else {
-			if (!idev)
-				goto out;
-
+		if (idev) {
 			if (check_stable_privacy(idev, net, new_val) < 0) {
 				ret = -EINVAL;
 				goto out;
@@ -5927,7 +5926,21 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
 				idev->cnf.addr_gen_mode = new_val;
 				addrconf_dev_config(idev->dev);
 			}
+		} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
+			struct net_device *dev;
+
+			net->ipv6.devconf_dflt->addr_gen_mode = new_val;
+			for_each_netdev(net, dev) {
+				idev = __in6_dev_get(dev);
+				if (idev &&
+				    idev->cnf.addr_gen_mode != new_val) {
+					idev->cnf.addr_gen_mode = new_val;
+					addrconf_dev_config(idev->dev);
+				}
+			}
 		}
+
+		*((u32 *)ctl->data) = new_val;
 	}
 
 out:
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 168af54db975..dc240cb47ddf 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -603,6 +603,21 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
 }
 EXPORT_SYMBOL(nf_conntrack_destroy);
 
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+			 const struct sk_buff *skb)
+{
+	struct nf_ct_hook *ct_hook;
+	bool ret = false;
+
+	rcu_read_lock();
+	ct_hook = rcu_dereference(nf_ct_hook);
+	if (ct_hook)
+		ret = ct_hook->get_tuple_skb(dst_tuple, skb);
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(nf_ct_get_tuple_skb);
+
 /* Built-in default zone used e.g. by modules. */
 const struct nf_conntrack_zone nf_ct_zone_dflt = {
 	.id	= NF_CT_DEFAULT_ZONE_ID,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3465da2a98bd..85ab2fd6a665 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1683,6 +1683,41 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 	return 0;
 }
 
+static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+				       const struct sk_buff *skb)
+{
+	const struct nf_conntrack_tuple *src_tuple;
+	const struct nf_conntrack_tuple_hash *hash;
+	struct nf_conntrack_tuple srctuple;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
+		memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+		return true;
+	}
+
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+			       NFPROTO_IPV4, dev_net(skb->dev),
+			       &srctuple))
+		return false;
+
+	hash = nf_conntrack_find_get(dev_net(skb->dev),
+				     &nf_ct_zone_dflt,
+				     &srctuple);
+	if (!hash)
+		return false;
+
+	ct = nf_ct_tuplehash_to_ctrack(hash);
+	src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
+	memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+	nf_ct_put(ct);
+
+	return true;
+}
+
 /* Bring out ya dead! */
 static struct nf_conn *
 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
@@ -2204,6 +2239,7 @@ err_cachep:
 static struct nf_ct_hook nf_conntrack_hook = {
 	.update		= nf_conntrack_update,
 	.destroy	= destroy_conntrack,
+	.get_tuple_skb  = nf_conntrack_get_tuple_skb,
 };
 
 void nf_conntrack_init_end(void)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index fcc89706745b..7af246764a35 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -295,6 +295,17 @@ config NET_SCH_FQ_CODEL
 
 	  If unsure, say N.
 
+config NET_SCH_CAKE
+	tristate "Common Applications Kept Enhanced (CAKE)"
+	help
+	  Say Y here if you want to use the Common Applications Kept Enhanced
+          (CAKE) queue management algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_cake.
+
+	  If unsure, say N.
+
 config NET_SCH_FQ
 	tristate "Fair Queue"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 9a5a7077d217..673ee7d26ff2 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
 obj-$(CONFIG_NET_SCH_FQ_CODEL)	+= sch_fq_codel.o
+obj-$(CONFIG_NET_SCH_CAKE)	+= sch_cake.o
 obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 66dc19746c63..148a89ab789b 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -149,10 +149,15 @@ EXPORT_SYMBOL(__tcf_idr_release);
 
 static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
 {
+	struct tc_cookie *act_cookie;
 	u32 cookie_len = 0;
 
-	if (act->act_cookie)
-		cookie_len = nla_total_size(act->act_cookie->len);
+	rcu_read_lock();
+	act_cookie = rcu_dereference(act->act_cookie);
+
+	if (act_cookie)
+		cookie_len = nla_total_size(act_cookie->len);
+	rcu_read_unlock();
 
 	return  nla_total_size(0) /* action number nested */
 		+ nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index a3eef00cd711..3d6e265758c0 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -485,8 +485,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 		return -ENOMEM;
 
 	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
-	if (err < 0)
+	if (err < 0) {
+		kfree(p);
 		return err;
+	}
 	exists = err;
 	if (exists && bind) {
 		kfree(p);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 487a152a852c..8b2474293db1 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -605,20 +605,22 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 					TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
 					&mask->vlan);
 
-			ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
-			if (eth_type_vlan(ethertype)) {
-				fl_set_key_vlan(tb, ethertype,
-						TCA_FLOWER_KEY_CVLAN_ID,
-						TCA_FLOWER_KEY_CVLAN_PRIO,
-						&key->cvlan, &mask->cvlan);
-				fl_set_key_val(tb, &key->basic.n_proto,
-					       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
-					       &mask->basic.n_proto,
-					       TCA_FLOWER_UNSPEC,
-					       sizeof(key->basic.n_proto));
-			} else {
-				key->basic.n_proto = ethertype;
-				mask->basic.n_proto = cpu_to_be16(~0);
+			if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
+				ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
+				if (eth_type_vlan(ethertype)) {
+					fl_set_key_vlan(tb, ethertype,
+							TCA_FLOWER_KEY_CVLAN_ID,
+							TCA_FLOWER_KEY_CVLAN_PRIO,
+							&key->cvlan, &mask->cvlan);
+					fl_set_key_val(tb, &key->basic.n_proto,
+						       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+						       &mask->basic.n_proto,
+						       TCA_FLOWER_UNSPEC,
+						       sizeof(key->basic.n_proto));
+				} else {
+					key->basic.n_proto = ethertype;
+					mask->basic.n_proto = cpu_to_be16(~0);
+				}
 			}
 		} else {
 			key->basic.n_proto = ethertype;
@@ -1344,14 +1346,16 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 			 key->cvlan.vlan_tpid)))
 		goto nla_put_failure;
 
-	if (mask->cvlan.vlan_tpid) {
-		if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
-				 key->basic.n_proto))
-			goto nla_put_failure;
-	} else if (mask->vlan.vlan_tpid) {
-		if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
-				 key->basic.n_proto))
-			goto nla_put_failure;
+	if (mask->basic.n_proto) {
+		if (mask->cvlan.vlan_tpid) {
+			if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+					 key->basic.n_proto))
+				goto nla_put_failure;
+		} else if (mask->vlan.vlan_tpid) {
+			if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+					 key->basic.n_proto))
+				goto nla_put_failure;
+		}
 	}
 
 	if ((key->basic.n_proto == htons(ETH_P_IP) ||
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
new file mode 100644
index 000000000000..30695691e9ff
--- /dev/null
+++ b/net/sched/sch_cake.c
@@ -0,0 +1,3019 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* COMMON Applications Kept Enhanced (CAKE) discipline
+ *
+ * Copyright (C) 2014-2018 Jonathan Morton <[email protected]>
+ * Copyright (C) 2015-2018 Toke Høiland-Jørgensen <[email protected]>
+ * Copyright (C) 2014-2018 Dave Täht <[email protected]>
+ * Copyright (C) 2015-2018 Sebastian Moeller <[email protected]>
+ * (C) 2015-2018 Kevin Darbyshire-Bryant <[email protected]>
+ * Copyright (C) 2017-2018 Ryan Mounce <[email protected]>
+ *
+ * The CAKE Principles:
+ *		   (or, how to have your cake and eat it too)
+ *
+ * This is a combination of several shaping, AQM and FQ techniques into one
+ * easy-to-use package:
+ *
+ * - An overall bandwidth shaper, to move the bottleneck away from dumb CPE
+ *   equipment and bloated MACs.  This operates in deficit mode (as in sch_fq),
+ *   eliminating the need for any sort of burst parameter (eg. token bucket
+ *   depth).  Burst support is limited to that necessary to overcome scheduling
+ *   latency.
+ *
+ * - A Diffserv-aware priority queue, giving more priority to certain classes,
+ *   up to a specified fraction of bandwidth.  Above that bandwidth threshold,
+ *   the priority is reduced to avoid starving other tins.
+ *
+ * - Each priority tin has a separate Flow Queue system, to isolate traffic
+ *   flows from each other.  This prevents a burst on one flow from increasing
+ *   the delay to another.  Flows are distributed to queues using a
+ *   set-associative hash function.
+ *
+ * - Each queue is actively managed by Cobalt, which is a combination of the
+ *   Codel and Blue AQM algorithms.  This serves flows fairly, and signals
+ *   congestion early via ECN (if available) and/or packet drops, to keep
+ *   latency low.  The codel parameters are auto-tuned based on the bandwidth
+ *   setting, as is necessary at low bandwidths.
+ *
+ * The configuration parameters are kept deliberately simple for ease of use.
+ * Everything has sane defaults.  Complete generality of configuration is *not*
+ * a goal.
+ *
+ * The priority queue operates according to a weighted DRR scheme, combined with
+ * a bandwidth tracker which reuses the shaper logic to detect which side of the
+ * bandwidth sharing threshold the tin is operating.  This determines whether a
+ * priority-based weight (high) or a bandwidth-based weight (low) is used for
+ * that tin in the current pass.
+ *
+ * This qdisc was inspired by Eric Dumazet's fq_codel code, which he kindly
+ * granted us permission to leverage.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/reciprocal_div.h>
+#include <net/netlink.h>
+#include <linux/version.h>
+#include <linux/if_vlan.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <net/tcp.h>
+#include <net/flow_dissector.h>
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
+#define CAKE_SET_WAYS (8)
+#define CAKE_MAX_TINS (8)
+#define CAKE_QUEUES (1024)
+#define CAKE_FLOW_MASK 63
+#define CAKE_FLOW_NAT_FLAG 64
+#define CAKE_SPLIT_GSO_THRESHOLD (125000000) /* 1Gbps */
+
+/* struct cobalt_params - contains codel and blue parameters
+ * @interval:	codel initial drop rate
+ * @target:     maximum persistent sojourn time & blue update rate
+ * @mtu_time:   serialisation delay of maximum-size packet
+ * @p_inc:      increment of blue drop probability (0.32 fxp)
+ * @p_dec:      decrement of blue drop probability (0.32 fxp)
+ */
+struct cobalt_params {
+	u64	interval;
+	u64	target;
+	u64	mtu_time;
+	u32	p_inc;
+	u32	p_dec;
+};
+
+/* struct cobalt_vars - contains codel and blue variables
+ * @count:		codel dropping frequency
+ * @rec_inv_sqrt:	reciprocal value of sqrt(count) >> 1
+ * @drop_next:		time to drop next packet, or when we dropped last
+ * @blue_timer:		Blue time to next drop
+ * @p_drop:		BLUE drop probability (0.32 fxp)
+ * @dropping:		set if in dropping state
+ * @ecn_marked:		set if marked
+ */
+struct cobalt_vars {
+	u32	count;
+	u32	rec_inv_sqrt;
+	ktime_t	drop_next;
+	ktime_t	blue_timer;
+	u32     p_drop;
+	bool	dropping;
+	bool    ecn_marked;
+};
+
+enum {
+	CAKE_SET_NONE = 0,
+	CAKE_SET_SPARSE,
+	CAKE_SET_SPARSE_WAIT, /* counted in SPARSE, actually in BULK */
+	CAKE_SET_BULK,
+	CAKE_SET_DECAYING
+};
+
+struct cake_flow {
+	/* this stuff is all needed per-flow at dequeue time */
+	struct sk_buff	  *head;
+	struct sk_buff	  *tail;
+	struct list_head  flowchain;
+	s32		  deficit;
+	u32		  dropped;
+	struct cobalt_vars cvars;
+	u16		  srchost; /* index into cake_host table */
+	u16		  dsthost;
+	u8		  set;
+}; /* please try to keep this structure <= 64 bytes */
+
+struct cake_host {
+	u32 srchost_tag;
+	u32 dsthost_tag;
+	u16 srchost_refcnt;
+	u16 dsthost_refcnt;
+};
+
+struct cake_heap_entry {
+	u16 t:3, b:10;
+};
+
+struct cake_tin_data {
+	struct cake_flow flows[CAKE_QUEUES];
+	u32	backlogs[CAKE_QUEUES];
+	u32	tags[CAKE_QUEUES]; /* for set association */
+	u16	overflow_idx[CAKE_QUEUES];
+	struct cake_host hosts[CAKE_QUEUES]; /* for triple isolation */
+	u16	flow_quantum;
+
+	struct cobalt_params cparams;
+	u32	drop_overlimit;
+	u16	bulk_flow_count;
+	u16	sparse_flow_count;
+	u16	decaying_flow_count;
+	u16	unresponsive_flow_count;
+
+	u32	max_skblen;
+
+	struct list_head new_flows;
+	struct list_head old_flows;
+	struct list_head decaying_flows;
+
+	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+	ktime_t	time_next_packet;
+	u64	tin_rate_ns;
+	u64	tin_rate_bps;
+	u16	tin_rate_shft;
+
+	u16	tin_quantum_prio;
+	u16	tin_quantum_band;
+	s32	tin_deficit;
+	u32	tin_backlog;
+	u32	tin_dropped;
+	u32	tin_ecn_mark;
+
+	u32	packets;
+	u64	bytes;
+
+	u32	ack_drops;
+
+	/* moving averages */
+	u64 avge_delay;
+	u64 peak_delay;
+	u64 base_delay;
+
+	/* hash function stats */
+	u32	way_directs;
+	u32	way_hits;
+	u32	way_misses;
+	u32	way_collisions;
+}; /* number of tins is small, so size of this struct doesn't matter much */
+
+struct cake_sched_data {
+	struct tcf_proto __rcu *filter_list; /* optional external classifier */
+	struct tcf_block *block;
+	struct cake_tin_data *tins;
+
+	struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS];
+	u16		overflow_timeout;
+
+	u16		tin_cnt;
+	u8		tin_mode;
+	u8		flow_mode;
+	u8		ack_filter;
+	u8		atm_mode;
+
+	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+	u16		rate_shft;
+	ktime_t		time_next_packet;
+	ktime_t		failsafe_next_packet;
+	u64		rate_ns;
+	u64		rate_bps;
+	u16		rate_flags;
+	s16		rate_overhead;
+	u16		rate_mpu;
+	u64		interval;
+	u64		target;
+
+	/* resource tracking */
+	u32		buffer_used;
+	u32		buffer_max_used;
+	u32		buffer_limit;
+	u32		buffer_config_limit;
+
+	/* indices for dequeue */
+	u16		cur_tin;
+	u16		cur_flow;
+
+	struct qdisc_watchdog watchdog;
+	const u8	*tin_index;
+	const u8	*tin_order;
+
+	/* bandwidth capacity estimate */
+	ktime_t		last_packet_time;
+	ktime_t		avg_window_begin;
+	u64		avg_packet_interval;
+	u64		avg_window_bytes;
+	u64		avg_peak_bandwidth;
+	ktime_t		last_reconfig_time;
+
+	/* packet length stats */
+	u32		avg_netoff;
+	u16		max_netlen;
+	u16		max_adjlen;
+	u16		min_netlen;
+	u16		min_adjlen;
+};
+
+enum {
+	CAKE_FLAG_OVERHEAD	   = BIT(0),
+	CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
+	CAKE_FLAG_INGRESS	   = BIT(2),
+	CAKE_FLAG_WASH		   = BIT(3),
+	CAKE_FLAG_SPLIT_GSO	   = BIT(4)
+};
+
+/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
+ * obtain the best features of each.  Codel is excellent on flows which
+ * respond to congestion signals in a TCP-like way.  BLUE is more effective on
+ * unresponsive flows.
+ */
+
+struct cobalt_skb_cb {
+	ktime_t enqueue_time;
+	u32     adjusted_len;
+};
+
+static u64 us_to_ns(u64 us)
+{
+	return us * NSEC_PER_USEC;
+}
+
+static struct cobalt_skb_cb *get_cobalt_cb(const struct sk_buff *skb)
+{
+	qdisc_cb_private_validate(skb, sizeof(struct cobalt_skb_cb));
+	return (struct cobalt_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+static ktime_t cobalt_get_enqueue_time(const struct sk_buff *skb)
+{
+	return get_cobalt_cb(skb)->enqueue_time;
+}
+
+static void cobalt_set_enqueue_time(struct sk_buff *skb,
+				    ktime_t now)
+{
+	get_cobalt_cb(skb)->enqueue_time = now;
+}
+
+static u16 quantum_div[CAKE_QUEUES + 1] = {0};
+
+/* Diffserv lookup tables */
+
+static const u8 precedence[] = {
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 1, 1, 1, 1, 1, 1,
+	2, 2, 2, 2, 2, 2, 2, 2,
+	3, 3, 3, 3, 3, 3, 3, 3,
+	4, 4, 4, 4, 4, 4, 4, 4,
+	5, 5, 5, 5, 5, 5, 5, 5,
+	6, 6, 6, 6, 6, 6, 6, 6,
+	7, 7, 7, 7, 7, 7, 7, 7,
+};
+
+static const u8 diffserv8[] = {
+	2, 5, 1, 2, 4, 2, 2, 2,
+	0, 2, 1, 2, 1, 2, 1, 2,
+	5, 2, 4, 2, 4, 2, 4, 2,
+	3, 2, 3, 2, 3, 2, 3, 2,
+	6, 2, 3, 2, 3, 2, 3, 2,
+	6, 2, 2, 2, 6, 2, 6, 2,
+	7, 2, 2, 2, 2, 2, 2, 2,
+	7, 2, 2, 2, 2, 2, 2, 2,
+};
+
+static const u8 diffserv4[] = {
+	0, 2, 0, 0, 2, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0,
+	2, 0, 2, 0, 2, 0, 2, 0,
+	2, 0, 2, 0, 2, 0, 2, 0,
+	3, 0, 2, 0, 2, 0, 2, 0,
+	3, 0, 0, 0, 3, 0, 3, 0,
+	3, 0, 0, 0, 0, 0, 0, 0,
+	3, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const u8 diffserv3[] = {
+	0, 0, 0, 0, 2, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 2, 0, 2, 0,
+	2, 0, 0, 0, 0, 0, 0, 0,
+	2, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const u8 besteffort[] = {
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* tin priority order for stats dumping */
+
+static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
+static const u8 bulk_order[] = {1, 0, 2, 3};
+
+#define REC_INV_SQRT_CACHE (16)
+static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
+
+/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
+ */
+
+static void cobalt_newton_step(struct cobalt_vars *vars)
+{
+	u32 invsqrt, invsqrt2;
+	u64 val;
+
+	invsqrt = vars->rec_inv_sqrt;
+	invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
+	val = (3LL << 32) - ((u64)vars->count * invsqrt2);
+
+	val >>= 2; /* avoid overflow in following multiply */
+	val = (val * invsqrt) >> (32 - 2 + 1);
+
+	vars->rec_inv_sqrt = val;
+}
+
+static void cobalt_invsqrt(struct cobalt_vars *vars)
+{
+	if (vars->count < REC_INV_SQRT_CACHE)
+		vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
+	else
+		cobalt_newton_step(vars);
+}
+
+/* There is a big difference in timing between the accurate values placed in
+ * the cache and the approximations given by a single Newton step for small
+ * count values, particularly when stepping from count 1 to 2 or vice versa.
+ * Above 16, a single Newton step gives sufficient accuracy in either
+ * direction, given the precision stored.
+ *
+ * The magnitude of the error when stepping up to count 2 is such as to give
+ * the value that *should* have been produced at count 4.
+ */
+
+static void cobalt_cache_init(void)
+{
+	struct cobalt_vars v;
+
+	memset(&v, 0, sizeof(v));
+	v.rec_inv_sqrt = ~0U;
+	cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
+
+	for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+
+		cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
+	}
+}
+
+static void cobalt_vars_init(struct cobalt_vars *vars)
+{
+	memset(vars, 0, sizeof(*vars));
+
+	if (!cobalt_rec_inv_sqrt_cache[0]) {
+		cobalt_cache_init();
+		cobalt_rec_inv_sqrt_cache[0] = ~0;
+	}
+}
+
+/* CoDel control_law is t + interval/sqrt(count)
+ * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
+ * both sqrt() and divide operation.
+ */
+static ktime_t cobalt_control(ktime_t t,
+			      u64 interval,
+			      u32 rec_inv_sqrt)
+{
+	return ktime_add_ns(t, reciprocal_scale(interval,
+						rec_inv_sqrt));
+}
+
+/* Call this when a packet had to be dropped due to queue overflow.  Returns
+ * true if the BLUE state was quiescent before but active after this call.
+ */
+static bool cobalt_queue_full(struct cobalt_vars *vars,
+			      struct cobalt_params *p,
+			      ktime_t now)
+{
+	bool up = false;
+
+	if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
+		up = !vars->p_drop;
+		vars->p_drop += p->p_inc;
+		if (vars->p_drop < p->p_inc)
+			vars->p_drop = ~0;
+		vars->blue_timer = now;
+	}
+	vars->dropping = true;
+	vars->drop_next = now;
+	if (!vars->count)
+		vars->count = 1;
+
+	return up;
+}
+
+/* Call this when the queue was serviced but turned out to be empty.  Returns
+ * true if the BLUE state was active before but quiescent after this call.
+ */
+static bool cobalt_queue_empty(struct cobalt_vars *vars,
+			       struct cobalt_params *p,
+			       ktime_t now)
+{
+	bool down = false;
+
+	if (vars->p_drop &&
+	    ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
+		if (vars->p_drop < p->p_dec)
+			vars->p_drop = 0;
+		else
+			vars->p_drop -= p->p_dec;
+		vars->blue_timer = now;
+		down = !vars->p_drop;
+	}
+	vars->dropping = false;
+
+	if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
+		vars->count--;
+		cobalt_invsqrt(vars);
+		vars->drop_next = cobalt_control(vars->drop_next,
+						 p->interval,
+						 vars->rec_inv_sqrt);
+	}
+
+	return down;
+}
+
+/* Call this with a freshly dequeued packet for possible congestion marking.
+ * Returns true as an instruction to drop the packet, false for delivery.
+ */
+static bool cobalt_should_drop(struct cobalt_vars *vars,
+			       struct cobalt_params *p,
+			       ktime_t now,
+			       struct sk_buff *skb,
+			       u32 bulk_flows)
+{
+	bool next_due, over_target, drop = false;
+	ktime_t schedule;
+	u64 sojourn;
+
+/* The 'schedule' variable records, in its sign, whether 'now' is before or
+ * after 'drop_next'.  This allows 'drop_next' to be updated before the next
+ * scheduling decision is actually branched, without destroying that
+ * information.  Similarly, the first 'schedule' value calculated is preserved
+ * in the boolean 'next_due'.
+ *
+ * As for 'drop_next', we take advantage of the fact that 'interval' is both
+ * the delay between first exceeding 'target' and the first signalling event,
+ * *and* the scaling factor for the signalling frequency.  It's therefore very
+ * natural to use a single mechanism for both purposes, and eliminates a
+ * significant amount of reference Codel's spaghetti code.  To help with this,
+ * both the '0' and '1' entries in the invsqrt cache are 0xFFFFFFFF, as close
+ * as possible to 1.0 in fixed-point.
+ */
+
+	sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
+	schedule = ktime_sub(now, vars->drop_next);
+	over_target = sojourn > p->target &&
+		      sojourn > p->mtu_time * bulk_flows * 2 &&
+		      sojourn > p->mtu_time * 4;
+	next_due = vars->count && ktime_to_ns(schedule) >= 0;
+
+	vars->ecn_marked = false;
+
+	if (over_target) {
+		if (!vars->dropping) {
+			vars->dropping = true;
+			vars->drop_next = cobalt_control(now,
+							 p->interval,
+							 vars->rec_inv_sqrt);
+		}
+		if (!vars->count)
+			vars->count = 1;
+	} else if (vars->dropping) {
+		vars->dropping = false;
+	}
+
+	if (next_due && vars->dropping) {
+		/* Use ECN mark if possible, otherwise drop */
+		drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
+
+		vars->count++;
+		if (!vars->count)
+			vars->count--;
+		cobalt_invsqrt(vars);
+		vars->drop_next = cobalt_control(vars->drop_next,
+						 p->interval,
+						 vars->rec_inv_sqrt);
+		schedule = ktime_sub(now, vars->drop_next);
+	} else {
+		while (next_due) {
+			vars->count--;
+			cobalt_invsqrt(vars);
+			vars->drop_next = cobalt_control(vars->drop_next,
+							 p->interval,
+							 vars->rec_inv_sqrt);
+			schedule = ktime_sub(now, vars->drop_next);
+			next_due = vars->count && ktime_to_ns(schedule) >= 0;
+		}
+	}
+
+	/* Simple BLUE implementation.  Lack of ECN is deliberate. */
+	if (vars->p_drop)
+		drop |= (prandom_u32() < vars->p_drop);
+
+	/* Overload the drop_next field as an activity timeout */
+	if (!vars->count)
+		vars->drop_next = ktime_add_ns(now, p->interval);
+	else if (ktime_to_ns(schedule) > 0 && !drop)
+		vars->drop_next = now;
+
+	return drop;
+}
+
+static void cake_update_flowkeys(struct flow_keys *keys,
+				 const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	struct nf_conntrack_tuple tuple = {};
+	bool rev = !skb->_nfct;
+
+	if (tc_skb_protocol(skb) != htons(ETH_P_IP))
+		return;
+
+	if (!nf_ct_get_tuple_skb(&tuple, skb))
+		return;
+
+	keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+	keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+
+	if (keys->ports.ports) {
+		keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
+		keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
+	}
+#endif
+}
+
+/* Cake has several subtle multiple bit settings. In these cases you
+ *  would be matching triple isolate mode as well.
+ */
+
+static bool cake_dsrc(int flow_mode)
+{
+	return (flow_mode & CAKE_FLOW_DUAL_SRC) == CAKE_FLOW_DUAL_SRC;
+}
+
+static bool cake_ddst(int flow_mode)
+{
+	return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST;
+}
+
+static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
+		     int flow_mode)
+{
+	u32 flow_hash = 0, srchost_hash, dsthost_hash;
+	u16 reduced_hash, srchost_idx, dsthost_idx;
+	struct flow_keys keys, host_keys;
+
+	if (unlikely(flow_mode == CAKE_FLOW_NONE))
+		return 0;
+
+	skb_flow_dissect_flow_keys(skb, &keys,
+				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+	if (flow_mode & CAKE_FLOW_NAT_FLAG)
+		cake_update_flowkeys(&keys, skb);
+
+	/* flow_hash_from_keys() sorts the addresses by value, so we have
+	 * to preserve their order in a separate data structure to treat
+	 * src and dst host addresses as independently selectable.
+	 */
+	host_keys = keys;
+	host_keys.ports.ports     = 0;
+	host_keys.basic.ip_proto  = 0;
+	host_keys.keyid.keyid     = 0;
+	host_keys.tags.flow_label = 0;
+
+	switch (host_keys.control.addr_type) {
+	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+		host_keys.addrs.v4addrs.src = 0;
+		dsthost_hash = flow_hash_from_keys(&host_keys);
+		host_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+		host_keys.addrs.v4addrs.dst = 0;
+		srchost_hash = flow_hash_from_keys(&host_keys);
+		break;
+
+	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+		memset(&host_keys.addrs.v6addrs.src, 0,
+		       sizeof(host_keys.addrs.v6addrs.src));
+		dsthost_hash = flow_hash_from_keys(&host_keys);
+		host_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+		memset(&host_keys.addrs.v6addrs.dst, 0,
+		       sizeof(host_keys.addrs.v6addrs.dst));
+		srchost_hash = flow_hash_from_keys(&host_keys);
+		break;
+
+	default:
+		dsthost_hash = 0;
+		srchost_hash = 0;
+	}
+
+	/* This *must* be after the above switch, since as a
+	 * side-effect it sorts the src and dst addresses.
+	 */
+	if (flow_mode & CAKE_FLOW_FLOWS)
+		flow_hash = flow_hash_from_keys(&keys);
+
+	if (!(flow_mode & CAKE_FLOW_FLOWS)) {
+		if (flow_mode & CAKE_FLOW_SRC_IP)
+			flow_hash ^= srchost_hash;
+
+		if (flow_mode & CAKE_FLOW_DST_IP)
+			flow_hash ^= dsthost_hash;
+	}
+
+	reduced_hash = flow_hash % CAKE_QUEUES;
+
+	/* set-associative hashing */
+	/* fast path if no hash collision (direct lookup succeeds) */
+	if (likely(q->tags[reduced_hash] == flow_hash &&
+		   q->flows[reduced_hash].set)) {
+		q->way_directs++;
+	} else {
+		u32 inner_hash = reduced_hash % CAKE_SET_WAYS;
+		u32 outer_hash = reduced_hash - inner_hash;
+		bool allocate_src = false;
+		bool allocate_dst = false;
+		u32 i, k;
+
+		/* check if any active queue in the set is reserved for
+		 * this flow.
+		 */
+		for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+		     i++, k = (k + 1) % CAKE_SET_WAYS) {
+			if (q->tags[outer_hash + k] == flow_hash) {
+				if (i)
+					q->way_hits++;
+
+				if (!q->flows[outer_hash + k].set) {
+					/* need to increment host refcnts */
+					allocate_src = cake_dsrc(flow_mode);
+					allocate_dst = cake_ddst(flow_mode);
+				}
+
+				goto found;
+			}
+		}
+
+		/* no queue is reserved for this flow, look for an
+		 * empty one.
+		 */
+		for (i = 0; i < CAKE_SET_WAYS;
+			 i++, k = (k + 1) % CAKE_SET_WAYS) {
+			if (!q->flows[outer_hash + k].set) {
+				q->way_misses++;
+				allocate_src = cake_dsrc(flow_mode);
+				allocate_dst = cake_ddst(flow_mode);
+				goto found;
+			}
+		}
+
+		/* With no empty queues, default to the original
+		 * queue, accept the collision, update the host tags.
+		 */
+		q->way_collisions++;
+		q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
+		q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+		allocate_src = cake_dsrc(flow_mode);
+		allocate_dst = cake_ddst(flow_mode);
+found:
+		/* reserve queue for future packets in same flow */
+		reduced_hash = outer_hash + k;
+		q->tags[reduced_hash] = flow_hash;
+
+		if (allocate_src) {
+			srchost_idx = srchost_hash % CAKE_QUEUES;
+			inner_hash = srchost_idx % CAKE_SET_WAYS;
+			outer_hash = srchost_idx - inner_hash;
+			for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+				i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (q->hosts[outer_hash + k].srchost_tag ==
+				    srchost_hash)
+					goto found_src;
+			}
+			for (i = 0; i < CAKE_SET_WAYS;
+				i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (!q->hosts[outer_hash + k].srchost_refcnt)
+					break;
+			}
+			q->hosts[outer_hash + k].srchost_tag = srchost_hash;
+found_src:
+			srchost_idx = outer_hash + k;
+			q->hosts[srchost_idx].srchost_refcnt++;
+			q->flows[reduced_hash].srchost = srchost_idx;
+		}
+
+		if (allocate_dst) {
+			dsthost_idx = dsthost_hash % CAKE_QUEUES;
+			inner_hash = dsthost_idx % CAKE_SET_WAYS;
+			outer_hash = dsthost_idx - inner_hash;
+			for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+			     i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (q->hosts[outer_hash + k].dsthost_tag ==
+				    dsthost_hash)
+					goto found_dst;
+			}
+			for (i = 0; i < CAKE_SET_WAYS;
+			     i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (!q->hosts[outer_hash + k].dsthost_refcnt)
+					break;
+			}
+			q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
+found_dst:
+			dsthost_idx = outer_hash + k;
+			q->hosts[dsthost_idx].dsthost_refcnt++;
+			q->flows[reduced_hash].dsthost = dsthost_idx;
+		}
+	}
+
+	return reduced_hash;
+}
+
+/* helper functions : might be changed when/if skb use a standard list_head */
+/* remove one skb from head of slot queue */
+
+static struct sk_buff *dequeue_head(struct cake_flow *flow)
+{
+	struct sk_buff *skb = flow->head;
+
+	if (skb) {
+		flow->head = skb->next;
+		skb->next = NULL;
+	}
+
+	return skb;
+}
+
+/* add skb to flow queue (tail add) */
+
+static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb)
+{
+	if (!flow->head)
+		flow->head = skb;
+	else
+		flow->tail->next = skb;
+	flow->tail = skb;
+	skb->next = NULL;
+}
+
+static struct iphdr *cake_get_iphdr(const struct sk_buff *skb,
+				    struct ipv6hdr *buf)
+{
+	unsigned int offset = skb_network_offset(skb);
+	struct iphdr *iph;
+
+	iph = skb_header_pointer(skb, offset, sizeof(struct iphdr), buf);
+
+	if (!iph)
+		return NULL;
+
+	if (iph->version == 4 && iph->protocol == IPPROTO_IPV6)
+		return skb_header_pointer(skb, offset + iph->ihl * 4,
+					  sizeof(struct ipv6hdr), buf);
+
+	else if (iph->version == 4)
+		return iph;
+
+	else if (iph->version == 6)
+		return skb_header_pointer(skb, offset, sizeof(struct ipv6hdr),
+					  buf);
+
+	return NULL;
+}
+
+static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
+				      void *buf, unsigned int bufsize)
+{
+	unsigned int offset = skb_network_offset(skb);
+	const struct ipv6hdr *ipv6h;
+	const struct tcphdr *tcph;
+	const struct iphdr *iph;
+	struct ipv6hdr _ipv6h;
+	struct tcphdr _tcph;
+
+	ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
+
+	if (!ipv6h)
+		return NULL;
+
+	if (ipv6h->version == 4) {
+		iph = (struct iphdr *)ipv6h;
+		offset += iph->ihl * 4;
+
+		/* special-case 6in4 tunnelling, as that is a common way to get
+		 * v6 connectivity in the home
+		 */
+		if (iph->protocol == IPPROTO_IPV6) {
+			ipv6h = skb_header_pointer(skb, offset,
+						   sizeof(_ipv6h), &_ipv6h);
+
+			if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
+				return NULL;
+
+			offset += sizeof(struct ipv6hdr);
+
+		} else if (iph->protocol != IPPROTO_TCP) {
+			return NULL;
+		}
+
+	} else if (ipv6h->version == 6) {
+		if (ipv6h->nexthdr != IPPROTO_TCP)
+			return NULL;
+
+		offset += sizeof(struct ipv6hdr);
+	} else {
+		return NULL;
+	}
+
+	tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+	if (!tcph)
+		return NULL;
+
+	return skb_header_pointer(skb, offset,
+				  min(__tcp_hdrlen(tcph), bufsize), buf);
+}
+
+static const void *cake_get_tcpopt(const struct tcphdr *tcph,
+				   int code, int *oplen)
+{
+	/* inspired by tcp_parse_options in tcp_input.c */
+	int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
+	const u8 *ptr = (const u8 *)(tcph + 1);
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		if (opcode == TCPOPT_EOL)
+			break;
+		if (opcode == TCPOPT_NOP) {
+			length--;
+			continue;
+		}
+		opsize = *ptr++;
+		if (opsize < 2 || opsize > length)
+			break;
+
+		if (opcode == code) {
+			*oplen = opsize;
+			return ptr;
+		}
+
+		ptr += opsize - 2;
+		length -= opsize;
+	}
+
+	return NULL;
+}
+
+/* Compare two SACK sequences. A sequence is considered greater if it SACKs more
+ * bytes than the other. In the case where both sequences ACKs bytes that the
+ * other doesn't, A is considered greater. DSACKs in A also makes A be
+ * considered greater.
+ *
+ * @return -1, 0 or 1 as normal compare functions
+ */
+static int cake_tcph_sack_compare(const struct tcphdr *tcph_a,
+				  const struct tcphdr *tcph_b)
+{
+	const struct tcp_sack_block_wire *sack_a, *sack_b;
+	u32 ack_seq_a = ntohl(tcph_a->ack_seq);
+	u32 bytes_a = 0, bytes_b = 0;
+	int oplen_a, oplen_b;
+	bool first = true;
+
+	sack_a = cake_get_tcpopt(tcph_a, TCPOPT_SACK, &oplen_a);
+	sack_b = cake_get_tcpopt(tcph_b, TCPOPT_SACK, &oplen_b);
+
+	/* pointers point to option contents */
+	oplen_a -= TCPOLEN_SACK_BASE;
+	oplen_b -= TCPOLEN_SACK_BASE;
+
+	if (sack_a && oplen_a >= sizeof(*sack_a) &&
+	    (!sack_b || oplen_b < sizeof(*sack_b)))
+		return -1;
+	else if (sack_b && oplen_b >= sizeof(*sack_b) &&
+		 (!sack_a || oplen_a < sizeof(*sack_a)))
+		return 1;
+	else if ((!sack_a || oplen_a < sizeof(*sack_a)) &&
+		 (!sack_b || oplen_b < sizeof(*sack_b)))
+		return 0;
+
+	while (oplen_a >= sizeof(*sack_a)) {
+		const struct tcp_sack_block_wire *sack_tmp = sack_b;
+		u32 start_a = get_unaligned_be32(&sack_a->start_seq);
+		u32 end_a = get_unaligned_be32(&sack_a->end_seq);
+		int oplen_tmp = oplen_b;
+		bool found = false;
+
+		/* DSACK; always considered greater to prevent dropping */
+		if (before(start_a, ack_seq_a))
+			return -1;
+
+		bytes_a += end_a - start_a;
+
+		while (oplen_tmp >= sizeof(*sack_tmp)) {
+			u32 start_b = get_unaligned_be32(&sack_tmp->start_seq);
+			u32 end_b = get_unaligned_be32(&sack_tmp->end_seq);
+
+			/* first time through we count the total size */
+			if (first)
+				bytes_b += end_b - start_b;
+
+			if (!after(start_b, start_a) && !before(end_b, end_a)) {
+				found = true;
+				if (!first)
+					break;
+			}
+			oplen_tmp -= sizeof(*sack_tmp);
+			sack_tmp++;
+		}
+
+		if (!found)
+			return -1;
+
+		oplen_a -= sizeof(*sack_a);
+		sack_a++;
+		first = false;
+	}
+
+	/* If we made it this far, all ranges SACKed by A are covered by B, so
+	 * either the SACKs are equal, or B SACKs more bytes.
+	 */
+	return bytes_b > bytes_a ? 1 : 0;
+}
+
+static void cake_tcph_get_tstamp(const struct tcphdr *tcph,
+				 u32 *tsval, u32 *tsecr)
+{
+	const u8 *ptr;
+	int opsize;
+
+	ptr = cake_get_tcpopt(tcph, TCPOPT_TIMESTAMP, &opsize);
+
+	if (ptr && opsize == TCPOLEN_TIMESTAMP) {
+		*tsval = get_unaligned_be32(ptr);
+		*tsecr = get_unaligned_be32(ptr + 4);
+	}
+}
+
+static bool cake_tcph_may_drop(const struct tcphdr *tcph,
+			       u32 tstamp_new, u32 tsecr_new)
+{
+	/* inspired by tcp_parse_options in tcp_input.c */
+	int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
+	const u8 *ptr = (const u8 *)(tcph + 1);
+	u32 tstamp, tsecr;
+
+	/* 3 reserved flags must be unset to avoid future breakage
+	 * ACK must be set
+	 * ECE/CWR are handled separately
+	 * All other flags URG/PSH/RST/SYN/FIN must be unset
+	 * 0x0FFF0000 = all TCP flags (confirm ACK=1, others zero)
+	 * 0x00C00000 = CWR/ECE (handled separately)
+	 * 0x0F3F0000 = 0x0FFF0000 & ~0x00C00000
+	 */
+	if (((tcp_flag_word(tcph) &
+	      cpu_to_be32(0x0F3F0000)) != TCP_FLAG_ACK))
+		return false;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		if (opcode == TCPOPT_EOL)
+			break;
+		if (opcode == TCPOPT_NOP) {
+			length--;
+			continue;
+		}
+		opsize = *ptr++;
+		if (opsize < 2 || opsize > length)
+			break;
+
+		switch (opcode) {
+		case TCPOPT_MD5SIG: /* doesn't influence state */
+			break;
+
+		case TCPOPT_SACK: /* stricter checking performed later */
+			if (opsize % 8 != 2)
+				return false;
+			break;
+
+		case TCPOPT_TIMESTAMP:
+			/* only drop timestamps lower than new */
+			if (opsize != TCPOLEN_TIMESTAMP)
+				return false;
+			tstamp = get_unaligned_be32(ptr);
+			tsecr = get_unaligned_be32(ptr + 4);
+			if (after(tstamp, tstamp_new) ||
+			    after(tsecr, tsecr_new))
+				return false;
+			break;
+
+		case TCPOPT_MSS:  /* these should only be set on SYN */
+		case TCPOPT_WINDOW:
+		case TCPOPT_SACK_PERM:
+		case TCPOPT_FASTOPEN:
+		case TCPOPT_EXP:
+		default: /* don't drop if any unknown options are present */
+			return false;
+		}
+
+		ptr += opsize - 2;
+		length -= opsize;
+	}
+
+	return true;
+}
+
+static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
+				       struct cake_flow *flow)
+{
+	bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE;
+	struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL;
+	struct sk_buff *skb_check, *skb_prev = NULL;
+	const struct ipv6hdr *ipv6h, *ipv6h_check;
+	unsigned char _tcph[64], _tcph_check[64];
+	const struct tcphdr *tcph, *tcph_check;
+	const struct iphdr *iph, *iph_check;
+	struct ipv6hdr _iph, _iph_check;
+	const struct sk_buff *skb;
+	int seglen, num_found = 0;
+	u32 tstamp = 0, tsecr = 0;
+	__be32 elig_flags = 0;
+	int sack_comp;
+
+	/* no other possible ACKs to filter */
+	if (flow->head == flow->tail)
+		return NULL;
+
+	skb = flow->tail;
+	tcph = cake_get_tcphdr(skb, _tcph, sizeof(_tcph));
+	iph = cake_get_iphdr(skb, &_iph);
+	if (!tcph)
+		return NULL;
+
+	cake_tcph_get_tstamp(tcph, &tstamp, &tsecr);
+
+	/* the 'triggering' packet need only have the ACK flag set.
+	 * also check that SYN is not set, as there won't be any previous ACKs.
+	 */
+	if ((tcp_flag_word(tcph) &
+	     (TCP_FLAG_ACK | TCP_FLAG_SYN)) != TCP_FLAG_ACK)
+		return NULL;
+
+	/* the 'triggering' ACK is at the tail of the queue, we have already
+	 * returned if it is the only packet in the flow. loop through the rest
+	 * of the queue looking for pure ACKs with the same 5-tuple as the
+	 * triggering one.
+	 */
+	for (skb_check = flow->head;
+	     skb_check && skb_check != skb;
+	     skb_prev = skb_check, skb_check = skb_check->next) {
+		iph_check = cake_get_iphdr(skb_check, &_iph_check);
+		tcph_check = cake_get_tcphdr(skb_check, &_tcph_check,
+					     sizeof(_tcph_check));
+
+		/* only TCP packets with matching 5-tuple are eligible, and only
+		 * drop safe headers
+		 */
+		if (!tcph_check || iph->version != iph_check->version ||
+		    tcph_check->source != tcph->source ||
+		    tcph_check->dest != tcph->dest)
+			continue;
+
+		if (iph_check->version == 4) {
+			if (iph_check->saddr != iph->saddr ||
+			    iph_check->daddr != iph->daddr)
+				continue;
+
+			seglen = ntohs(iph_check->tot_len) -
+				       (4 * iph_check->ihl);
+		} else if (iph_check->version == 6) {
+			ipv6h = (struct ipv6hdr *)iph;
+			ipv6h_check = (struct ipv6hdr *)iph_check;
+
+			if (ipv6_addr_cmp(&ipv6h_check->saddr, &ipv6h->saddr) ||
+			    ipv6_addr_cmp(&ipv6h_check->daddr, &ipv6h->daddr))
+				continue;
+
+			seglen = ntohs(ipv6h_check->payload_len);
+		} else {
+			WARN_ON(1);  /* shouldn't happen */
+			continue;
+		}
+
+		/* If the ECE/CWR flags changed from the previous eligible
+		 * packet in the same flow, we should no longer be dropping that
+		 * previous packet as this would lose information.
+		 */
+		if (elig_ack && (tcp_flag_word(tcph_check) &
+				 (TCP_FLAG_ECE | TCP_FLAG_CWR)) != elig_flags) {
+			elig_ack = NULL;
+			elig_ack_prev = NULL;
+			num_found--;
+		}
+
+		/* Check TCP options and flags, don't drop ACKs with segment
+		 * data, and don't drop ACKs with a higher cumulative ACK
+		 * counter than the triggering packet. Check ACK seqno here to
+		 * avoid parsing SACK options of packets we are going to exclude
+		 * anyway.
+		 */
+		if (!cake_tcph_may_drop(tcph_check, tstamp, tsecr) ||
+		    (seglen - __tcp_hdrlen(tcph_check)) != 0 ||
+		    after(ntohl(tcph_check->ack_seq), ntohl(tcph->ack_seq)))
+			continue;
+
+		/* Check SACK options. The triggering packet must SACK more data
+		 * than the ACK under consideration, or SACK the same range but
+		 * have a larger cumulative ACK counter. The latter is a
+		 * pathological case, but is contained in the following check
+		 * anyway, just to be safe.
+		 */
+		sack_comp = cake_tcph_sack_compare(tcph_check, tcph);
+
+		if (sack_comp < 0 ||
+		    (ntohl(tcph_check->ack_seq) == ntohl(tcph->ack_seq) &&
+		     sack_comp == 0))
+			continue;
+
+		/* At this point we have found an eligible pure ACK to drop; if
+		 * we are in aggressive mode, we are done. Otherwise, keep
+		 * searching unless this is the second eligible ACK we
+		 * found.
+		 *
+		 * Since we want to drop ACK closest to the head of the queue,
+		 * save the first eligible ACK we find, even if we need to loop
+		 * again.
+		 */
+		if (!elig_ack) {
+			elig_ack = skb_check;
+			elig_ack_prev = skb_prev;
+			elig_flags = (tcp_flag_word(tcph_check)
+				      & (TCP_FLAG_ECE | TCP_FLAG_CWR));
+		}
+
+		if (num_found++ > 0)
+			goto found;
+	}
+
+	/* We made it through the queue without finding two eligible ACKs . If
+	 * we found a single eligible ACK we can drop it in aggressive mode if
+	 * we can guarantee that this does not interfere with ECN flag
+	 * information. We ensure this by dropping it only if the enqueued
+	 * packet is consecutive with the eligible ACK, and their flags match.
+	 */
+	if (elig_ack && aggressive && elig_ack->next == skb &&
+	    (elig_flags == (tcp_flag_word(tcph) &
+			    (TCP_FLAG_ECE | TCP_FLAG_CWR))))
+		goto found;
+
+	return NULL;
+
+found:
+	if (elig_ack_prev)
+		elig_ack_prev->next = elig_ack->next;
+	else
+		flow->head = elig_ack->next;
+
+	elig_ack->next = NULL;
+
+	return elig_ack;
+}
+
+static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
+{
+	avg -= avg >> shift;
+	avg += sample >> shift;
+	return avg;
+}
+
+static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
+{
+	if (q->rate_flags & CAKE_FLAG_OVERHEAD)
+		len -= off;
+
+	if (q->max_netlen < len)
+		q->max_netlen = len;
+	if (q->min_netlen > len)
+		q->min_netlen = len;
+
+	len += q->rate_overhead;
+
+	if (len < q->rate_mpu)
+		len = q->rate_mpu;
+
+	if (q->atm_mode == CAKE_ATM_ATM) {
+		len += 47;
+		len /= 48;
+		len *= 53;
+	} else if (q->atm_mode == CAKE_ATM_PTM) {
+		/* Add one byte per 64 bytes or part thereof.
+		 * This is conservative and easier to calculate than the
+		 * precise value.
+		 */
+		len += (len + 63) / 64;
+	}
+
+	if (q->max_adjlen < len)
+		q->max_adjlen = len;
+	if (q->min_adjlen > len)
+		q->min_adjlen = len;
+
+	return len;
+}
+
+static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	unsigned int hdr_len, last_len = 0;
+	u32 off = skb_network_offset(skb);
+	u32 len = qdisc_pkt_len(skb);
+	u16 segs = 1;
+
+	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
+
+	if (!shinfo->gso_size)
+		return cake_calc_overhead(q, len, off);
+
+	/* borrowed from qdisc_pkt_len_init() */
+	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+	/* + transport layer */
+	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
+						SKB_GSO_TCPV6))) {
+		const struct tcphdr *th;
+		struct tcphdr _tcphdr;
+
+		th = skb_header_pointer(skb, skb_transport_offset(skb),
+					sizeof(_tcphdr), &_tcphdr);
+		if (likely(th))
+			hdr_len += __tcp_hdrlen(th);
+	} else {
+		struct udphdr _udphdr;
+
+		if (skb_header_pointer(skb, skb_transport_offset(skb),
+				       sizeof(_udphdr), &_udphdr))
+			hdr_len += sizeof(struct udphdr);
+	}
+
+	if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
+		segs = DIV_ROUND_UP(skb->len - hdr_len,
+				    shinfo->gso_size);
+	else
+		segs = shinfo->gso_segs;
+
+	len = shinfo->gso_size + hdr_len;
+	last_len = skb->len - shinfo->gso_size * (segs - 1);
+
+	return (cake_calc_overhead(q, len, off) * (segs - 1) +
+		cake_calc_overhead(q, last_len, off));
+}
+
+static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
+{
+	struct cake_heap_entry ii = q->overflow_heap[i];
+	struct cake_heap_entry jj = q->overflow_heap[j];
+
+	q->overflow_heap[i] = jj;
+	q->overflow_heap[j] = ii;
+
+	q->tins[ii.t].overflow_idx[ii.b] = j;
+	q->tins[jj.t].overflow_idx[jj.b] = i;
+}
+
+static u32 cake_heap_get_backlog(const struct cake_sched_data *q, u16 i)
+{
+	struct cake_heap_entry ii = q->overflow_heap[i];
+
+	return q->tins[ii.t].backlogs[ii.b];
+}
+
+static void cake_heapify(struct cake_sched_data *q, u16 i)
+{
+	static const u32 a = CAKE_MAX_TINS * CAKE_QUEUES;
+	u32 mb = cake_heap_get_backlog(q, i);
+	u32 m = i;
+
+	while (m < a) {
+		u32 l = m + m + 1;
+		u32 r = l + 1;
+
+		if (l < a) {
+			u32 lb = cake_heap_get_backlog(q, l);
+
+			if (lb > mb) {
+				m  = l;
+				mb = lb;
+			}
+		}
+
+		if (r < a) {
+			u32 rb = cake_heap_get_backlog(q, r);
+
+			if (rb > mb) {
+				m  = r;
+				mb = rb;
+			}
+		}
+
+		if (m != i) {
+			cake_heap_swap(q, i, m);
+			i = m;
+		} else {
+			break;
+		}
+	}
+}
+
+static void cake_heapify_up(struct cake_sched_data *q, u16 i)
+{
+	while (i > 0 && i < CAKE_MAX_TINS * CAKE_QUEUES) {
+		u16 p = (i - 1) >> 1;
+		u32 ib = cake_heap_get_backlog(q, i);
+		u32 pb = cake_heap_get_backlog(q, p);
+
+		if (ib > pb) {
+			cake_heap_swap(q, i, p);
+			i = p;
+		} else {
+			break;
+		}
+	}
+}
+
+static int cake_advance_shaper(struct cake_sched_data *q,
+			       struct cake_tin_data *b,
+			       struct sk_buff *skb,
+			       ktime_t now, bool drop)
+{
+	u32 len = get_cobalt_cb(skb)->adjusted_len;
+
+	/* charge packet bandwidth to this tin
+	 * and to the global shaper.
+	 */
+	if (q->rate_ns) {
+		u64 tin_dur = (len * b->tin_rate_ns) >> b->tin_rate_shft;
+		u64 global_dur = (len * q->rate_ns) >> q->rate_shft;
+		u64 failsafe_dur = global_dur + (global_dur >> 1);
+
+		if (ktime_before(b->time_next_packet, now))
+			b->time_next_packet = ktime_add_ns(b->time_next_packet,
+							   tin_dur);
+
+		else if (ktime_before(b->time_next_packet,
+				      ktime_add_ns(now, tin_dur)))
+			b->time_next_packet = ktime_add_ns(now, tin_dur);
+
+		q->time_next_packet = ktime_add_ns(q->time_next_packet,
+						   global_dur);
+		if (!drop)
+			q->failsafe_next_packet = \
+				ktime_add_ns(q->failsafe_next_packet,
+					     failsafe_dur);
+	}
+	return len;
+}
+
+static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	ktime_t now = ktime_get();
+	u32 idx = 0, tin = 0, len;
+	struct cake_heap_entry qq;
+	struct cake_tin_data *b;
+	struct cake_flow *flow;
+	struct sk_buff *skb;
+
+	if (!q->overflow_timeout) {
+		int i;
+		/* Build fresh max-heap */
+		for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--)
+			cake_heapify(q, i);
+	}
+	q->overflow_timeout = 65535;
+
+	/* select longest queue for pruning */
+	qq  = q->overflow_heap[0];
+	tin = qq.t;
+	idx = qq.b;
+
+	b = &q->tins[tin];
+	flow = &b->flows[idx];
+	skb = dequeue_head(flow);
+	if (unlikely(!skb)) {
+		/* heap has gone wrong, rebuild it next time */
+		q->overflow_timeout = 0;
+		return idx + (tin << 16);
+	}
+
+	if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
+		b->unresponsive_flow_count++;
+
+	len = qdisc_pkt_len(skb);
+	q->buffer_used      -= skb->truesize;
+	b->backlogs[idx]    -= len;
+	b->tin_backlog      -= len;
+	sch->qstats.backlog -= len;
+	qdisc_tree_reduce_backlog(sch, 1, len);
+
+	flow->dropped++;
+	b->tin_dropped++;
+	sch->qstats.drops++;
+
+	if (q->rate_flags & CAKE_FLAG_INGRESS)
+		cake_advance_shaper(q, b, skb, now, true);
+
+	__qdisc_drop(skb, to_free);
+	sch->q.qlen--;
+
+	cake_heapify(q, 0);
+
+	return idx + (tin << 16);
+}
+
+static void cake_wash_diffserv(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+		break;
+	case htons(ETH_P_IPV6):
+		ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+		break;
+	default:
+		break;
+	}
+}
+
+static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+{
+	u8 dscp;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+		if (wash && dscp)
+			ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+		return dscp;
+
+	case htons(ETH_P_IPV6):
+		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+		if (wash && dscp)
+			ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+		return dscp;
+
+	case htons(ETH_P_ARP):
+		return 0x38;  /* CS7 - Net Control */
+
+	default:
+		/* If there is no Diffserv field, treat as best-effort */
+		return 0;
+	}
+}
+
+static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
+					     struct sk_buff *skb)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 tin;
+
+	if (TC_H_MAJ(skb->priority) == sch->handle &&
+	    TC_H_MIN(skb->priority) > 0 &&
+	    TC_H_MIN(skb->priority) <= q->tin_cnt) {
+		tin = TC_H_MIN(skb->priority) - 1;
+
+		if (q->rate_flags & CAKE_FLAG_WASH)
+			cake_wash_diffserv(skb);
+	} else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
+		/* extract the Diffserv Precedence field, if it exists */
+		/* and clear DSCP bits if washing */
+		tin = q->tin_index[cake_handle_diffserv(skb,
+				q->rate_flags & CAKE_FLAG_WASH)];
+		if (unlikely(tin >= q->tin_cnt))
+			tin = 0;
+	} else {
+		tin = 0;
+		if (q->rate_flags & CAKE_FLAG_WASH)
+			cake_wash_diffserv(skb);
+	}
+
+	return &q->tins[tin];
+}
+
+static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
+			 struct sk_buff *skb, int flow_mode, int *qerr)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct tcf_proto *filter;
+	struct tcf_result res;
+	u32 flow = 0;
+	int result;
+
+	filter = rcu_dereference_bh(q->filter_list);
+	if (!filter)
+		goto hash;
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	result = tcf_classify(skb, filter, &res, false);
+
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+		case TC_ACT_TRAP:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
+		case TC_ACT_SHOT:
+			return 0;
+		}
+#endif
+		if (TC_H_MIN(res.classid) <= CAKE_QUEUES)
+			flow = TC_H_MIN(res.classid);
+	}
+hash:
+	*t = cake_select_tin(sch, skb);
+	return flow ?: cake_hash(*t, skb, flow_mode) + 1;
+}
+
+static void cake_reconfigure(struct Qdisc *sch);
+
+static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			struct sk_buff **to_free)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int len = qdisc_pkt_len(skb);
+	int uninitialized_var(ret);
+	struct sk_buff *ack = NULL;
+	ktime_t now = ktime_get();
+	struct cake_tin_data *b;
+	struct cake_flow *flow;
+	u32 idx;
+
+	/* choose flow to insert into */
+	idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
+	if (idx == 0) {
+		if (ret & __NET_XMIT_BYPASS)
+			qdisc_qstats_drop(sch);
+		__qdisc_drop(skb, to_free);
+		return ret;
+	}
+	idx--;
+	flow = &b->flows[idx];
+
+	/* ensure shaper state isn't stale */
+	if (!b->tin_backlog) {
+		if (ktime_before(b->time_next_packet, now))
+			b->time_next_packet = now;
+
+		if (!sch->q.qlen) {
+			if (ktime_before(q->time_next_packet, now)) {
+				q->failsafe_next_packet = now;
+				q->time_next_packet = now;
+			} else if (ktime_after(q->time_next_packet, now) &&
+				   ktime_after(q->failsafe_next_packet, now)) {
+				u64 next = \
+					min(ktime_to_ns(q->time_next_packet),
+					    ktime_to_ns(
+						   q->failsafe_next_packet));
+				sch->qstats.overlimits++;
+				qdisc_watchdog_schedule_ns(&q->watchdog, next);
+			}
+		}
+	}
+
+	if (unlikely(len > b->max_skblen))
+		b->max_skblen = len;
+
+	if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) {
+		struct sk_buff *segs, *nskb;
+		netdev_features_t features = netif_skb_features(skb);
+		unsigned int slen = 0;
+
+		segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+		if (IS_ERR_OR_NULL(segs))
+			return qdisc_drop(skb, sch, to_free);
+
+		while (segs) {
+			nskb = segs->next;
+			segs->next = NULL;
+			qdisc_skb_cb(segs)->pkt_len = segs->len;
+			cobalt_set_enqueue_time(segs, now);
+			get_cobalt_cb(segs)->adjusted_len = cake_overhead(q,
+									  segs);
+			flow_queue_add(flow, segs);
+
+			sch->q.qlen++;
+			slen += segs->len;
+			q->buffer_used += segs->truesize;
+			b->packets++;
+			segs = nskb;
+		}
+
+		/* stats */
+		b->bytes	    += slen;
+		b->backlogs[idx]    += slen;
+		b->tin_backlog      += slen;
+		sch->qstats.backlog += slen;
+		q->avg_window_bytes += slen;
+
+		qdisc_tree_reduce_backlog(sch, 1, len);
+		consume_skb(skb);
+	} else {
+		/* not splitting */
+		cobalt_set_enqueue_time(skb, now);
+		get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
+		flow_queue_add(flow, skb);
+
+		if (q->ack_filter)
+			ack = cake_ack_filter(q, flow);
+
+		if (ack) {
+			b->ack_drops++;
+			sch->qstats.drops++;
+			b->bytes += qdisc_pkt_len(ack);
+			len -= qdisc_pkt_len(ack);
+			q->buffer_used += skb->truesize - ack->truesize;
+			if (q->rate_flags & CAKE_FLAG_INGRESS)
+				cake_advance_shaper(q, b, ack, now, true);
+
+			qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack));
+			consume_skb(ack);
+		} else {
+			sch->q.qlen++;
+			q->buffer_used      += skb->truesize;
+		}
+
+		/* stats */
+		b->packets++;
+		b->bytes	    += len;
+		b->backlogs[idx]    += len;
+		b->tin_backlog      += len;
+		sch->qstats.backlog += len;
+		q->avg_window_bytes += len;
+	}
+
+	if (q->overflow_timeout)
+		cake_heapify_up(q, b->overflow_idx[idx]);
+
+	/* incoming bandwidth capacity estimate */
+	if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) {
+		u64 packet_interval = \
+			ktime_to_ns(ktime_sub(now, q->last_packet_time));
+
+		if (packet_interval > NSEC_PER_SEC)
+			packet_interval = NSEC_PER_SEC;
+
+		/* filter out short-term bursts, eg. wifi aggregation */
+		q->avg_packet_interval = \
+			cake_ewma(q->avg_packet_interval,
+				  packet_interval,
+				  (packet_interval > q->avg_packet_interval ?
+					  2 : 8));
+
+		q->last_packet_time = now;
+
+		if (packet_interval > q->avg_packet_interval) {
+			u64 window_interval = \
+				ktime_to_ns(ktime_sub(now,
+						      q->avg_window_begin));
+			u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
+
+			do_div(b, window_interval);
+			q->avg_peak_bandwidth =
+				cake_ewma(q->avg_peak_bandwidth, b,
+					  b > q->avg_peak_bandwidth ? 2 : 8);
+			q->avg_window_bytes = 0;
+			q->avg_window_begin = now;
+
+			if (ktime_after(now,
+					ktime_add_ms(q->last_reconfig_time,
+						     250))) {
+				q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4;
+				cake_reconfigure(sch);
+			}
+		}
+	} else {
+		q->avg_window_bytes = 0;
+		q->last_packet_time = now;
+	}
+
+	/* flowchain */
+	if (!flow->set || flow->set == CAKE_SET_DECAYING) {
+		struct cake_host *srchost = &b->hosts[flow->srchost];
+		struct cake_host *dsthost = &b->hosts[flow->dsthost];
+		u16 host_load = 1;
+
+		if (!flow->set) {
+			list_add_tail(&flow->flowchain, &b->new_flows);
+		} else {
+			b->decaying_flow_count--;
+			list_move_tail(&flow->flowchain, &b->new_flows);
+		}
+		flow->set = CAKE_SET_SPARSE;
+		b->sparse_flow_count++;
+
+		if (cake_dsrc(q->flow_mode))
+			host_load = max(host_load, srchost->srchost_refcnt);
+
+		if (cake_ddst(q->flow_mode))
+			host_load = max(host_load, dsthost->dsthost_refcnt);
+
+		flow->deficit = (b->flow_quantum *
+				 quantum_div[host_load]) >> 16;
+	} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+		/* this flow was empty, accounted as a sparse flow, but actually
+		 * in the bulk rotation.
+		 */
+		flow->set = CAKE_SET_BULK;
+		b->sparse_flow_count--;
+		b->bulk_flow_count++;
+	}
+
+	if (q->buffer_used > q->buffer_max_used)
+		q->buffer_max_used = q->buffer_used;
+
+	if (q->buffer_used > q->buffer_limit) {
+		u32 dropped = 0;
+
+		while (q->buffer_used > q->buffer_limit) {
+			dropped++;
+			cake_drop(sch, to_free);
+		}
+		b->drop_overlimit += dropped;
+	}
+	return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[q->cur_tin];
+	struct cake_flow *flow = &b->flows[q->cur_flow];
+	struct sk_buff *skb = NULL;
+	u32 len;
+
+	if (flow->head) {
+		skb = dequeue_head(flow);
+		len = qdisc_pkt_len(skb);
+		b->backlogs[q->cur_flow] -= len;
+		b->tin_backlog		 -= len;
+		sch->qstats.backlog      -= len;
+		q->buffer_used		 -= skb->truesize;
+		sch->q.qlen--;
+
+		if (q->overflow_timeout)
+			cake_heapify(q, b->overflow_idx[q->cur_flow]);
+	}
+	return skb;
+}
+
+/* Discard leftover packets from a tin no longer in use. */
+static void cake_clear_tin(struct Qdisc *sch, u16 tin)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	q->cur_tin = tin;
+	for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++)
+		while (!!(skb = cake_dequeue_one(sch)))
+			kfree_skb(skb);
+}
+
+static struct sk_buff *cake_dequeue(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[q->cur_tin];
+	struct cake_host *srchost, *dsthost;
+	ktime_t now = ktime_get();
+	struct cake_flow *flow;
+	struct list_head *head;
+	bool first_flow = true;
+	struct sk_buff *skb;
+	u16 host_load;
+	u64 delay;
+	u32 len;
+
+begin:
+	if (!sch->q.qlen)
+		return NULL;
+
+	/* global hard shaper */
+	if (ktime_after(q->time_next_packet, now) &&
+	    ktime_after(q->failsafe_next_packet, now)) {
+		u64 next = min(ktime_to_ns(q->time_next_packet),
+			       ktime_to_ns(q->failsafe_next_packet));
+
+		sch->qstats.overlimits++;
+		qdisc_watchdog_schedule_ns(&q->watchdog, next);
+		return NULL;
+	}
+
+	/* Choose a class to work on. */
+	if (!q->rate_ns) {
+		/* In unlimited mode, can't rely on shaper timings, just balance
+		 * with DRR
+		 */
+		bool wrapped = false, empty = true;
+
+		while (b->tin_deficit < 0 ||
+		       !(b->sparse_flow_count + b->bulk_flow_count)) {
+			if (b->tin_deficit <= 0)
+				b->tin_deficit += b->tin_quantum_band;
+			if (b->sparse_flow_count + b->bulk_flow_count)
+				empty = false;
+
+			q->cur_tin++;
+			b++;
+			if (q->cur_tin >= q->tin_cnt) {
+				q->cur_tin = 0;
+				b = q->tins;
+
+				if (wrapped) {
+					/* It's possible for q->qlen to be
+					 * nonzero when we actually have no
+					 * packets anywhere.
+					 */
+					if (empty)
+						return NULL;
+				} else {
+					wrapped = true;
+				}
+			}
+		}
+	} else {
+		/* In shaped mode, choose:
+		 * - Highest-priority tin with queue and meeting schedule, or
+		 * - The earliest-scheduled tin with queue.
+		 */
+		ktime_t best_time = KTIME_MAX;
+		int tin, best_tin = 0;
+
+		for (tin = 0; tin < q->tin_cnt; tin++) {
+			b = q->tins + tin;
+			if ((b->sparse_flow_count + b->bulk_flow_count) > 0) {
+				ktime_t time_to_pkt = \
+					ktime_sub(b->time_next_packet, now);
+
+				if (ktime_to_ns(time_to_pkt) <= 0 ||
+				    ktime_compare(time_to_pkt,
+						  best_time) <= 0) {
+					best_time = time_to_pkt;
+					best_tin = tin;
+				}
+			}
+		}
+
+		q->cur_tin = best_tin;
+		b = q->tins + best_tin;
+
+		/* No point in going further if no packets to deliver. */
+		if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
+			return NULL;
+	}
+
+retry:
+	/* service this class */
+	head = &b->decaying_flows;
+	if (!first_flow || list_empty(head)) {
+		head = &b->new_flows;
+		if (list_empty(head)) {
+			head = &b->old_flows;
+			if (unlikely(list_empty(head))) {
+				head = &b->decaying_flows;
+				if (unlikely(list_empty(head)))
+					goto begin;
+			}
+		}
+	}
+	flow = list_first_entry(head, struct cake_flow, flowchain);
+	q->cur_flow = flow - b->flows;
+	first_flow = false;
+
+	/* triple isolation (modified DRR++) */
+	srchost = &b->hosts[flow->srchost];
+	dsthost = &b->hosts[flow->dsthost];
+	host_load = 1;
+
+	if (cake_dsrc(q->flow_mode))
+		host_load = max(host_load, srchost->srchost_refcnt);
+
+	if (cake_ddst(q->flow_mode))
+		host_load = max(host_load, dsthost->dsthost_refcnt);
+
+	WARN_ON(host_load > CAKE_QUEUES);
+
+	/* flow isolation (DRR++) */
+	if (flow->deficit <= 0) {
+		/* The shifted prandom_u32() is a way to apply dithering to
+		 * avoid accumulating roundoff errors
+		 */
+		flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+				  (prandom_u32() >> 16)) >> 16;
+		list_move_tail(&flow->flowchain, &b->old_flows);
+
+		/* Keep all flows with deficits out of the sparse and decaying
+		 * rotations.  No non-empty flow can go into the decaying
+		 * rotation, so they can't get deficits
+		 */
+		if (flow->set == CAKE_SET_SPARSE) {
+			if (flow->head) {
+				b->sparse_flow_count--;
+				b->bulk_flow_count++;
+				flow->set = CAKE_SET_BULK;
+			} else {
+				/* we've moved it to the bulk rotation for
+				 * correct deficit accounting but we still want
+				 * to count it as a sparse flow, not a bulk one.
+				 */
+				flow->set = CAKE_SET_SPARSE_WAIT;
+			}
+		}
+		goto retry;
+	}
+
+	/* Retrieve a packet via the AQM */
+	while (1) {
+		skb = cake_dequeue_one(sch);
+		if (!skb) {
+			/* this queue was actually empty */
+			if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
+				b->unresponsive_flow_count--;
+
+			if (flow->cvars.p_drop || flow->cvars.count ||
+			    ktime_before(now, flow->cvars.drop_next)) {
+				/* keep in the flowchain until the state has
+				 * decayed to rest
+				 */
+				list_move_tail(&flow->flowchain,
+					       &b->decaying_flows);
+				if (flow->set == CAKE_SET_BULK) {
+					b->bulk_flow_count--;
+					b->decaying_flow_count++;
+				} else if (flow->set == CAKE_SET_SPARSE ||
+					   flow->set == CAKE_SET_SPARSE_WAIT) {
+					b->sparse_flow_count--;
+					b->decaying_flow_count++;
+				}
+				flow->set = CAKE_SET_DECAYING;
+			} else {
+				/* remove empty queue from the flowchain */
+				list_del_init(&flow->flowchain);
+				if (flow->set == CAKE_SET_SPARSE ||
+				    flow->set == CAKE_SET_SPARSE_WAIT)
+					b->sparse_flow_count--;
+				else if (flow->set == CAKE_SET_BULK)
+					b->bulk_flow_count--;
+				else
+					b->decaying_flow_count--;
+
+				flow->set = CAKE_SET_NONE;
+				srchost->srchost_refcnt--;
+				dsthost->dsthost_refcnt--;
+			}
+			goto begin;
+		}
+
+		/* Last packet in queue may be marked, shouldn't be dropped */
+		if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
+					(b->bulk_flow_count *
+					 !!(q->rate_flags &
+					    CAKE_FLAG_INGRESS))) ||
+		    !flow->head)
+			break;
+
+		/* drop this packet, get another one */
+		if (q->rate_flags & CAKE_FLAG_INGRESS) {
+			len = cake_advance_shaper(q, b, skb,
+						  now, true);
+			flow->deficit -= len;
+			b->tin_deficit -= len;
+		}
+		flow->dropped++;
+		b->tin_dropped++;
+		qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
+		qdisc_qstats_drop(sch);
+		kfree_skb(skb);
+		if (q->rate_flags & CAKE_FLAG_INGRESS)
+			goto retry;
+	}
+
+	b->tin_ecn_mark += !!flow->cvars.ecn_marked;
+	qdisc_bstats_update(sch, skb);
+
+	/* collect delay stats */
+	delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
+	b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
+	b->peak_delay = cake_ewma(b->peak_delay, delay,
+				  delay > b->peak_delay ? 2 : 8);
+	b->base_delay = cake_ewma(b->base_delay, delay,
+				  delay < b->base_delay ? 2 : 8);
+
+	len = cake_advance_shaper(q, b, skb, now, false);
+	flow->deficit -= len;
+	b->tin_deficit -= len;
+
+	if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
+		u64 next = min(ktime_to_ns(q->time_next_packet),
+			       ktime_to_ns(q->failsafe_next_packet));
+
+		qdisc_watchdog_schedule_ns(&q->watchdog, next);
+	} else if (!sch->q.qlen) {
+		int i;
+
+		for (i = 0; i < q->tin_cnt; i++) {
+			if (q->tins[i].decaying_flow_count) {
+				ktime_t next = \
+					ktime_add_ns(now,
+						     q->tins[i].cparams.target);
+
+				qdisc_watchdog_schedule_ns(&q->watchdog,
+							   ktime_to_ns(next));
+				break;
+			}
+		}
+	}
+
+	if (q->overflow_timeout)
+		q->overflow_timeout--;
+
+	return skb;
+}
+
+static void cake_reset(struct Qdisc *sch)
+{
+	u32 c;
+
+	for (c = 0; c < CAKE_MAX_TINS; c++)
+		cake_clear_tin(sch, c);
+}
+
+static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
+	[TCA_CAKE_BASE_RATE64]   = { .type = NLA_U64 },
+	[TCA_CAKE_DIFFSERV_MODE] = { .type = NLA_U32 },
+	[TCA_CAKE_ATM]		 = { .type = NLA_U32 },
+	[TCA_CAKE_FLOW_MODE]     = { .type = NLA_U32 },
+	[TCA_CAKE_OVERHEAD]      = { .type = NLA_S32 },
+	[TCA_CAKE_RTT]		 = { .type = NLA_U32 },
+	[TCA_CAKE_TARGET]	 = { .type = NLA_U32 },
+	[TCA_CAKE_AUTORATE]      = { .type = NLA_U32 },
+	[TCA_CAKE_MEMORY]	 = { .type = NLA_U32 },
+	[TCA_CAKE_NAT]		 = { .type = NLA_U32 },
+	[TCA_CAKE_RAW]		 = { .type = NLA_U32 },
+	[TCA_CAKE_WASH]		 = { .type = NLA_U32 },
+	[TCA_CAKE_MPU]		 = { .type = NLA_U32 },
+	[TCA_CAKE_INGRESS]	 = { .type = NLA_U32 },
+	[TCA_CAKE_ACK_FILTER]	 = { .type = NLA_U32 },
+};
+
+static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
+			  u64 target_ns, u64 rtt_est_ns)
+{
+	/* convert byte-rate into time-per-byte
+	 * so it will always unwedge in reasonable time.
+	 */
+	static const u64 MIN_RATE = 64;
+	u32 byte_target = mtu;
+	u64 byte_target_ns;
+	u8  rate_shft = 0;
+	u64 rate_ns = 0;
+
+	b->flow_quantum = 1514;
+	if (rate) {
+		b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL);
+		rate_shft = 34;
+		rate_ns = ((u64)NSEC_PER_SEC) << rate_shft;
+		rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate));
+		while (!!(rate_ns >> 34)) {
+			rate_ns >>= 1;
+			rate_shft--;
+		}
+	} /* else unlimited, ie. zero delay */
+
+	b->tin_rate_bps  = rate;
+	b->tin_rate_ns   = rate_ns;
+	b->tin_rate_shft = rate_shft;
+
+	byte_target_ns = (byte_target * rate_ns) >> rate_shft;
+
+	b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
+	b->cparams.interval = max(rtt_est_ns +
+				     b->cparams.target - target_ns,
+				     b->cparams.target * 2);
+	b->cparams.mtu_time = byte_target_ns;
+	b->cparams.p_inc = 1 << 24; /* 1/256 */
+	b->cparams.p_dec = 1 << 20; /* 1/4096 */
+}
+
+static int cake_config_besteffort(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[0];
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+
+	q->tin_cnt = 1;
+
+	q->tin_index = besteffort;
+	q->tin_order = normal_order;
+
+	cake_set_rate(b, rate, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	b->tin_quantum_band = 65535;
+	b->tin_quantum_prio = 65535;
+
+	return 0;
+}
+
+static int cake_config_precedence(struct Qdisc *sch)
+{
+	/* convert high-level (user visible) parameters into internal format */
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum1 = 256;
+	u32 quantum2 = 256;
+	u32 i;
+
+	q->tin_cnt = 8;
+	q->tin_index = precedence;
+	q->tin_order = normal_order;
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[i];
+
+		cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+			      us_to_ns(q->interval));
+
+		b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+		b->tin_quantum_band = max_t(u16, 1U, quantum2);
+
+		/* calculate next class's parameters */
+		rate  *= 7;
+		rate >>= 3;
+
+		quantum1  *= 3;
+		quantum1 >>= 1;
+
+		quantum2  *= 7;
+		quantum2 >>= 3;
+	}
+
+	return 0;
+}
+
+/*	List of known Diffserv codepoints:
+ *
+ *	Least Effort (CS1)
+ *	Best Effort (CS0)
+ *	Max Reliability & LLT "Lo" (TOS1)
+ *	Max Throughput (TOS2)
+ *	Min Delay (TOS4)
+ *	LLT "La" (TOS5)
+ *	Assured Forwarding 1 (AF1x) - x3
+ *	Assured Forwarding 2 (AF2x) - x3
+ *	Assured Forwarding 3 (AF3x) - x3
+ *	Assured Forwarding 4 (AF4x) - x3
+ *	Precedence Class 2 (CS2)
+ *	Precedence Class 3 (CS3)
+ *	Precedence Class 4 (CS4)
+ *	Precedence Class 5 (CS5)
+ *	Precedence Class 6 (CS6)
+ *	Precedence Class 7 (CS7)
+ *	Voice Admit (VA)
+ *	Expedited Forwarding (EF)
+
+ *	Total 25 codepoints.
+ */
+
+/*	List of traffic classes in RFC 4594:
+ *		(roughly descending order of contended priority)
+ *		(roughly ascending order of uncontended throughput)
+ *
+ *	Network Control (CS6,CS7)      - routing traffic
+ *	Telephony (EF,VA)         - aka. VoIP streams
+ *	Signalling (CS5)               - VoIP setup
+ *	Multimedia Conferencing (AF4x) - aka. video calls
+ *	Realtime Interactive (CS4)     - eg. games
+ *	Multimedia Streaming (AF3x)    - eg. YouTube, NetFlix, Twitch
+ *	Broadcast Video (CS3)
+ *	Low Latency Data (AF2x,TOS4)      - eg. database
+ *	Ops, Admin, Management (CS2,TOS1) - eg. ssh
+ *	Standard Service (CS0 & unrecognised codepoints)
+ *	High Throughput Data (AF1x,TOS2)  - eg. web traffic
+ *	Low Priority Data (CS1)           - eg. BitTorrent
+
+ *	Total 12 traffic classes.
+ */
+
+static int cake_config_diffserv8(struct Qdisc *sch)
+{
+/*	Pruned list of traffic classes for typical applications:
+ *
+ *		Network Control          (CS6, CS7)
+ *		Minimum Latency          (EF, VA, CS5, CS4)
+ *		Interactive Shell        (CS2, TOS1)
+ *		Low Latency Transactions (AF2x, TOS4)
+ *		Video Streaming          (AF4x, AF3x, CS3)
+ *		Bog Standard             (CS0 etc.)
+ *		High Throughput          (AF1x, TOS2)
+ *		Background Traffic       (CS1)
+ *
+ *		Total 8 traffic classes.
+ */
+
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum1 = 256;
+	u32 quantum2 = 256;
+	u32 i;
+
+	q->tin_cnt = 8;
+
+	/* codepoint to class mapping */
+	q->tin_index = diffserv8;
+	q->tin_order = normal_order;
+
+	/* class characteristics */
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[i];
+
+		cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+			      us_to_ns(q->interval));
+
+		b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+		b->tin_quantum_band = max_t(u16, 1U, quantum2);
+
+		/* calculate next class's parameters */
+		rate  *= 7;
+		rate >>= 3;
+
+		quantum1  *= 3;
+		quantum1 >>= 1;
+
+		quantum2  *= 7;
+		quantum2 >>= 3;
+	}
+
+	return 0;
+}
+
+static int cake_config_diffserv4(struct Qdisc *sch)
+{
+/*  Further pruned list of traffic classes for four-class system:
+ *
+ *	    Latency Sensitive  (CS7, CS6, EF, VA, CS5, CS4)
+ *	    Streaming Media    (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
+ *	    Best Effort        (CS0, AF1x, TOS2, and those not specified)
+ *	    Background Traffic (CS1)
+ *
+ *		Total 4 traffic classes.
+ */
+
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum = 1024;
+
+	q->tin_cnt = 4;
+
+	/* codepoint to class mapping */
+	q->tin_index = diffserv4;
+	q->tin_order = bulk_order;
+
+	/* class characteristics */
+	cake_set_rate(&q->tins[0], rate, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[1], rate >> 4, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[2], rate >> 1, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[3], rate >> 2, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+
+	/* priority weights */
+	q->tins[0].tin_quantum_prio = quantum;
+	q->tins[1].tin_quantum_prio = quantum >> 4;
+	q->tins[2].tin_quantum_prio = quantum << 2;
+	q->tins[3].tin_quantum_prio = quantum << 4;
+
+	/* bandwidth-sharing weights */
+	q->tins[0].tin_quantum_band = quantum;
+	q->tins[1].tin_quantum_band = quantum >> 4;
+	q->tins[2].tin_quantum_band = quantum >> 1;
+	q->tins[3].tin_quantum_band = quantum >> 2;
+
+	return 0;
+}
+
+static int cake_config_diffserv3(struct Qdisc *sch)
+{
+/*  Simplified Diffserv structure with 3 tins.
+ *		Low Priority		(CS1)
+ *		Best Effort
+ *		Latency Sensitive	(TOS4, VA, EF, CS6, CS7)
+ */
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum = 1024;
+
+	q->tin_cnt = 3;
+
+	/* codepoint to class mapping */
+	q->tin_index = diffserv3;
+	q->tin_order = bulk_order;
+
+	/* class characteristics */
+	cake_set_rate(&q->tins[0], rate, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[1], rate >> 4, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[2], rate >> 2, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+
+	/* priority weights */
+	q->tins[0].tin_quantum_prio = quantum;
+	q->tins[1].tin_quantum_prio = quantum >> 4;
+	q->tins[2].tin_quantum_prio = quantum << 4;
+
+	/* bandwidth-sharing weights */
+	q->tins[0].tin_quantum_band = quantum;
+	q->tins[1].tin_quantum_band = quantum >> 4;
+	q->tins[2].tin_quantum_band = quantum >> 2;
+
+	return 0;
+}
+
+static void cake_reconfigure(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int c, ft;
+
+	switch (q->tin_mode) {
+	case CAKE_DIFFSERV_BESTEFFORT:
+		ft = cake_config_besteffort(sch);
+		break;
+
+	case CAKE_DIFFSERV_PRECEDENCE:
+		ft = cake_config_precedence(sch);
+		break;
+
+	case CAKE_DIFFSERV_DIFFSERV8:
+		ft = cake_config_diffserv8(sch);
+		break;
+
+	case CAKE_DIFFSERV_DIFFSERV4:
+		ft = cake_config_diffserv4(sch);
+		break;
+
+	case CAKE_DIFFSERV_DIFFSERV3:
+	default:
+		ft = cake_config_diffserv3(sch);
+		break;
+	}
+
+	for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) {
+		cake_clear_tin(sch, c);
+		q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time;
+	}
+
+	q->rate_ns   = q->tins[ft].tin_rate_ns;
+	q->rate_shft = q->tins[ft].tin_rate_shft;
+
+	if (q->buffer_config_limit) {
+		q->buffer_limit = q->buffer_config_limit;
+	} else if (q->rate_bps) {
+		u64 t = q->rate_bps * q->interval;
+
+		do_div(t, USEC_PER_SEC / 4);
+		q->buffer_limit = max_t(u32, t, 4U << 20);
+	} else {
+		q->buffer_limit = ~0;
+	}
+
+	sch->flags &= ~TCQ_F_CAN_BYPASS;
+
+	q->buffer_limit = min(q->buffer_limit,
+			      max(sch->limit * psched_mtu(qdisc_dev(sch)),
+				  q->buffer_config_limit));
+}
+
+static int cake_change(struct Qdisc *sch, struct nlattr *opt,
+		       struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_CAKE_MAX + 1];
+	int err;
+
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_CAKE_NAT]) {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+		q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
+		q->flow_mode |= CAKE_FLOW_NAT_FLAG *
+			!!nla_get_u32(tb[TCA_CAKE_NAT]);
+#else
+		NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
+				    "No conntrack support in kernel");
+		return -EOPNOTSUPP;
+#endif
+	}
+
+	if (tb[TCA_CAKE_BASE_RATE64])
+		q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
+
+	if (tb[TCA_CAKE_DIFFSERV_MODE])
+		q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+
+	if (tb[TCA_CAKE_WASH]) {
+		if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
+			q->rate_flags |= CAKE_FLAG_WASH;
+		else
+			q->rate_flags &= ~CAKE_FLAG_WASH;
+	}
+
+	if (tb[TCA_CAKE_FLOW_MODE])
+		q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
+				(nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
+					CAKE_FLOW_MASK));
+
+	if (tb[TCA_CAKE_ATM])
+		q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+
+	if (tb[TCA_CAKE_OVERHEAD]) {
+		q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
+		q->rate_flags |= CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_RAW]) {
+		q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_MPU])
+		q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+
+	if (tb[TCA_CAKE_RTT]) {
+		q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
+
+		if (!q->interval)
+			q->interval = 1;
+	}
+
+	if (tb[TCA_CAKE_TARGET]) {
+		q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
+
+		if (!q->target)
+			q->target = 1;
+	}
+
+	if (tb[TCA_CAKE_AUTORATE]) {
+		if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
+			q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
+		else
+			q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
+	}
+
+	if (tb[TCA_CAKE_INGRESS]) {
+		if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
+			q->rate_flags |= CAKE_FLAG_INGRESS;
+		else
+			q->rate_flags &= ~CAKE_FLAG_INGRESS;
+	}
+
+	if (tb[TCA_CAKE_ACK_FILTER])
+		q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]);
+
+	if (tb[TCA_CAKE_MEMORY])
+		q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
+
+	if (q->rate_bps && q->rate_bps <= CAKE_SPLIT_GSO_THRESHOLD)
+		q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+	else
+		q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+
+	if (q->tins) {
+		sch_tree_lock(sch);
+		cake_reconfigure(sch);
+		sch_tree_unlock(sch);
+	}
+
+	return 0;
+}
+
+static void cake_destroy(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+
+	qdisc_watchdog_cancel(&q->watchdog);
+	tcf_block_put(q->block);
+	kvfree(q->tins);
+}
+
+static int cake_init(struct Qdisc *sch, struct nlattr *opt,
+		     struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int i, j, err;
+
+	sch->limit = 10240;
+	q->tin_mode = CAKE_DIFFSERV_DIFFSERV3;
+	q->flow_mode  = CAKE_FLOW_TRIPLE;
+
+	q->rate_bps = 0; /* unlimited by default */
+
+	q->interval = 100000; /* 100ms default */
+	q->target   =   5000; /* 5ms: codel RFC argues
+			       * for 5 to 10% of interval
+			       */
+
+	q->cur_tin = 0;
+	q->cur_flow  = 0;
+
+	qdisc_watchdog_init(&q->watchdog, sch);
+
+	if (opt) {
+		int err = cake_change(sch, opt, extack);
+
+		if (err)
+			return err;
+	}
+
+	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
+	if (err)
+		return err;
+
+	quantum_div[0] = ~0;
+	for (i = 1; i <= CAKE_QUEUES; i++)
+		quantum_div[i] = 65535 / i;
+
+	q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data),
+			   GFP_KERNEL);
+	if (!q->tins)
+		goto nomem;
+
+	for (i = 0; i < CAKE_MAX_TINS; i++) {
+		struct cake_tin_data *b = q->tins + i;
+
+		INIT_LIST_HEAD(&b->new_flows);
+		INIT_LIST_HEAD(&b->old_flows);
+		INIT_LIST_HEAD(&b->decaying_flows);
+		b->sparse_flow_count = 0;
+		b->bulk_flow_count = 0;
+		b->decaying_flow_count = 0;
+
+		for (j = 0; j < CAKE_QUEUES; j++) {
+			struct cake_flow *flow = b->flows + j;
+			u32 k = j * CAKE_MAX_TINS + i;
+
+			INIT_LIST_HEAD(&flow->flowchain);
+			cobalt_vars_init(&flow->cvars);
+
+			q->overflow_heap[k].t = i;
+			q->overflow_heap[k].b = j;
+			b->overflow_idx[j] = k;
+		}
+	}
+
+	cake_reconfigure(sch);
+	q->avg_peak_bandwidth = q->rate_bps;
+	q->min_netlen = ~0;
+	q->min_adjlen = ~0;
+	return 0;
+
+nomem:
+	cake_destroy(sch);
+	return -ENOMEM;
+}
+
+static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts;
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (!opts)
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
+			      TCA_CAKE_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
+			q->flow_mode & CAKE_FLOW_MASK))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
+			!!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_INGRESS,
+			!!(q->rate_flags & CAKE_FLAG_INGRESS)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_NAT,
+			!!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_WASH,
+			!!(q->rate_flags & CAKE_FLAG_WASH)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+		goto nla_put_failure;
+
+	if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+		if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
+			goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO,
+			!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	return -1;
+}
+
+static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP);
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tstats, *ts;
+	int i;
+
+	if (!stats)
+		return -1;
+
+#define PUT_STAT_U32(attr, data) do {				       \
+		if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+#define PUT_STAT_U64(attr, data) do {				       \
+		if (nla_put_u64_64bit(d->skb, TCA_CAKE_STATS_ ## attr, \
+					data, TCA_CAKE_STATS_PAD)) \
+			goto nla_put_failure;			       \
+	} while (0)
+
+	PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth);
+	PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit);
+	PUT_STAT_U32(MEMORY_USED, q->buffer_max_used);
+	PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16));
+	PUT_STAT_U32(MAX_NETLEN, q->max_netlen);
+	PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
+	PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
+	PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
+
+#undef PUT_STAT_U32
+#undef PUT_STAT_U64
+
+	tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS);
+	if (!tstats)
+		goto nla_put_failure;
+
+#define PUT_TSTAT_U32(attr, data) do {					\
+		if (nla_put_u32(d->skb, TCA_CAKE_TIN_STATS_ ## attr, data)) \
+			goto nla_put_failure;				\
+	} while (0)
+#define PUT_TSTAT_U64(attr, data) do {					\
+		if (nla_put_u64_64bit(d->skb, TCA_CAKE_TIN_STATS_ ## attr, \
+					data, TCA_CAKE_TIN_STATS_PAD))	\
+			goto nla_put_failure;				\
+	} while (0)
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[q->tin_order[i]];
+
+		ts = nla_nest_start(d->skb, i + 1);
+		if (!ts)
+			goto nla_put_failure;
+
+		PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps);
+		PUT_TSTAT_U64(SENT_BYTES64, b->bytes);
+		PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog);
+
+		PUT_TSTAT_U32(TARGET_US,
+			      ktime_to_us(ns_to_ktime(b->cparams.target)));
+		PUT_TSTAT_U32(INTERVAL_US,
+			      ktime_to_us(ns_to_ktime(b->cparams.interval)));
+
+		PUT_TSTAT_U32(SENT_PACKETS, b->packets);
+		PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped);
+		PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark);
+		PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops);
+
+		PUT_TSTAT_U32(PEAK_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->peak_delay)));
+		PUT_TSTAT_U32(AVG_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->avge_delay)));
+		PUT_TSTAT_U32(BASE_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->base_delay)));
+
+		PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits);
+		PUT_TSTAT_U32(WAY_MISSES, b->way_misses);
+		PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions);
+
+		PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count +
+					    b->decaying_flow_count);
+		PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count);
+		PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count);
+		PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen);
+
+		PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum);
+		nla_nest_end(d->skb, ts);
+	}
+
+#undef PUT_TSTAT_U32
+#undef PUT_TSTAT_U64
+
+	nla_nest_end(d->skb, tstats);
+	return nla_nest_end(d->skb, stats);
+
+nla_put_failure:
+	nla_nest_cancel(d->skb, stats);
+	return -1;
+}
+
+static struct Qdisc *cake_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long cake_find(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static unsigned long cake_bind(struct Qdisc *sch, unsigned long parent,
+			       u32 classid)
+{
+	return 0;
+}
+
+static void cake_unbind(struct Qdisc *q, unsigned long cl)
+{
+}
+
+static struct tcf_block *cake_tcf_block(struct Qdisc *sch, unsigned long cl,
+					struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return q->block;
+}
+
+static int cake_dump_class(struct Qdisc *sch, unsigned long cl,
+			   struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				 struct gnet_dump *d)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	const struct cake_flow *flow = NULL;
+	struct gnet_stats_queue qs = { 0 };
+	struct nlattr *stats;
+	u32 idx = cl - 1;
+
+	if (idx < CAKE_QUEUES * q->tin_cnt) {
+		const struct cake_tin_data *b = \
+			&q->tins[q->tin_order[idx / CAKE_QUEUES]];
+		const struct sk_buff *skb;
+
+		flow = &b->flows[idx % CAKE_QUEUES];
+
+		if (flow->head) {
+			sch_tree_lock(sch);
+			skb = flow->head;
+			while (skb) {
+				qs.qlen++;
+				skb = skb->next;
+			}
+			sch_tree_unlock(sch);
+		}
+		qs.backlog = b->backlogs[idx % CAKE_QUEUES];
+		qs.drops = flow->dropped;
+	}
+	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
+		return -1;
+	if (flow) {
+		ktime_t now = ktime_get();
+
+		stats = nla_nest_start(d->skb, TCA_STATS_APP);
+		if (!stats)
+			return -1;
+
+#define PUT_STAT_U32(attr, data) do {				       \
+		if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+#define PUT_STAT_S32(attr, data) do {				       \
+		if (nla_put_s32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+
+		PUT_STAT_S32(DEFICIT, flow->deficit);
+		PUT_STAT_U32(DROPPING, flow->cvars.dropping);
+		PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
+		PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
+		if (flow->cvars.p_drop) {
+			PUT_STAT_S32(BLUE_TIMER_US,
+				     ktime_to_us(
+					     ktime_sub(now,
+						     flow->cvars.blue_timer)));
+		}
+		if (flow->cvars.dropping) {
+			PUT_STAT_S32(DROP_NEXT_US,
+				     ktime_to_us(
+					     ktime_sub(now,
+						       flow->cvars.drop_next)));
+		}
+
+		if (nla_nest_end(d->skb, stats) < 0)
+			return -1;
+	}
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(d->skb, stats);
+	return -1;
+}
+
+static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	unsigned int i, j;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[q->tin_order[i]];
+
+		for (j = 0; j < CAKE_QUEUES; j++) {
+			if (list_empty(&b->flows[j].flowchain) ||
+			    arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) {
+				arg->stop = 1;
+				break;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static const struct Qdisc_class_ops cake_class_ops = {
+	.leaf		=	cake_leaf,
+	.find		=	cake_find,
+	.tcf_block	=	cake_tcf_block,
+	.bind_tcf	=	cake_bind,
+	.unbind_tcf	=	cake_unbind,
+	.dump		=	cake_dump_class,
+	.dump_stats	=	cake_dump_class_stats,
+	.walk		=	cake_walk,
+};
+
+static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
+	.cl_ops		=	&cake_class_ops,
+	.id		=	"cake",
+	.priv_size	=	sizeof(struct cake_sched_data),
+	.enqueue	=	cake_enqueue,
+	.dequeue	=	cake_dequeue,
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	cake_init,
+	.reset		=	cake_reset,
+	.destroy	=	cake_destroy,
+	.change		=	cake_change,
+	.dump		=	cake_dump,
+	.dump_stats	=	cake_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init cake_module_init(void)
+{
+	return register_qdisc(&cake_qdisc_ops);
+}
+
+static void __exit cake_module_exit(void)
+{
+	unregister_qdisc(&cake_qdisc_ops);
+}
+
+module_init(cake_module_init)
+module_exit(cake_module_exit)
+MODULE_AUTHOR("Jonathan Morton");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("The CAKE shaper.");
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index d36dc26c6c51..07991e1025c7 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -105,7 +105,7 @@ do_test_span_vlan_dir_ips()
 	# Install the capture as skip_hw to avoid double-counting of packets.
 	# The traffic is meant for local box anyway, so will be trapped to
 	# kernel.
-	vlan_capture_install $dev "skip_hw vlan_id $vid"
+	vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip"
 	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
 	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
 	vlan_capture_uninstall $dev