diff options
49 files changed, 5436 insertions, 1307 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index f4c042be0216..77c37fb0b6a6 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1834,6 +1834,16 @@ stable_secret - IPv6 address By default the stable secret is unset. +addr_gen_mode - INTEGER + Defines how link-local and autoconf addresses are generated. + + 0: generate address based on EUI64 (default) + 1: do no generate a link-local address, use EUI64 for addresses generated + from autoconf + 2: generate stable privacy addresses, using the secret from + stable_secret (RFC7217) + 3: generate stable privacy addresses, using a random secret if unset + drop_unicast_in_l2_multicast - BOOLEAN Drop any unicast IPv6 packets that are received in link-layer multicast (or broadcast) frames. diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 2369b2f0d95a..d83233ae4a15 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -4088,7 +4088,7 @@ static void cnic_cm_free_mem(struct cnic_dev *dev) { struct cnic_local *cp = dev->cnic_priv; - kfree(cp->csk_tbl); + kvfree(cp->csk_tbl); cp->csk_tbl = NULL; cnic_free_id_tbl(&cp->csk_port_tbl); } @@ -4098,8 +4098,8 @@ static int cnic_cm_alloc_mem(struct cnic_dev *dev) struct cnic_local *cp = dev->cnic_priv; u32 port_id; - cp->csk_tbl = kcalloc(MAX_CM_SK_TBL_SZ, sizeof(struct cnic_sock), - GFP_KERNEL); + cp->csk_tbl = kvcalloc(MAX_CM_SK_TBL_SZ, sizeof(struct cnic_sock), + GFP_KERNEL); if (!cp->csk_tbl) return -ENOMEM; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 516c883e1613..511606fd1b20 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2962,6 +2962,169 @@ static const struct file_operations chcr_stats_debugfs_fops = { .llseek = seq_lseek, .release = single_release, }; + +#define PRINT_ADAP_STATS(string, value) \ + seq_printf(seq, "%-25s %-20llu\n", (string), \ + (unsigned long long)(value)) + +#define PRINT_CH_STATS(string, value) \ +do { \ + seq_printf(seq, "%-25s ", (string)); \ + for (i = 0; i < adap->params.arch.nchan; i++) \ + seq_printf(seq, "%-20llu ", \ + (unsigned long long)stats.value[i]); \ + seq_printf(seq, "\n"); \ +} while (0) + +#define PRINT_CH_STATS2(string, value) \ +do { \ + seq_printf(seq, "%-25s ", (string)); \ + for (i = 0; i < adap->params.arch.nchan; i++) \ + seq_printf(seq, "%-20llu ", \ + (unsigned long long)stats[i].value); \ + seq_printf(seq, "\n"); \ +} while (0) + +static void show_tcp_stats(struct seq_file *seq) +{ + struct adapter *adap = seq->private; + struct tp_tcp_stats v4, v6; + + spin_lock(&adap->stats_lock); + t4_tp_get_tcp_stats(adap, &v4, &v6, false); + spin_unlock(&adap->stats_lock); + + PRINT_ADAP_STATS("tcp_ipv4_out_rsts:", v4.tcp_out_rsts); + PRINT_ADAP_STATS("tcp_ipv4_in_segs:", v4.tcp_in_segs); + PRINT_ADAP_STATS("tcp_ipv4_out_segs:", v4.tcp_out_segs); + PRINT_ADAP_STATS("tcp_ipv4_retrans_segs:", v4.tcp_retrans_segs); + PRINT_ADAP_STATS("tcp_ipv6_out_rsts:", v6.tcp_out_rsts); + PRINT_ADAP_STATS("tcp_ipv6_in_segs:", v6.tcp_in_segs); + PRINT_ADAP_STATS("tcp_ipv6_out_segs:", v6.tcp_out_segs); + PRINT_ADAP_STATS("tcp_ipv6_retrans_segs:", v6.tcp_retrans_segs); +} + +static void show_ddp_stats(struct seq_file *seq) +{ + struct adapter *adap = seq->private; + struct tp_usm_stats stats; + + spin_lock(&adap->stats_lock); + t4_get_usm_stats(adap, &stats, false); + spin_unlock(&adap->stats_lock); + + PRINT_ADAP_STATS("usm_ddp_frames:", stats.frames); + PRINT_ADAP_STATS("usm_ddp_octets:", stats.octets); + PRINT_ADAP_STATS("usm_ddp_drops:", stats.drops); +} + +static void show_rdma_stats(struct seq_file *seq) +{ + struct adapter *adap = seq->private; + struct tp_rdma_stats stats; + + spin_lock(&adap->stats_lock); + t4_tp_get_rdma_stats(adap, &stats, false); + spin_unlock(&adap->stats_lock); + + PRINT_ADAP_STATS("rdma_no_rqe_mod_defer:", stats.rqe_dfr_mod); + PRINT_ADAP_STATS("rdma_no_rqe_pkt_defer:", stats.rqe_dfr_pkt); +} + +static void show_tp_err_adapter_stats(struct seq_file *seq) +{ + struct adapter *adap = seq->private; + struct tp_err_stats stats; + + spin_lock(&adap->stats_lock); + t4_tp_get_err_stats(adap, &stats, false); + spin_unlock(&adap->stats_lock); + + PRINT_ADAP_STATS("tp_err_ofld_no_neigh:", stats.ofld_no_neigh); + PRINT_ADAP_STATS("tp_err_ofld_cong_defer:", stats.ofld_cong_defer); +} + +static void show_cpl_stats(struct seq_file *seq) +{ + struct adapter *adap = seq->private; + struct tp_cpl_stats stats; + u8 i; + + spin_lock(&adap->stats_lock); + t4_tp_get_cpl_stats(adap, &stats, false); + spin_unlock(&adap->stats_lock); + + PRINT_CH_STATS("tp_cpl_requests:", req); + PRINT_CH_STATS("tp_cpl_responses:", rsp); +} + +static void show_tp_err_channel_stats(struct seq_file *seq) +{ + struct adapter *adap = seq->private; + struct tp_err_stats stats; + u8 i; + + spin_lock(&adap->stats_lock); + t4_tp_get_err_stats(adap, &stats, false); + spin_unlock(&adap->stats_lock); + + PRINT_CH_STATS("tp_mac_in_errs:", mac_in_errs); + PRINT_CH_STATS("tp_hdr_in_errs:", hdr_in_errs); + PRINT_CH_STATS("tp_tcp_in_errs:", tcp_in_errs); + PRINT_CH_STATS("tp_tcp6_in_errs:", tcp6_in_errs); + PRINT_CH_STATS("tp_tnl_cong_drops:", tnl_cong_drops); + PRINT_CH_STATS("tp_tnl_tx_drops:", tnl_tx_drops); + PRINT_CH_STATS("tp_ofld_vlan_drops:", ofld_vlan_drops); + PRINT_CH_STATS("tp_ofld_chan_drops:", ofld_chan_drops); +} + +static void show_fcoe_stats(struct seq_file *seq) +{ + struct adapter *adap = seq->private; + struct tp_fcoe_stats stats[NCHAN]; + u8 i; + + spin_lock(&adap->stats_lock); + for (i = 0; i < adap->params.arch.nchan; i++) + t4_get_fcoe_stats(adap, i, &stats[i], false); + spin_unlock(&adap->stats_lock); + + PRINT_CH_STATS2("fcoe_octets_ddp", octets_ddp); + PRINT_CH_STATS2("fcoe_frames_ddp", frames_ddp); + PRINT_CH_STATS2("fcoe_frames_drop", frames_drop); +} + +#undef PRINT_CH_STATS2 +#undef PRINT_CH_STATS +#undef PRINT_ADAP_STATS + +static int tp_stats_show(struct seq_file *seq, void *v) +{ + struct adapter *adap = seq->private; + + seq_puts(seq, "\n--------Adapter Stats--------\n"); + show_tcp_stats(seq); + show_ddp_stats(seq); + show_rdma_stats(seq); + show_tp_err_adapter_stats(seq); + + seq_puts(seq, "\n-------- Channel Stats --------\n"); + if (adap->params.arch.nchan == NCHAN) + seq_printf(seq, "%-25s %-20s %-20s %-20s %-20s\n", + " ", "channel 0", "channel 1", + "channel 2", "channel 3"); + else + seq_printf(seq, "%-25s %-20s %-20s\n", + " ", "channel 0", "channel 1"); + show_cpl_stats(seq); + show_tp_err_channel_stats(seq); + show_fcoe_stats(seq); + + return 0; +} + +DEFINE_SIMPLE_DEBUGFS_FILE(tp_stats); + /* Add an array of Debug FS files. */ void add_debugfs_files(struct adapter *adap, @@ -3038,6 +3201,7 @@ int t4_setup_debugfs(struct adapter *adap) { "blocked_fl", &blocked_fl_fops, 0600, 0 }, { "meminfo", &meminfo_fops, 0400, 0 }, { "crypto", &chcr_stats_debugfs_fops, 0400, 0 }, + { "tp_stats", &tp_stats_debugfs_fops, 0400, 0 }, }; /* Debug FS nodes common to all T5 and later adapters. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index ddb8b9eba6bf..a14a290a56ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -115,42 +115,10 @@ static char adapter_stats_strings[][ETH_GSTRING_LEN] = { "db_drop ", "db_full ", "db_empty ", - "tcp_ipv4_out_rsts ", - "tcp_ipv4_in_segs ", - "tcp_ipv4_out_segs ", - "tcp_ipv4_retrans_segs ", - "tcp_ipv6_out_rsts ", - "tcp_ipv6_in_segs ", - "tcp_ipv6_out_segs ", - "tcp_ipv6_retrans_segs ", - "usm_ddp_frames ", - "usm_ddp_octets ", - "usm_ddp_drops ", - "rdma_no_rqe_mod_defer ", - "rdma_no_rqe_pkt_defer ", - "tp_err_ofld_no_neigh ", - "tp_err_ofld_cong_defer ", "write_coal_success ", "write_coal_fail ", }; -static char channel_stats_strings[][ETH_GSTRING_LEN] = { - "--------Channel--------- ", - "tp_cpl_requests ", - "tp_cpl_responses ", - "tp_mac_in_errs ", - "tp_hdr_in_errs ", - "tp_tcp_in_errs ", - "tp_tcp6_in_errs ", - "tp_tnl_cong_drops ", - "tp_tnl_tx_drops ", - "tp_ofld_vlan_drops ", - "tp_ofld_chan_drops ", - "fcoe_octets_ddp ", - "fcoe_frames_ddp ", - "fcoe_frames_drop ", -}; - static char loopback_stats_strings[][ETH_GSTRING_LEN] = { "-------Loopback----------- ", "octets_ok ", @@ -187,7 +155,6 @@ static int get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return ARRAY_SIZE(stats_strings) + ARRAY_SIZE(adapter_stats_strings) + - ARRAY_SIZE(channel_stats_strings) + ARRAY_SIZE(loopback_stats_strings); case ETH_SS_PRIV_FLAGS: return ARRAY_SIZE(cxgb4_priv_flags_strings); @@ -252,9 +219,6 @@ static void get_strings(struct net_device *dev, u32 stringset, u8 *data) memcpy(data, adapter_stats_strings, sizeof(adapter_stats_strings)); data += sizeof(adapter_stats_strings); - memcpy(data, channel_stats_strings, - sizeof(channel_stats_strings)); - data += sizeof(channel_stats_strings); memcpy(data, loopback_stats_strings, sizeof(loopback_stats_strings)); } else if (stringset == ETH_SS_PRIV_FLAGS) { @@ -280,41 +244,10 @@ struct adapter_stats { u64 db_drop; u64 db_full; u64 db_empty; - u64 tcp_v4_out_rsts; - u64 tcp_v4_in_segs; - u64 tcp_v4_out_segs; - u64 tcp_v4_retrans_segs; - u64 tcp_v6_out_rsts; - u64 tcp_v6_in_segs; - u64 tcp_v6_out_segs; - u64 tcp_v6_retrans_segs; - u64 frames; - u64 octets; - u64 drops; - u64 rqe_dfr_mod; - u64 rqe_dfr_pkt; - u64 ofld_no_neigh; - u64 ofld_cong_defer; u64 wc_success; u64 wc_fail; }; -struct channel_stats { - u64 cpl_req; - u64 cpl_rsp; - u64 mac_in_errs; - u64 hdr_in_errs; - u64 tcp_in_errs; - u64 tcp6_in_errs; - u64 tnl_cong_drops; - u64 tnl_tx_drops; - u64 ofld_vlan_drops; - u64 ofld_chan_drops; - u64 octets_ddp; - u64 frames_ddp; - u64 frames_drop; -}; - static void collect_sge_port_stats(const struct adapter *adap, const struct port_info *p, struct queue_port_stats *s) @@ -337,45 +270,14 @@ static void collect_sge_port_stats(const struct adapter *adap, static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s) { - struct tp_tcp_stats v4, v6; - struct tp_rdma_stats rdma_stats; - struct tp_err_stats err_stats; - struct tp_usm_stats usm_stats; u64 val1, val2; memset(s, 0, sizeof(*s)); - spin_lock(&adap->stats_lock); - t4_tp_get_tcp_stats(adap, &v4, &v6, false); - t4_tp_get_rdma_stats(adap, &rdma_stats, false); - t4_get_usm_stats(adap, &usm_stats, false); - t4_tp_get_err_stats(adap, &err_stats, false); - spin_unlock(&adap->stats_lock); - s->db_drop = adap->db_stats.db_drop; s->db_full = adap->db_stats.db_full; s->db_empty = adap->db_stats.db_empty; - s->tcp_v4_out_rsts = v4.tcp_out_rsts; - s->tcp_v4_in_segs = v4.tcp_in_segs; - s->tcp_v4_out_segs = v4.tcp_out_segs; - s->tcp_v4_retrans_segs = v4.tcp_retrans_segs; - s->tcp_v6_out_rsts = v6.tcp_out_rsts; - s->tcp_v6_in_segs = v6.tcp_in_segs; - s->tcp_v6_out_segs = v6.tcp_out_segs; - s->tcp_v6_retrans_segs = v6.tcp_retrans_segs; - - if (is_offload(adap)) { - s->frames = usm_stats.frames; - s->octets = usm_stats.octets; - s->drops = usm_stats.drops; - s->rqe_dfr_mod = rdma_stats.rqe_dfr_mod; - s->rqe_dfr_pkt = rdma_stats.rqe_dfr_pkt; - } - - s->ofld_no_neigh = err_stats.ofld_no_neigh; - s->ofld_cong_defer = err_stats.ofld_cong_defer; - if (!is_t4(adap->params.chip)) { int v; @@ -389,36 +291,6 @@ static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s) } } -static void collect_channel_stats(struct adapter *adap, struct channel_stats *s, - u8 i) -{ - struct tp_cpl_stats cpl_stats; - struct tp_err_stats err_stats; - struct tp_fcoe_stats fcoe_stats; - - memset(s, 0, sizeof(*s)); - - spin_lock(&adap->stats_lock); - t4_tp_get_cpl_stats(adap, &cpl_stats, false); - t4_tp_get_err_stats(adap, &err_stats, false); - t4_get_fcoe_stats(adap, i, &fcoe_stats, false); - spin_unlock(&adap->stats_lock); - - s->cpl_req = cpl_stats.req[i]; - s->cpl_rsp = cpl_stats.rsp[i]; - s->mac_in_errs = err_stats.mac_in_errs[i]; - s->hdr_in_errs = err_stats.hdr_in_errs[i]; - s->tcp_in_errs = err_stats.tcp_in_errs[i]; - s->tcp6_in_errs = err_stats.tcp6_in_errs[i]; - s->tnl_cong_drops = err_stats.tnl_cong_drops[i]; - s->tnl_tx_drops = err_stats.tnl_tx_drops[i]; - s->ofld_vlan_drops = err_stats.ofld_vlan_drops[i]; - s->ofld_chan_drops = err_stats.ofld_chan_drops[i]; - s->octets_ddp = fcoe_stats.octets_ddp; - s->frames_ddp = fcoe_stats.frames_ddp; - s->frames_drop = fcoe_stats.frames_drop; -} - static void get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -439,11 +311,6 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats, data += sizeof(struct adapter_stats) / sizeof(u64); *data++ = (u64)pi->port_id; - collect_channel_stats(adapter, (struct channel_stats *)data, - pi->port_id); - data += sizeof(struct channel_stats) / sizeof(u64); - - *data++ = (u64)pi->port_id; memset(&s, 0, sizeof(s)); t4_get_lb_stats(adapter, pi->port_id, &s); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index ebb46c472d52..6807bc3a44fb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -3412,7 +3412,9 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, c.iqsize = htons(iq->size); c.iqaddr = cpu_to_be64(iq->phys_addr); if (cong >= 0) - c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F); + c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F | + FW_IQ_CMD_IQTYPE_V(cong ? FW_IQ_IQTYPE_NIC + : FW_IQ_IQTYPE_OFLD)); if (fl) { enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index f1967cf6d43c..5dc6c4154af8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1472,6 +1472,12 @@ enum fw_iq_type { FW_IQ_TYPE_NO_FL_INT_CAP }; +enum fw_iq_iqtype { + FW_IQ_IQTYPE_OTHER, + FW_IQ_IQTYPE_NIC, + FW_IQ_IQTYPE_OFLD, +}; + struct fw_iq_cmd { __be32 op_to_vfn; __be32 alloc_to_len16; @@ -1586,6 +1592,12 @@ struct fw_iq_cmd { #define FW_IQ_CMD_IQFLINTISCSIC_S 26 #define FW_IQ_CMD_IQFLINTISCSIC_V(x) ((x) << FW_IQ_CMD_IQFLINTISCSIC_S) +#define FW_IQ_CMD_IQTYPE_S 24 +#define FW_IQ_CMD_IQTYPE_M 0x3 +#define FW_IQ_CMD_IQTYPE_V(x) ((x) << FW_IQ_CMD_IQTYPE_S) +#define FW_IQ_CMD_IQTYPE_G(x) \ + (((x) >> FW_IQ_CMD_IQTYPE_S) & FW_IQ_CMD_IQTYPE_M) + #define FW_IQ_CMD_FL0CNGCHMAP_S 20 #define FW_IQ_CMD_FL0CNGCHMAP_V(x) ((x) << FW_IQ_CMD_FL0CNGCHMAP_S) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 81a66cce7fa8..18834619bb3a 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -12,6 +12,7 @@ #ifndef _MVPP2_H_ #define _MVPP2_H_ +#include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/phy.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bbd2fd0b2e06..c7791d036e9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -349,7 +349,8 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); - rq->mpwqe.info = kvzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), + rq->mpwqe.info = kvzalloc_node(array_size(wq_sz, + sizeof(*rq->mpwqe.info)), GFP_KERNEL, cpu_to_node(c->cpu)); if (!rq->mpwqe.info) return -ENOMEM; @@ -969,7 +970,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - sq->db.di = kvzalloc_node(sizeof(*sq->db.di) * wq_sz, + sq->db.di = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.di)), GFP_KERNEL, numa); if (!sq->db.di) { mlx5e_free_xdpsq_db(sq); @@ -1028,7 +1029,8 @@ static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) { u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - sq->db.ico_wqe = kvzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz, + sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz, + sizeof(*sq->db.ico_wqe)), GFP_KERNEL, numa); if (!sq->db.ico_wqe) return -ENOMEM; @@ -1083,9 +1085,11 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; - sq->db.dma_fifo = kvzalloc_node(df_sz * sizeof(*sq->db.dma_fifo), + sq->db.dma_fifo = kvzalloc_node(array_size(df_sz, + sizeof(*sq->db.dma_fifo)), GFP_KERNEL, numa); - sq->db.wqe_info = kvzalloc_node(wq_sz * sizeof(*sq->db.wqe_info), + sq->db.wqe_info = kvzalloc_node(array_size(wq_sz, + sizeof(*sq->db.wqe_info)), GFP_KERNEL, numa); if (!sq->db.dma_fifo || !sq->db.wqe_info) { mlx5e_free_txqsq_db(sq); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 0cadcabfe86f..981e621ef9c2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -15,11 +15,16 @@ mlxsw_switchx2-objs := switchx2.o obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_switchdev.o spectrum_router.o \ - spectrum_kvdl.o spectrum_acl_tcam.o \ - spectrum_acl.o spectrum_flower.o \ - spectrum_cnt.o spectrum_fid.o \ - spectrum_ipip.o spectrum_acl_flex_actions.o \ - spectrum_mr.o spectrum_mr_tcam.o \ + spectrum1_kvdl.o spectrum_kvdl.o \ + spectrum_acl_tcam.o spectrum_acl_ctcam.o \ + spectrum1_acl_tcam.o \ + spectrum_acl.o \ + spectrum_flower.o spectrum_cnt.o \ + spectrum_fid.o spectrum_ipip.o \ + spectrum_acl_flex_actions.o \ + spectrum_acl_flex_keys.o \ + spectrum1_mr_tcam.o \ + spectrum_mr_tcam.o spectrum_mr.o \ spectrum_qdisc.o spectrum_span.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index b32a00972e83..bf645215f514 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -43,6 +43,7 @@ struct mlxsw_afk { struct list_head key_info_list; unsigned int max_blocks; + const struct mlxsw_afk_ops *ops; const struct mlxsw_afk_block *blocks; unsigned int blocks_count; }; @@ -69,8 +70,7 @@ static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk) } struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks, - const struct mlxsw_afk_block *blocks, - unsigned int blocks_count) + const struct mlxsw_afk_ops *ops) { struct mlxsw_afk *mlxsw_afk; @@ -79,8 +79,9 @@ struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks, return NULL; INIT_LIST_HEAD(&mlxsw_afk->key_info_list); mlxsw_afk->max_blocks = max_blocks; - mlxsw_afk->blocks = blocks; - mlxsw_afk->blocks_count = blocks_count; + mlxsw_afk->ops = ops; + mlxsw_afk->blocks = ops->blocks; + mlxsw_afk->blocks_count = ops->blocks_count; WARN_ON(!mlxsw_afk_blocks_check(mlxsw_afk)); return mlxsw_afk; } @@ -415,45 +416,8 @@ void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values, } EXPORT_SYMBOL(mlxsw_afk_values_add_buf); -static void mlxsw_afk_encode_u32(const struct mlxsw_item *storage_item, - const struct mlxsw_item *output_item, - char *storage, char *output_indexed) -{ - u32 value; - - value = __mlxsw_item_get32(storage, storage_item, 0); - __mlxsw_item_set32(output_indexed, output_item, 0, value); -} - -static void mlxsw_afk_encode_buf(const struct mlxsw_item *storage_item, - const struct mlxsw_item *output_item, - char *storage, char *output_indexed) -{ - char *storage_data = __mlxsw_item_data(storage, storage_item, 0); - char *output_data = __mlxsw_item_data(output_indexed, output_item, 0); - size_t len = output_item->size.bytes; - - memcpy(output_data, storage_data, len); -} - -#define MLXSW_AFK_KEY_BLOCK_SIZE 16 - -static void mlxsw_afk_encode_one(const struct mlxsw_afk_element_inst *elinst, - int block_index, char *storage, char *output) -{ - char *output_indexed = output + block_index * MLXSW_AFK_KEY_BLOCK_SIZE; - const struct mlxsw_item *storage_item = &elinst->info->item; - const struct mlxsw_item *output_item = &elinst->item; - - if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32) - mlxsw_afk_encode_u32(storage_item, output_item, - storage, output_indexed); - else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF) - mlxsw_afk_encode_buf(storage_item, output_item, - storage, output_indexed); -} - -void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info, +void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_key_info *key_info, struct mlxsw_afk_element_values *values, char *key, char *mask) { @@ -466,10 +430,10 @@ void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info, &block_index); if (!elinst) continue; - mlxsw_afk_encode_one(elinst, block_index, - values->storage.key, key); - mlxsw_afk_encode_one(elinst, block_index, - values->storage.mask, mask); + mlxsw_afk->ops->encode_one(elinst, block_index, + values->storage.key, key); + mlxsw_afk->ops->encode_one(elinst, block_index, + values->storage.mask, mask); } } EXPORT_SYMBOL(mlxsw_afk_encode); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 154eccd64019..441636cd13d8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -216,9 +216,15 @@ mlxsw_afk_element_usage_subset(struct mlxsw_afk_element_usage *elusage_small, struct mlxsw_afk; +struct mlxsw_afk_ops { + const struct mlxsw_afk_block *blocks; + unsigned int blocks_count; + void (*encode_one)(const struct mlxsw_afk_element_inst *elinst, + int block_index, char *storage, char *output); +}; + struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks, - const struct mlxsw_afk_block *blocks, - unsigned int blocks_count); + const struct mlxsw_afk_ops *ops); void mlxsw_afk_destroy(struct mlxsw_afk *mlxsw_afk); struct mlxsw_afk_key_info; @@ -251,7 +257,8 @@ void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values, enum mlxsw_afk_element element, const char *key_value, const char *mask_value, unsigned int len); -void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info, +void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_key_info *key_info, struct mlxsw_afk_element_values *values, char *key, char *mask); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 29c51ae91241..ccf4aae91630 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2402,6 +2402,15 @@ MLXSW_ITEM32(reg, ptce2, op, 0x00, 20, 3); */ MLXSW_ITEM32(reg, ptce2, offset, 0x00, 0, 16); +/* reg_ptce2_priority + * Priority of the rule, higher values win. The range is 1..cap_kvd_size-1. + * Note: priority does not have to be unique per rule. + * Within a region, higher priority should have lower offset (no limitation + * between regions in a multi-region). + * Access: RW + */ +MLXSW_ITEM32(reg, ptce2, priority, 0x04, 0, 24); + /* reg_ptce2_tcam_region_info * Opaque object that represents the TCAM region. * Access: Index @@ -2437,12 +2446,13 @@ MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0, static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid, enum mlxsw_reg_ptce2_op op, const char *tcam_region_info, - u16 offset) + u16 offset, u32 priority) { MLXSW_REG_ZERO(ptce2, payload); mlxsw_reg_ptce2_v_set(payload, valid); mlxsw_reg_ptce2_op_set(payload, op); mlxsw_reg_ptce2_offset_set(payload, offset); + mlxsw_reg_ptce2_priority_set(payload, priority); mlxsw_reg_ptce2_tcam_region_info_memcpy_to(payload, tcam_region_info); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index fd9299ccec72..f672a7b71de7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -42,6 +42,8 @@ enum mlxsw_res_id { MLXSW_RES_ID_KVD_SIZE, MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE, MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE, + MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE, + MLXSW_RES_ID_MAX_KVD_ACTION_SETS, MLXSW_RES_ID_MAX_TRAP_GROUPS, MLXSW_RES_ID_CQE_V0, MLXSW_RES_ID_CQE_V1, @@ -83,6 +85,8 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_KVD_SIZE] = 0x1001, [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002, [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003, + [MLXSW_RES_ID_MAX_KVD_LINEAR_RANGE] = 0x1005, + [MLXSW_RES_ID_MAX_KVD_ACTION_SETS] = 0x1007, [MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201, [MLXSW_RES_ID_CQE_V0] = 0x2210, [MLXSW_RES_ID_CQE_V1] = 0x2211, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cba0e9ba7dea..5b2f4c6f5e57 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -74,15 +74,22 @@ #include "spectrum_span.h" #include "../mlxfw/mlxfw.h" -#define MLXSW_FWREV_MAJOR 13 -#define MLXSW_FWREV_MINOR 1620 -#define MLXSW_FWREV_SUBMINOR 192 -#define MLXSW_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) +#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) -#define MLXSW_SP_FW_FILENAME \ - "mellanox/mlxsw_spectrum-" __stringify(MLXSW_FWREV_MAJOR) \ - "." __stringify(MLXSW_FWREV_MINOR) \ - "." __stringify(MLXSW_FWREV_SUBMINOR) ".mfa2" +#define MLXSW_SP1_FWREV_MAJOR 13 +#define MLXSW_SP1_FWREV_MINOR 1620 +#define MLXSW_SP1_FWREV_SUBMINOR 192 + +static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { + .major = MLXSW_SP1_FWREV_MAJOR, + .minor = MLXSW_SP1_FWREV_MINOR, + .subminor = MLXSW_SP1_FWREV_SUBMINOR, +}; + +#define MLXSW_SP1_FW_FILENAME \ + "mellanox/mlxsw_spectrum-" __stringify(MLXSW_SP1_FWREV_MAJOR) \ + "." __stringify(MLXSW_SP1_FWREV_MINOR) \ + "." __stringify(MLXSW_SP1_FWREV_SUBMINOR) ".mfa2" static const char mlxsw_sp_driver_name[] = "mlxsw_spectrum"; static const char mlxsw_sp_driver_version[] = "1.0"; @@ -338,29 +345,35 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) { const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev; + const struct mlxsw_fw_rev *req_rev = mlxsw_sp->req_rev; + const char *fw_filename = mlxsw_sp->fw_filename; const struct firmware *firmware; int err; + /* Don't check if driver does not require it */ + if (!req_rev || !fw_filename) + return 0; + /* Validate driver & FW are compatible */ - if (rev->major != MLXSW_FWREV_MAJOR) { + if (rev->major != req_rev->major) { WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n", - rev->major, MLXSW_FWREV_MAJOR); + rev->major, req_rev->major); return -EINVAL; } - if (MLXSW_FWREV_MINOR_TO_BRANCH(rev->minor) == - MLXSW_FWREV_MINOR_TO_BRANCH(MLXSW_FWREV_MINOR)) + if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) == + MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor)) return 0; dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n", rev->major, rev->minor, rev->subminor); dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n", - MLXSW_SP_FW_FILENAME); + fw_filename); - err = request_firmware_direct(&firmware, MLXSW_SP_FW_FILENAME, + err = request_firmware_direct(&firmware, fw_filename, mlxsw_sp->bus_info->dev); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Could not request firmware file %s\n", - MLXSW_SP_FW_FILENAME); + fw_filename); return err; } @@ -3621,7 +3634,13 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); int err; + mlxsw_sp->req_rev = &mlxsw_sp1_fw_rev; + mlxsw_sp->fw_filename = MLXSW_SP1_FW_FILENAME; + mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops; + mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops; + mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops; + mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; mlxsw_sp->core = mlxsw_core; mlxsw_sp->bus_info = mlxsw_bus_info; @@ -3880,7 +3899,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core) if (err) return err; - err = mlxsw_sp_kvdl_resources_register(mlxsw_core); + err = mlxsw_sp1_kvdl_resources_register(mlxsw_core); if (err) return err; @@ -4741,4 +4760,4 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Jiri Pirko <[email protected]>"); MODULE_DESCRIPTION("Mellanox Spectrum driver"); MODULE_DEVICE_TABLE(pci, mlxsw_sp_pci_id_table); -MODULE_FIRMWARE(MLXSW_SP_FW_FILENAME); +MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index f5294c9a4c92..eb3d3c0bdb03 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -145,6 +145,9 @@ struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; struct mlxsw_sp_fid_core; struct mlxsw_sp_kvdl; +struct mlxsw_sp_kvdl_ops; +struct mlxsw_sp_mr_tcam_ops; +struct mlxsw_sp_acl_tcam_ops; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -168,7 +171,13 @@ struct mlxsw_sp { struct mlxsw_sp_span_entry *entries; int entries_count; } span; + const struct mlxsw_fw_rev *req_rev; + const char *fw_filename; + const struct mlxsw_sp_kvdl_ops *kvdl_ops; const struct mlxsw_afa_ops *afa_ops; + const struct mlxsw_afk_ops *afk_ops; + const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops; + const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; }; static inline struct mlxsw_sp_upper * @@ -436,15 +445,59 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); /* spectrum_kvdl.c */ +enum mlxsw_sp_kvdl_entry_type { + MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + MLXSW_SP_KVDL_ENTRY_TYPE_PBS, + MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR, +}; + +static inline unsigned int +mlxsw_sp_kvdl_entry_size(enum mlxsw_sp_kvdl_entry_type type) +{ + switch (type) { + case MLXSW_SP_KVDL_ENTRY_TYPE_ADJ: /* fall through */ + case MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET: /* fall through */ + case MLXSW_SP_KVDL_ENTRY_TYPE_PBS: /* fall through */ + case MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR: /* fall through */ + default: + return 1; + } +} + +struct mlxsw_sp_kvdl_ops { + size_t priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); + void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv); + int (*alloc)(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, u32 *p_entry_index); + void (*free)(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, int entry_index); + int (*alloc_size_query)(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + unsigned int *p_alloc_count); + int (*resources_register)(struct mlxsw_sp *mlxsw_sp, void *priv); +}; + int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, - u32 *p_entry_index); -void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); -int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp, - unsigned int entry_count, - unsigned int *p_alloc_size); -int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core); +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, u32 *p_entry_index); +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, int entry_index); +int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + unsigned int *p_alloc_count); + +/* spectrum1_kvdl.c */ +extern const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops; +int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core); struct mlxsw_sp_acl_rule_info { unsigned int priority; @@ -453,44 +506,14 @@ struct mlxsw_sp_acl_rule_info { unsigned int counter_index; }; -enum mlxsw_sp_acl_profile { - MLXSW_SP_ACL_PROFILE_FLOWER, -}; - -struct mlxsw_sp_acl_profile_ops { - size_t ruleset_priv_size; - int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp, - void *priv, void *ruleset_priv); - void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv); - int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, - struct mlxsw_sp_port *mlxsw_sp_port, - bool ingress); - void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, - struct mlxsw_sp_port *mlxsw_sp_port, - bool ingress); - u16 (*ruleset_group_id)(void *ruleset_priv); - size_t rule_priv_size; - int (*rule_add)(struct mlxsw_sp *mlxsw_sp, - void *ruleset_priv, void *rule_priv, - struct mlxsw_sp_acl_rule_info *rulei); - void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv); - int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv, - bool *activity); -}; - -struct mlxsw_sp_acl_ops { - size_t priv_size; - int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); - void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv); - const struct mlxsw_sp_acl_profile_ops * - (*profile_ops)(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_acl_profile profile); -}; - struct mlxsw_sp_acl_block; struct mlxsw_sp_acl_ruleset; /* spectrum_acl.c */ +enum mlxsw_sp_acl_profile { + MLXSW_SP_ACL_PROFILE_FLOWER, +}; + struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block); unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block); @@ -583,11 +606,45 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); /* spectrum_acl_tcam.c */ -extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops; +struct mlxsw_sp_acl_tcam; +struct mlxsw_sp_acl_tcam_region; + +struct mlxsw_sp_acl_tcam_ops { + enum mlxsw_reg_ptar_key_type key_type; + size_t priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv, + struct mlxsw_sp_acl_tcam *tcam); + void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv); + size_t region_priv_size; + int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv, + struct mlxsw_sp_acl_tcam_region *region); + void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv); + size_t chunk_priv_size; + void (*chunk_init)(void *region_priv, void *chunk_priv, + unsigned int priority); + void (*chunk_fini)(void *chunk_priv); + size_t entry_priv_size; + int (*entry_add)(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *chunk_priv, + void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei); + void (*entry_del)(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *chunk_priv, + void *entry_priv); + int (*entry_activity_get)(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + bool *activity); +}; + +/* spectrum1_acl_tcam.c */ +extern const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops; /* spectrum_acl_flex_actions.c */ extern const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops; +/* spectrum_acl_flex_keys.c */ +extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops; + /* spectrum_flower.c */ int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, @@ -635,4 +692,37 @@ void mlxsw_sp_port_fids_fini(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp); +/* spectrum_mr.c */ +enum mlxsw_sp_mr_route_prio { + MLXSW_SP_MR_ROUTE_PRIO_SG, + MLXSW_SP_MR_ROUTE_PRIO_STARG, + MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + __MLXSW_SP_MR_ROUTE_PRIO_MAX +}; + +#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1) + +struct mlxsw_sp_mr_route_key; + +struct mlxsw_sp_mr_tcam_ops { + size_t priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); + void (*fini)(void *priv); + size_t route_priv_size; + int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block, + enum mlxsw_sp_mr_route_prio prio); + void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_key *key); + int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block); +}; + +/* spectrum1_mr_tcam.c */ +extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops; + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c new file mode 100644 index 000000000000..04f0c9cfae24 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c @@ -0,0 +1,245 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017-2018 Jiri Pirko <[email protected]> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> + +#include "reg.h" +#include "core.h" +#include "spectrum.h" +#include "spectrum_acl_tcam.h" + +struct mlxsw_sp1_acl_tcam_region { + struct mlxsw_sp_acl_ctcam_region cregion; + struct mlxsw_sp_acl_tcam_region *region; + struct { + struct mlxsw_sp_acl_ctcam_chunk cchunk; + struct mlxsw_sp_acl_ctcam_entry centry; + struct mlxsw_sp_acl_rule_info *rulei; + } catchall; +}; + +struct mlxsw_sp1_acl_tcam_chunk { + struct mlxsw_sp_acl_ctcam_chunk cchunk; +}; + +struct mlxsw_sp1_acl_tcam_entry { + struct mlxsw_sp_acl_ctcam_entry centry; +}; + +static int mlxsw_sp1_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv, + struct mlxsw_sp_acl_tcam *tcam) +{ + return 0; +} + +static void mlxsw_sp1_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv) +{ +} + +static int +mlxsw_sp1_acl_ctcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_acl_tcam_region *region) +{ + struct mlxsw_sp_acl_rule_info *rulei; + int err; + + mlxsw_sp_acl_ctcam_chunk_init(®ion->cregion, + ®ion->catchall.cchunk, + MLXSW_SP_ACL_TCAM_CATCHALL_PRIO); + rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl); + if (IS_ERR(rulei)) { + err = PTR_ERR(rulei); + goto err_rulei_create; + } + err = mlxsw_sp_acl_rulei_act_continue(rulei); + if (WARN_ON(err)) + goto err_rulei_act_continue; + err = mlxsw_sp_acl_rulei_commit(rulei); + if (err) + goto err_rulei_commit; + err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, ®ion->cregion, + ®ion->catchall.cchunk, + ®ion->catchall.centry, + rulei, false); + if (err) + goto err_entry_add; + region->catchall.rulei = rulei; + return 0; + +err_entry_add: +err_rulei_commit: +err_rulei_act_continue: + mlxsw_sp_acl_rulei_destroy(rulei); +err_rulei_create: + mlxsw_sp_acl_ctcam_chunk_fini(®ion->catchall.cchunk); + return err; +} + +static void +mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_acl_tcam_region *region) +{ + struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei; + + mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, ®ion->cregion, + ®ion->catchall.cchunk, + ®ion->catchall.centry); + mlxsw_sp_acl_rulei_destroy(rulei); + mlxsw_sp_acl_ctcam_chunk_fini(®ion->catchall.cchunk); +} + +static int +mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, + struct mlxsw_sp_acl_tcam_region *_region) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + int err; + + err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, ®ion->cregion, + _region); + if (err) + return err; + err = mlxsw_sp1_acl_ctcam_region_catchall_add(mlxsw_sp, region); + if (err) + goto err_catchall_add; + region->region = _region; + return 0; + +err_catchall_add: + mlxsw_sp_acl_ctcam_region_fini(®ion->cregion); + return err; +} + +static void +mlxsw_sp1_acl_tcam_region_fini(struct mlxsw_sp *mlxsw_sp, void *region_priv) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + + mlxsw_sp1_acl_ctcam_region_catchall_del(mlxsw_sp, region); + mlxsw_sp_acl_ctcam_region_fini(®ion->cregion); +} + +static void mlxsw_sp1_acl_tcam_chunk_init(void *region_priv, void *chunk_priv, + unsigned int priority) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv; + + mlxsw_sp_acl_ctcam_chunk_init(®ion->cregion, &chunk->cchunk, + priority); +} + +static void mlxsw_sp1_acl_tcam_chunk_fini(void *chunk_priv) +{ + struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv; + + mlxsw_sp_acl_ctcam_chunk_fini(&chunk->cchunk); +} + +static int mlxsw_sp1_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *chunk_priv, + void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv; + struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv; + + return mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, ®ion->cregion, + &chunk->cchunk, &entry->centry, + rulei, false); +} + +static void mlxsw_sp1_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *chunk_priv, + void *entry_priv) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + struct mlxsw_sp1_acl_tcam_chunk *chunk = chunk_priv; + struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv; + + mlxsw_sp_acl_ctcam_entry_del(mlxsw_sp, ®ion->cregion, + &chunk->cchunk, &entry->centry); +} + +static int +mlxsw_sp1_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *_region, + unsigned int offset, + bool *activity) +{ + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + int err; + + mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ, + _region->tcam_region_info, offset, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); + if (err) + return err; + *activity = mlxsw_reg_ptce2_a_get(ptce2_pl); + return 0; +} + +static int +mlxsw_sp1_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + bool *activity) +{ + struct mlxsw_sp1_acl_tcam_region *region = region_priv; + struct mlxsw_sp1_acl_tcam_entry *entry = entry_priv; + unsigned int offset; + + offset = mlxsw_sp_acl_ctcam_entry_offset(&entry->centry); + return mlxsw_sp1_acl_tcam_region_entry_activity_get(mlxsw_sp, + region->region, + offset, activity); +} + +const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops = { + .key_type = MLXSW_REG_PTAR_KEY_TYPE_FLEX, + .priv_size = 0, + .init = mlxsw_sp1_acl_tcam_init, + .fini = mlxsw_sp1_acl_tcam_fini, + .region_priv_size = sizeof(struct mlxsw_sp1_acl_tcam_region), + .region_init = mlxsw_sp1_acl_tcam_region_init, + .region_fini = mlxsw_sp1_acl_tcam_region_fini, + .chunk_priv_size = sizeof(struct mlxsw_sp1_acl_tcam_chunk), + .chunk_init = mlxsw_sp1_acl_tcam_chunk_init, + .chunk_fini = mlxsw_sp1_acl_tcam_chunk_fini, + .entry_priv_size = sizeof(struct mlxsw_sp1_acl_tcam_entry), + .entry_add = mlxsw_sp1_acl_tcam_entry_add, + .entry_del = mlxsw_sp1_acl_tcam_entry_del, + .entry_activity_get = mlxsw_sp1_acl_tcam_entry_activity_get, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c new file mode 100644 index 000000000000..0d4583894a97 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c @@ -0,0 +1,459 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum1_kvdl.c + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2018 Jiri Pirko <[email protected]> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/bitops.h> + +#include "spectrum.h" + +#define MLXSW_SP1_KVDL_SINGLE_BASE 0 +#define MLXSW_SP1_KVDL_SINGLE_SIZE 16384 +#define MLXSW_SP1_KVDL_SINGLE_END \ + (MLXSW_SP1_KVDL_SINGLE_SIZE + MLXSW_SP1_KVDL_SINGLE_BASE - 1) + +#define MLXSW_SP1_KVDL_CHUNKS_BASE \ + (MLXSW_SP1_KVDL_SINGLE_BASE + MLXSW_SP1_KVDL_SINGLE_SIZE) +#define MLXSW_SP1_KVDL_CHUNKS_SIZE 49152 +#define MLXSW_SP1_KVDL_CHUNKS_END \ + (MLXSW_SP1_KVDL_CHUNKS_SIZE + MLXSW_SP1_KVDL_CHUNKS_BASE - 1) + +#define MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE \ + (MLXSW_SP1_KVDL_CHUNKS_BASE + MLXSW_SP1_KVDL_CHUNKS_SIZE) +#define MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE \ + (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE) +#define MLXSW_SP1_KVDL_LARGE_CHUNKS_END \ + (MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP1_KVDL_LARGE_CHUNKS_BASE - 1) + +#define MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE 1 +#define MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE 32 +#define MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512 + +struct mlxsw_sp1_kvdl_part_info { + unsigned int part_index; + unsigned int start_index; + unsigned int end_index; + unsigned int alloc_size; + enum mlxsw_sp_resource_id resource_id; +}; + +enum mlxsw_sp1_kvdl_part_id { + MLXSW_SP1_KVDL_PART_ID_SINGLE, + MLXSW_SP1_KVDL_PART_ID_CHUNKS, + MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS, +}; + +#define MLXSW_SP1_KVDL_PART_INFO(id) \ +[MLXSW_SP1_KVDL_PART_ID_##id] = { \ + .start_index = MLXSW_SP1_KVDL_##id##_BASE, \ + .end_index = MLXSW_SP1_KVDL_##id##_END, \ + .alloc_size = MLXSW_SP1_KVDL_##id##_ALLOC_SIZE, \ + .resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id, \ +} + +static const struct mlxsw_sp1_kvdl_part_info mlxsw_sp1_kvdl_parts_info[] = { + MLXSW_SP1_KVDL_PART_INFO(SINGLE), + MLXSW_SP1_KVDL_PART_INFO(CHUNKS), + MLXSW_SP1_KVDL_PART_INFO(LARGE_CHUNKS), +}; + +#define MLXSW_SP1_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp1_kvdl_parts_info) + +struct mlxsw_sp1_kvdl_part { + struct mlxsw_sp1_kvdl_part_info info; + unsigned long usage[0]; /* Entries */ +}; + +struct mlxsw_sp1_kvdl { + struct mlxsw_sp1_kvdl_part *parts[MLXSW_SP1_KVDL_PARTS_INFO_LEN]; +}; + +static struct mlxsw_sp1_kvdl_part * +mlxsw_sp1_kvdl_alloc_size_part(struct mlxsw_sp1_kvdl *kvdl, + unsigned int alloc_size) +{ + struct mlxsw_sp1_kvdl_part *part, *min_part = NULL; + int i; + + for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) { + part = kvdl->parts[i]; + if (alloc_size <= part->info.alloc_size && + (!min_part || + part->info.alloc_size <= min_part->info.alloc_size)) + min_part = part; + } + + return min_part ?: ERR_PTR(-ENOBUFS); +} + +static struct mlxsw_sp1_kvdl_part * +mlxsw_sp1_kvdl_index_part(struct mlxsw_sp1_kvdl *kvdl, u32 kvdl_index) +{ + struct mlxsw_sp1_kvdl_part *part; + int i; + + for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) { + part = kvdl->parts[i]; + if (kvdl_index >= part->info.start_index && + kvdl_index <= part->info.end_index) + return part; + } + + return ERR_PTR(-EINVAL); +} + +static u32 +mlxsw_sp1_kvdl_to_kvdl_index(const struct mlxsw_sp1_kvdl_part_info *info, + unsigned int entry_index) +{ + return info->start_index + entry_index * info->alloc_size; +} + +static unsigned int +mlxsw_sp1_kvdl_to_entry_index(const struct mlxsw_sp1_kvdl_part_info *info, + u32 kvdl_index) +{ + return (kvdl_index - info->start_index) / info->alloc_size; +} + +static int mlxsw_sp1_kvdl_part_alloc(struct mlxsw_sp1_kvdl_part *part, + u32 *p_kvdl_index) +{ + const struct mlxsw_sp1_kvdl_part_info *info = &part->info; + unsigned int entry_index, nr_entries; + + nr_entries = (info->end_index - info->start_index + 1) / + info->alloc_size; + entry_index = find_first_zero_bit(part->usage, nr_entries); + if (entry_index == nr_entries) + return -ENOBUFS; + __set_bit(entry_index, part->usage); + + *p_kvdl_index = mlxsw_sp1_kvdl_to_kvdl_index(info, entry_index); + + return 0; +} + +static void mlxsw_sp1_kvdl_part_free(struct mlxsw_sp1_kvdl_part *part, + u32 kvdl_index) +{ + const struct mlxsw_sp1_kvdl_part_info *info = &part->info; + unsigned int entry_index; + + entry_index = mlxsw_sp1_kvdl_to_entry_index(info, kvdl_index); + __clear_bit(entry_index, part->usage); +} + +static int mlxsw_sp1_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + u32 *p_entry_index) +{ + struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + /* Find partition with smallest allocation size satisfying the + * requested size. + */ + part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count); + if (IS_ERR(part)) + return PTR_ERR(part); + + return mlxsw_sp1_kvdl_part_alloc(part, p_entry_index); +} + +static void mlxsw_sp1_kvdl_free(struct mlxsw_sp *mlxsw_sp, void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, int entry_index) +{ + struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + part = mlxsw_sp1_kvdl_index_part(kvdl, entry_index); + if (IS_ERR(part)) + return; + mlxsw_sp1_kvdl_part_free(part, entry_index); +} + +static int mlxsw_sp1_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp, + void *priv, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + unsigned int *p_alloc_size) +{ + struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + part = mlxsw_sp1_kvdl_alloc_size_part(kvdl, entry_count); + if (IS_ERR(part)) + return PTR_ERR(part); + + *p_alloc_size = part->info.alloc_size; + + return 0; +} + +static void mlxsw_sp1_kvdl_part_update(struct mlxsw_sp1_kvdl_part *part, + struct mlxsw_sp1_kvdl_part *part_prev, + unsigned int size) +{ + if (!part_prev) { + part->info.end_index = size - 1; + } else { + part->info.start_index = part_prev->info.end_index + 1; + part->info.end_index = part->info.start_index + size - 1; + } +} + +static struct mlxsw_sp1_kvdl_part * +mlxsw_sp1_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp1_kvdl_part_info *info, + struct mlxsw_sp1_kvdl_part *part_prev) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp1_kvdl_part *part; + bool need_update = true; + unsigned int nr_entries; + size_t usage_size; + u64 resource_size; + int err; + + err = devlink_resource_size_get(devlink, info->resource_id, + &resource_size); + if (err) { + need_update = false; + resource_size = info->end_index - info->start_index + 1; + } + + nr_entries = div_u64(resource_size, info->alloc_size); + usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long); + part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL); + if (!part) + return ERR_PTR(-ENOMEM); + + memcpy(&part->info, info, sizeof(part->info)); + + if (need_update) + mlxsw_sp1_kvdl_part_update(part, part_prev, resource_size); + return part; +} + +static void mlxsw_sp1_kvdl_part_fini(struct mlxsw_sp1_kvdl_part *part) +{ + kfree(part); +} + +static int mlxsw_sp1_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_kvdl *kvdl) +{ + const struct mlxsw_sp1_kvdl_part_info *info; + struct mlxsw_sp1_kvdl_part *part_prev = NULL; + int err, i; + + for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) { + info = &mlxsw_sp1_kvdl_parts_info[i]; + kvdl->parts[i] = mlxsw_sp1_kvdl_part_init(mlxsw_sp, info, + part_prev); + if (IS_ERR(kvdl->parts[i])) { + err = PTR_ERR(kvdl->parts[i]); + goto err_kvdl_part_init; + } + part_prev = kvdl->parts[i]; + } + return 0; + +err_kvdl_part_init: + for (i--; i >= 0; i--) + mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]); + return err; +} + +static void mlxsw_sp1_kvdl_parts_fini(struct mlxsw_sp1_kvdl *kvdl) +{ + int i; + + for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) + mlxsw_sp1_kvdl_part_fini(kvdl->parts[i]); +} + +static u64 mlxsw_sp1_kvdl_part_occ(struct mlxsw_sp1_kvdl_part *part) +{ + const struct mlxsw_sp1_kvdl_part_info *info = &part->info; + unsigned int nr_entries; + int bit = -1; + u64 occ = 0; + + nr_entries = (info->end_index - + info->start_index + 1) / + info->alloc_size; + while ((bit = find_next_bit(part->usage, nr_entries, bit + 1)) + < nr_entries) + occ += info->alloc_size; + return occ; +} + +static u64 mlxsw_sp1_kvdl_occ_get(void *priv) +{ + const struct mlxsw_sp1_kvdl *kvdl = priv; + u64 occ = 0; + int i; + + for (i = 0; i < MLXSW_SP1_KVDL_PARTS_INFO_LEN; i++) + occ += mlxsw_sp1_kvdl_part_occ(kvdl->parts[i]); + + return occ; +} + +static u64 mlxsw_sp1_kvdl_single_occ_get(void *priv) +{ + const struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_SINGLE]; + return mlxsw_sp1_kvdl_part_occ(part); +} + +static u64 mlxsw_sp1_kvdl_chunks_occ_get(void *priv) +{ + const struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_CHUNKS]; + return mlxsw_sp1_kvdl_part_occ(part); +} + +static u64 mlxsw_sp1_kvdl_large_chunks_occ_get(void *priv) +{ + const struct mlxsw_sp1_kvdl *kvdl = priv; + struct mlxsw_sp1_kvdl_part *part; + + part = kvdl->parts[MLXSW_SP1_KVDL_PART_ID_LARGE_CHUNKS]; + return mlxsw_sp1_kvdl_part_occ(part); +} + +static int mlxsw_sp1_kvdl_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp1_kvdl *kvdl = priv; + int err; + + err = mlxsw_sp1_kvdl_parts_init(mlxsw_sp, kvdl); + if (err) + return err; + devlink_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR, + mlxsw_sp1_kvdl_occ_get, + kvdl); + devlink_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE, + mlxsw_sp1_kvdl_single_occ_get, + kvdl); + devlink_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, + mlxsw_sp1_kvdl_chunks_occ_get, + kvdl); + devlink_resource_occ_get_register(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, + mlxsw_sp1_kvdl_large_chunks_occ_get, + kvdl); + return 0; +} + +static void mlxsw_sp1_kvdl_fini(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp1_kvdl *kvdl = priv; + + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS); + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS); + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE); + devlink_resource_occ_get_unregister(devlink, + MLXSW_SP_RESOURCE_KVD_LINEAR); + mlxsw_sp1_kvdl_parts_fini(kvdl); +} + +const struct mlxsw_sp_kvdl_ops mlxsw_sp1_kvdl_ops = { + .priv_size = sizeof(struct mlxsw_sp1_kvdl), + .init = mlxsw_sp1_kvdl_init, + .fini = mlxsw_sp1_kvdl_fini, + .alloc = mlxsw_sp1_kvdl_alloc, + .free = mlxsw_sp1_kvdl_free, + .alloc_size_query = mlxsw_sp1_kvdl_alloc_size_query, +}; + +int mlxsw_sp1_kvdl_resources_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + static struct devlink_resource_size_params size_params; + u32 kvdl_max_size; + int err; + + kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) - + MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) - + MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE); + + devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, + MLXSW_SP1_KVDL_SINGLE_ALLOC_SIZE, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES, + MLXSW_SP1_KVDL_SINGLE_SIZE, + MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE, + MLXSW_SP_RESOURCE_KVD_LINEAR, + &size_params); + if (err) + return err; + + devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, + MLXSW_SP1_KVDL_CHUNKS_ALLOC_SIZE, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS, + MLXSW_SP1_KVDL_CHUNKS_SIZE, + MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, + MLXSW_SP_RESOURCE_KVD_LINEAR, + &size_params); + if (err) + return err; + + devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, + MLXSW_SP1_KVDL_LARGE_CHUNKS_ALLOC_SIZE, + DEVLINK_RESOURCE_UNIT_ENTRY); + err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS, + MLXSW_SP1_KVDL_LARGE_CHUNKS_SIZE, + MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, + MLXSW_SP_RESOURCE_KVD_LINEAR, + &size_params); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c new file mode 100644 index 000000000000..fc649fe7134e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c @@ -0,0 +1,374 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum1_mr_tcam.c + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <[email protected]> + * Copyright (c) 2018 Jiri Pirko <[email protected]> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/parman.h> + +#include "reg.h" +#include "spectrum.h" +#include "core_acl_flex_actions.h" +#include "spectrum_mr.h" + +struct mlxsw_sp1_mr_tcam_region { + struct mlxsw_sp *mlxsw_sp; + enum mlxsw_reg_rtar_key_type rtar_key_type; + struct parman *parman; + struct parman_prio *parman_prios; +}; + +struct mlxsw_sp1_mr_tcam { + struct mlxsw_sp1_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX]; +}; + +struct mlxsw_sp1_mr_tcam_route { + struct parman_item parman_item; + struct parman_prio *parman_prio; +}; + +static int mlxsw_sp1_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp, + struct parman_item *parman_item, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block) +{ + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + switch (key->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index, + key->vrid, + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0, + ntohl(key->group.addr4), + ntohl(key->group_mask.addr4), + ntohl(key->source.addr4), + ntohl(key->source_mask.addr4), + mlxsw_afa_block_first_set(afa_block)); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index, + key->vrid, + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0, + key->group.addr6, + key->group_mask.addr6, + key->source.addr6, + key->source_mask.addr6, + mlxsw_afa_block_first_set(afa_block)); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static int mlxsw_sp1_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, + struct parman_item *parman_item, + struct mlxsw_sp_mr_route_key *key) +{ + struct in6_addr zero_addr = IN6ADDR_ANY_INIT; + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + switch (key->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, + key->vrid, 0, 0, 0, 0, 0, 0, NULL); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index, + key->vrid, 0, 0, zero_addr, zero_addr, + zero_addr, zero_addr, NULL); + break; + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static struct mlxsw_sp1_mr_tcam_region * +mlxsw_sp1_mr_tcam_protocol_region(struct mlxsw_sp1_mr_tcam *mr_tcam, + enum mlxsw_sp_l3proto proto) +{ + return &mr_tcam->tcam_regions[proto]; +} + +static int +mlxsw_sp1_mr_tcam_route_parman_item_add(struct mlxsw_sp1_mr_tcam *mr_tcam, + struct mlxsw_sp1_mr_tcam_route *route, + struct mlxsw_sp_mr_route_key *key, + enum mlxsw_sp_mr_route_prio prio) +{ + struct mlxsw_sp1_mr_tcam_region *tcam_region; + int err; + + tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto); + err = parman_item_add(tcam_region->parman, + &tcam_region->parman_prios[prio], + &route->parman_item); + if (err) + return err; + + route->parman_prio = &tcam_region->parman_prios[prio]; + return 0; +} + +static void +mlxsw_sp1_mr_tcam_route_parman_item_remove(struct mlxsw_sp1_mr_tcam *mr_tcam, + struct mlxsw_sp1_mr_tcam_route *route, + struct mlxsw_sp_mr_route_key *key) +{ + struct mlxsw_sp1_mr_tcam_region *tcam_region; + + tcam_region = mlxsw_sp1_mr_tcam_protocol_region(mr_tcam, key->proto); + parman_item_remove(tcam_region->parman, + route->parman_prio, &route->parman_item); +} + +static int +mlxsw_sp1_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block, + enum mlxsw_sp_mr_route_prio prio) +{ + struct mlxsw_sp1_mr_tcam_route *route = route_priv; + struct mlxsw_sp1_mr_tcam *mr_tcam = priv; + int err; + + err = mlxsw_sp1_mr_tcam_route_parman_item_add(mr_tcam, route, + key, prio); + if (err) + return err; + + err = mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + key, afa_block); + if (err) + goto err_route_replace; + return 0; + +err_route_replace: + mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key); + return err; +} + +static void +mlxsw_sp1_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_key *key) +{ + struct mlxsw_sp1_mr_tcam_route *route = route_priv; + struct mlxsw_sp1_mr_tcam *mr_tcam = priv; + + mlxsw_sp1_mr_tcam_route_remove(mlxsw_sp, &route->parman_item, key); + mlxsw_sp1_mr_tcam_route_parman_item_remove(mr_tcam, route, key); +} + +static int +mlxsw_sp1_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block) +{ + struct mlxsw_sp1_mr_tcam_route *route = route_priv; + + return mlxsw_sp1_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + key, afa_block); +} + +#define MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT 16 +#define MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP 16 + +static int +mlxsw_sp1_mr_tcam_region_alloc(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE, + mr_tcam_region->rtar_key_type, + MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void +mlxsw_sp1_mr_tcam_region_free(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE, + mr_tcam_region->rtar_key_type, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static int mlxsw_sp1_mr_tcam_region_parman_resize(void *priv, + unsigned long new_count) +{ + struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + u64 max_tcam_rules; + + max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); + if (new_count > max_tcam_rules) + return -EINVAL; + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE, + mr_tcam_region->rtar_key_type, new_count); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void mlxsw_sp1_mr_tcam_region_parman_move(void *priv, + unsigned long from_index, + unsigned long to_index, + unsigned long count) +{ + struct mlxsw_sp1_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rrcr_pl[MLXSW_REG_RRCR_LEN]; + + mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE, + from_index, count, + mr_tcam_region->rtar_key_type, to_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl); +} + +static const struct parman_ops mlxsw_sp1_mr_tcam_region_parman_ops = { + .base_count = MLXSW_SP1_MR_TCAM_REGION_BASE_COUNT, + .resize_step = MLXSW_SP1_MR_TCAM_REGION_RESIZE_STEP, + .resize = mlxsw_sp1_mr_tcam_region_parman_resize, + .move = mlxsw_sp1_mr_tcam_region_parman_move, + .algo = PARMAN_ALGO_TYPE_LSORT, +}; + +static int +mlxsw_sp1_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_mr_tcam_region *mr_tcam_region, + enum mlxsw_reg_rtar_key_type rtar_key_type) +{ + struct parman_prio *parman_prios; + struct parman *parman; + int err; + int i; + + mr_tcam_region->rtar_key_type = rtar_key_type; + mr_tcam_region->mlxsw_sp = mlxsw_sp; + + err = mlxsw_sp1_mr_tcam_region_alloc(mr_tcam_region); + if (err) + return err; + + parman = parman_create(&mlxsw_sp1_mr_tcam_region_parman_ops, + mr_tcam_region); + if (!parman) { + err = -ENOMEM; + goto err_parman_create; + } + mr_tcam_region->parman = parman; + + parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1, + sizeof(*parman_prios), GFP_KERNEL); + if (!parman_prios) { + err = -ENOMEM; + goto err_parman_prios_alloc; + } + mr_tcam_region->parman_prios = parman_prios; + + for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_init(mr_tcam_region->parman, + &mr_tcam_region->parman_prios[i], i); + return 0; + +err_parman_prios_alloc: + parman_destroy(parman); +err_parman_create: + mlxsw_sp1_mr_tcam_region_free(mr_tcam_region); + return err; +} + +static void +mlxsw_sp1_mr_tcam_region_fini(struct mlxsw_sp1_mr_tcam_region *mr_tcam_region) +{ + int i; + + for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_fini(&mr_tcam_region->parman_prios[i]); + kfree(mr_tcam_region->parman_prios); + parman_destroy(mr_tcam_region->parman); + mlxsw_sp1_mr_tcam_region_free(mr_tcam_region); +} + +static int mlxsw_sp1_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp1_mr_tcam *mr_tcam = priv; + struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0]; + u32 rtar_key; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES)) + return -EIO; + + rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST; + err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp, + ®ion[MLXSW_SP_L3_PROTO_IPV4], + rtar_key); + if (err) + return err; + + rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST; + err = mlxsw_sp1_mr_tcam_region_init(mlxsw_sp, + ®ion[MLXSW_SP_L3_PROTO_IPV6], + rtar_key); + if (err) + goto err_ipv6_region_init; + + return 0; + +err_ipv6_region_init: + mlxsw_sp1_mr_tcam_region_fini(®ion[MLXSW_SP_L3_PROTO_IPV4]); + return err; +} + +static void mlxsw_sp1_mr_tcam_fini(void *priv) +{ + struct mlxsw_sp1_mr_tcam *mr_tcam = priv; + struct mlxsw_sp1_mr_tcam_region *region = &mr_tcam->tcam_regions[0]; + + mlxsw_sp1_mr_tcam_region_fini(®ion[MLXSW_SP_L3_PROTO_IPV6]); + mlxsw_sp1_mr_tcam_region_fini(®ion[MLXSW_SP_L3_PROTO_IPV4]); +} + +const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops = { + .priv_size = sizeof(struct mlxsw_sp1_mr_tcam), + .init = mlxsw_sp1_mr_tcam_init, + .fini = mlxsw_sp1_mr_tcam_fini, + .route_priv_size = sizeof(struct mlxsw_sp1_mr_tcam_route), + .route_create = mlxsw_sp1_mr_tcam_route_create, + .route_destroy = mlxsw_sp1_mr_tcam_route_destroy, + .route_update = mlxsw_sp1_mr_tcam_route_update, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index b40569c67ddf..217621d79e26 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -48,13 +48,12 @@ #include "spectrum.h" #include "core_acl_flex_keys.h" #include "core_acl_flex_actions.h" -#include "spectrum_acl_flex_keys.h" +#include "spectrum_acl_tcam.h" struct mlxsw_sp_acl { struct mlxsw_sp *mlxsw_sp; struct mlxsw_afk *afk; struct mlxsw_sp_fid *dummy_fid; - const struct mlxsw_sp_acl_ops *ops; struct rhashtable ruleset_ht; struct list_head rules; struct { @@ -62,8 +61,7 @@ struct mlxsw_sp_acl { unsigned long interval; /* ms */ #define MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS 1000 } rule_activity_update; - unsigned long priv[0]; - /* priv has to be always the last item */ + struct mlxsw_sp_acl_tcam tcam; }; struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl) @@ -339,7 +337,7 @@ mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_rhashtable_init; - err = ops->ruleset_add(mlxsw_sp, acl->priv, ruleset->priv); + err = ops->ruleset_add(mlxsw_sp, &acl->tcam, ruleset->priv); if (err) goto err_ops_ruleset_add; @@ -409,7 +407,7 @@ mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl *acl = mlxsw_sp->acl; struct mlxsw_sp_acl_ruleset *ruleset; - ops = acl->ops->profile_ops(mlxsw_sp, profile); + ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile); if (!ops) return ERR_PTR(-EINVAL); ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, block, chain_index, ops); @@ -427,7 +425,7 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl *acl = mlxsw_sp->acl; struct mlxsw_sp_acl_ruleset *ruleset; - ops = acl->ops->profile_ops(mlxsw_sp, profile); + ops = mlxsw_sp_acl_tcam_profile_ops(mlxsw_sp, profile); if (!ops) return ERR_PTR(-EINVAL); @@ -634,7 +632,8 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_sp_acl_ruleset_ref_inc(ruleset); - rule = kzalloc(sizeof(*rule) + ops->rule_priv_size, GFP_KERNEL); + rule = kzalloc(sizeof(*rule) + ops->rule_priv_size(mlxsw_sp), + GFP_KERNEL); if (!rule) { err = -ENOMEM; goto err_alloc; @@ -825,20 +824,20 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) { - const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops; struct mlxsw_sp_fid *fid; struct mlxsw_sp_acl *acl; + size_t alloc_size; int err; - acl = kzalloc(sizeof(*acl) + acl_ops->priv_size, GFP_KERNEL); + alloc_size = sizeof(*acl) + mlxsw_sp_acl_tcam_priv_size(mlxsw_sp); + acl = kzalloc(alloc_size, GFP_KERNEL); if (!acl) return -ENOMEM; mlxsw_sp->acl = acl; acl->mlxsw_sp = mlxsw_sp; acl->afk = mlxsw_afk_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_FLEX_KEYS), - mlxsw_sp1_afk_blocks, - MLXSW_SP1_AFK_BLOCKS_COUNT); + mlxsw_sp->afk_ops); if (!acl->afk) { err = -ENOMEM; goto err_afk_create; @@ -857,12 +856,10 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) acl->dummy_fid = fid; INIT_LIST_HEAD(&acl->rules); - err = acl_ops->init(mlxsw_sp, acl->priv); + err = mlxsw_sp_acl_tcam_init(mlxsw_sp, &acl->tcam); if (err) goto err_acl_ops_init; - acl->ops = acl_ops; - /* Create the delayed work for the rule activity_update */ INIT_DELAYED_WORK(&acl->rule_activity_update.dw, mlxsw_sp_acl_rul_activity_update_work); @@ -884,10 +881,9 @@ err_afk_create: void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_acl *acl = mlxsw_sp->acl; - const struct mlxsw_sp_acl_ops *acl_ops = acl->ops; cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw); - acl_ops->fini(mlxsw_sp, acl->priv); + mlxsw_sp_acl_tcam_fini(mlxsw_sp, &acl->tcam); WARN_ON(!list_empty(&acl->rules)); mlxsw_sp_fid_put(acl->dummy_fid); rhashtable_destroy(&acl->ruleset_ht); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c new file mode 100644 index 000000000000..ef0d4c0a5a1f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c @@ -0,0 +1,215 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017-2018 Jiri Pirko <[email protected]> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/parman.h> + +#include "reg.h" +#include "core.h" +#include "spectrum.h" +#include "spectrum_acl_tcam.h" + +static int +mlxsw_sp_acl_ctcam_region_resize(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + u16 new_size) +{ + char ptar_pl[MLXSW_REG_PTAR_LEN]; + + mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE, + region->key_type, new_size, region->id, + region->tcam_region_info); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl); +} + +static void +mlxsw_sp_acl_ctcam_region_move(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + u16 src_offset, u16 dst_offset, u16 size) +{ + char prcr_pl[MLXSW_REG_PRCR_LEN]; + + mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE, + region->tcam_region_info, src_offset, + region->tcam_region_info, dst_offset, size); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl); +} + +static int +mlxsw_sp_acl_ctcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + unsigned int offset, + struct mlxsw_sp_acl_rule_info *rulei, + bool fillup_priority) +{ + struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + char *act_set; + u32 priority; + char *mask; + char *key; + int err; + + err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority, + fillup_priority); + if (err) + return err; + + mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE, + region->tcam_region_info, offset, priority); + key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl); + mask = mlxsw_reg_ptce2_mask_data(ptce2_pl); + mlxsw_afk_encode(afk, region->key_info, &rulei->values, key, mask); + + /* Only the first action set belongs here, the rest is in KVD */ + act_set = mlxsw_afa_block_first_set(rulei->act_block); + mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); +} + +static void +mlxsw_sp_acl_ctcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + unsigned int offset) +{ + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + + mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE, + region->tcam_region_info, offset, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); +} + +static int mlxsw_sp_acl_ctcam_region_parman_resize(void *priv, + unsigned long new_count) +{ + struct mlxsw_sp_acl_ctcam_region *cregion = priv; + struct mlxsw_sp_acl_tcam_region *region = cregion->region; + struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp; + u64 max_tcam_rules; + + max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); + if (new_count > max_tcam_rules) + return -EINVAL; + return mlxsw_sp_acl_ctcam_region_resize(mlxsw_sp, region, new_count); +} + +static void mlxsw_sp_acl_ctcam_region_parman_move(void *priv, + unsigned long from_index, + unsigned long to_index, + unsigned long count) +{ + struct mlxsw_sp_acl_ctcam_region *cregion = priv; + struct mlxsw_sp_acl_tcam_region *region = cregion->region; + struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp; + + mlxsw_sp_acl_ctcam_region_move(mlxsw_sp, region, + from_index, to_index, count); +} + +static const struct parman_ops mlxsw_sp_acl_ctcam_region_parman_ops = { + .base_count = MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT, + .resize_step = MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP, + .resize = mlxsw_sp_acl_ctcam_region_parman_resize, + .move = mlxsw_sp_acl_ctcam_region_parman_move, + .algo = PARMAN_ALGO_TYPE_LSORT, +}; + +int mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_tcam_region *region) +{ + cregion->region = region; + cregion->parman = parman_create(&mlxsw_sp_acl_ctcam_region_parman_ops, + cregion); + if (!cregion->parman) + return -ENOMEM; + return 0; +} + +void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion) +{ + parman_destroy(cregion->parman); +} + +void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + unsigned int priority) +{ + parman_prio_init(cregion->parman, &cchunk->parman_prio, priority); +} + +void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk) +{ + parman_prio_fini(&cchunk->parman_prio); +} + +int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_sp_acl_rule_info *rulei, + bool fillup_priority) +{ + int err; + + err = parman_item_add(cregion->parman, &cchunk->parman_prio, + ¢ry->parman_item); + if (err) + return err; + + err = mlxsw_sp_acl_ctcam_region_entry_insert(mlxsw_sp, cregion->region, + centry->parman_item.index, + rulei, fillup_priority); + if (err) + goto err_rule_insert; + return 0; + +err_rule_insert: + parman_item_remove(cregion->parman, &cchunk->parman_prio, + ¢ry->parman_item); + return err; +} + +void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + struct mlxsw_sp_acl_ctcam_entry *centry) +{ + mlxsw_sp_acl_ctcam_region_entry_remove(mlxsw_sp, cregion->region, + centry->parman_item.index); + parman_item_remove(cregion->parman, &cchunk->parman_prio, + ¢ry->parman_item); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c index 5ab070916949..6a7c3406b724 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c @@ -37,8 +37,6 @@ #include "core_acl_flex_actions.h" #include "spectrum_span.h" -#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1 - static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, char *enc_actions, bool is_first) { @@ -53,8 +51,8 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, if (is_first) return 0; - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ACT_EXT_SIZE, - &kvdl_index); + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + 1, &kvdl_index); if (err) return err; mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions); @@ -65,7 +63,8 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, return 0; err_pefa_write: - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + 1, kvdl_index); return err; } @@ -76,7 +75,8 @@ static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index, if (is_first) return; - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, + 1, kvdl_index); } static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, @@ -87,7 +87,8 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, u32 kvdl_index; int err; - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index); + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS, + 1, &kvdl_index); if (err) return err; mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); @@ -98,7 +99,8 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, return 0; err_ppbs_write: - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS, + 1, kvdl_index); return err; } @@ -106,7 +108,8 @@ static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index) { struct mlxsw_sp *mlxsw_sp = priv; - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_PBS, + 1, kvdl_index); } static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index a120c0dccd78..80f22b7c21da 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -1,7 +1,7 @@ /* - * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Jiri Pirko <[email protected]> + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017-2018 Jiri Pirko <[email protected]> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,9 +32,10 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H -#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H - +#include <linux/kernel.h> +#include <linux/module.h> +#include "spectrum.h" +#include "item.h" #include "core_acl_flex_keys.h" static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = { @@ -126,6 +127,48 @@ static const struct mlxsw_afk_block mlxsw_sp1_afk_blocks[] = { MLXSW_AFK_BLOCK(0xB0, mlxsw_sp_afk_element_info_packet_type), }; -#define MLXSW_SP1_AFK_BLOCKS_COUNT ARRAY_SIZE(mlxsw_sp1_afk_blocks) - -#endif +static void mlxsw_sp1_afk_encode_u32(const struct mlxsw_item *storage_item, + const struct mlxsw_item *output_item, + char *storage, char *output_indexed) +{ + u32 value; + + value = __mlxsw_item_get32(storage, storage_item, 0); + __mlxsw_item_set32(output_indexed, output_item, 0, value); +} + +static void mlxsw_sp1_afk_encode_buf(const struct mlxsw_item *storage_item, + const struct mlxsw_item *output_item, + char *storage, char *output_indexed) +{ + char *storage_data = __mlxsw_item_data(storage, storage_item, 0); + char *output_data = __mlxsw_item_data(output_indexed, output_item, 0); + size_t len = output_item->size.bytes; + + memcpy(output_data, storage_data, len); +} + +#define MLXSW_SP1_AFK_KEY_BLOCK_SIZE 16 + +static void +mlxsw_sp1_afk_encode_one(const struct mlxsw_afk_element_inst *elinst, + int block_index, char *storage, char *output) +{ + unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE; + char *output_indexed = output + offset; + const struct mlxsw_item *storage_item = &elinst->info->item; + const struct mlxsw_item *output_item = &elinst->item; + + if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32) + mlxsw_sp1_afk_encode_u32(storage_item, output_item, + storage, output_indexed); + else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF) + mlxsw_sp1_afk_encode_buf(storage_item, output_item, + storage, output_indexed); +} + +const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = { + .blocks = mlxsw_sp1_afk_blocks, + .blocks_count = ARRAY_SIZE(mlxsw_sp1_afk_blocks), + .encode_one = mlxsw_sp1_afk_encode_one, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index efbd2062b6ec..53fe51a8d720 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1,7 +1,7 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Jiri Pirko <[email protected]> + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017-2018 Jiri Pirko <[email protected]> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,25 +39,25 @@ #include <linux/list.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> -#include <linux/parman.h> #include "reg.h" #include "core.h" #include "resources.h" #include "spectrum.h" +#include "spectrum_acl_tcam.h" #include "core_acl_flex_keys.h" -struct mlxsw_sp_acl_tcam { - unsigned long *used_regions; /* bit array */ - unsigned int max_regions; - unsigned long *used_groups; /* bit array */ - unsigned int max_groups; - unsigned int max_group_size; -}; +size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + return ops->priv_size; +} -static int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) +int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam) { - struct mlxsw_sp_acl_tcam *tcam = priv; + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; u64 max_tcam_regions; u64 max_regions; u64 max_groups; @@ -88,21 +88,53 @@ static int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) tcam->max_groups = max_groups; tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUP_SIZE); + + err = ops->init(mlxsw_sp, tcam->priv, tcam); + if (err) + goto err_tcam_init; + return 0; +err_tcam_init: + kfree(tcam->used_groups); err_alloc_used_groups: kfree(tcam->used_regions); return err; } -static void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv) +void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam) { - struct mlxsw_sp_acl_tcam *tcam = priv; + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + ops->fini(mlxsw_sp, tcam->priv); kfree(tcam->used_groups); kfree(tcam->used_regions); } +int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 *priority, bool fillup_priority) +{ + u64 max_priority; + + if (!fillup_priority) { + *priority = 0; + return 0; + } + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, KVD_SIZE)) + return -EIO; + + max_priority = MLXSW_CORE_RES_GET(mlxsw_sp->core, KVD_SIZE); + if (rulei->priority > max_priority) + return -EINVAL; + + /* Unlike in TC, in HW, higher number means higher priority. */ + *priority = max_priority - rulei->priority; + return 0; +} + static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam, u16 *p_id) { @@ -159,36 +191,21 @@ struct mlxsw_sp_acl_tcam_group { unsigned int patterns_count; }; -struct mlxsw_sp_acl_tcam_region { - struct list_head list; /* Member of a TCAM group */ - struct list_head chunk_list; /* List of chunks under this region */ - struct parman *parman; - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_acl_tcam_group *group; - enum mlxsw_reg_ptar_key_type key_type; - u16 id; /* ACL ID and region ID - they are same */ - char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN]; - struct mlxsw_afk_key_info *key_info; - struct { - struct parman_prio parman_prio; - struct parman_item parman_item; - struct mlxsw_sp_acl_rule_info *rulei; - } catchall; -}; - struct mlxsw_sp_acl_tcam_chunk { struct list_head list; /* Member of a TCAM region */ struct rhash_head ht_node; /* Member of a chunk HT */ unsigned int priority; /* Priority within the region and group */ - struct parman_prio parman_prio; struct mlxsw_sp_acl_tcam_group *group; struct mlxsw_sp_acl_tcam_region *region; unsigned int ref_count; + unsigned long priv[0]; + /* priv has to be always the last item */ }; struct mlxsw_sp_acl_tcam_entry { - struct parman_item parman_item; struct mlxsw_sp_acl_tcam_chunk *chunk; + unsigned long priv[0]; + /* priv has to be always the last item */ }; static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = { @@ -442,9 +459,6 @@ mlxsw_sp_acl_tcam_group_use_patterns(struct mlxsw_sp_acl_tcam_group *group, memcpy(out, elusage, sizeof(*out)); } -#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16 -#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16 - static int mlxsw_sp_acl_tcam_region_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region) @@ -486,19 +500,6 @@ mlxsw_sp_acl_tcam_region_free(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_acl_tcam_region_resize(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region, - u16 new_size) -{ - char ptar_pl[MLXSW_REG_PTAR_LEN]; - - mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE, - region->key_type, new_size, region->id, - region->tcam_region_info); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl); -} - -static int mlxsw_sp_acl_tcam_region_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region) { @@ -520,193 +521,22 @@ mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pacl), pacl_pl); } -static int -mlxsw_sp_acl_tcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region, - unsigned int offset, - struct mlxsw_sp_acl_rule_info *rulei) -{ - char ptce2_pl[MLXSW_REG_PTCE2_LEN]; - char *act_set; - char *mask; - char *key; - - mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE, - region->tcam_region_info, offset); - key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl); - mask = mlxsw_reg_ptce2_mask_data(ptce2_pl); - mlxsw_afk_encode(region->key_info, &rulei->values, key, mask); - - /* Only the first action set belongs here, the rest is in KVD */ - act_set = mlxsw_afa_block_first_set(rulei->act_block); - mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set); - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); -} - -static void -mlxsw_sp_acl_tcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region, - unsigned int offset) -{ - char ptce2_pl[MLXSW_REG_PTCE2_LEN]; - - mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE, - region->tcam_region_info, offset); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); -} - -static int -mlxsw_sp_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region, - unsigned int offset, - bool *activity) -{ - char ptce2_pl[MLXSW_REG_PTCE2_LEN]; - int err; - - mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ, - region->tcam_region_info, offset); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); - if (err) - return err; - *activity = mlxsw_reg_ptce2_a_get(ptce2_pl); - return 0; -} - -#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U) - -static int -mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region) -{ - struct parman_prio *parman_prio = ®ion->catchall.parman_prio; - struct parman_item *parman_item = ®ion->catchall.parman_item; - struct mlxsw_sp_acl_rule_info *rulei; - int err; - - parman_prio_init(region->parman, parman_prio, - MLXSW_SP_ACL_TCAM_CATCHALL_PRIO); - err = parman_item_add(region->parman, parman_prio, parman_item); - if (err) - goto err_parman_item_add; - - rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl); - if (IS_ERR(rulei)) { - err = PTR_ERR(rulei); - goto err_rulei_create; - } - - err = mlxsw_sp_acl_rulei_act_continue(rulei); - if (WARN_ON(err)) - goto err_rulei_act_continue; - - err = mlxsw_sp_acl_rulei_commit(rulei); - if (err) - goto err_rulei_commit; - - err = mlxsw_sp_acl_tcam_region_entry_insert(mlxsw_sp, region, - parman_item->index, rulei); - region->catchall.rulei = rulei; - if (err) - goto err_rule_insert; - - return 0; - -err_rule_insert: -err_rulei_commit: -err_rulei_act_continue: - mlxsw_sp_acl_rulei_destroy(rulei); -err_rulei_create: - parman_item_remove(region->parman, parman_prio, parman_item); -err_parman_item_add: - parman_prio_fini(parman_prio); - return err; -} - -static void -mlxsw_sp_acl_tcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region) -{ - struct parman_prio *parman_prio = ®ion->catchall.parman_prio; - struct parman_item *parman_item = ®ion->catchall.parman_item; - struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei; - - mlxsw_sp_acl_tcam_region_entry_remove(mlxsw_sp, region, - parman_item->index); - mlxsw_sp_acl_rulei_destroy(rulei); - parman_item_remove(region->parman, parman_prio, parman_item); - parman_prio_fini(parman_prio); -} - -static void -mlxsw_sp_acl_tcam_region_move(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region, - u16 src_offset, u16 dst_offset, u16 size) -{ - char prcr_pl[MLXSW_REG_PRCR_LEN]; - - mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE, - region->tcam_region_info, src_offset, - region->tcam_region_info, dst_offset, size); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl); -} - -static int mlxsw_sp_acl_tcam_region_parman_resize(void *priv, - unsigned long new_count) -{ - struct mlxsw_sp_acl_tcam_region *region = priv; - struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp; - u64 max_tcam_rules; - - max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); - if (new_count > max_tcam_rules) - return -EINVAL; - return mlxsw_sp_acl_tcam_region_resize(mlxsw_sp, region, new_count); -} - -static void mlxsw_sp_acl_tcam_region_parman_move(void *priv, - unsigned long from_index, - unsigned long to_index, - unsigned long count) -{ - struct mlxsw_sp_acl_tcam_region *region = priv; - struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp; - - mlxsw_sp_acl_tcam_region_move(mlxsw_sp, region, - from_index, to_index, count); -} - -static const struct parman_ops mlxsw_sp_acl_tcam_region_parman_ops = { - .base_count = MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT, - .resize_step = MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP, - .resize = mlxsw_sp_acl_tcam_region_parman_resize, - .move = mlxsw_sp_acl_tcam_region_parman_move, - .algo = PARMAN_ALGO_TYPE_LSORT, -}; - static struct mlxsw_sp_acl_tcam_region * mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam, struct mlxsw_afk_element_usage *elusage) { + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); struct mlxsw_sp_acl_tcam_region *region; int err; - region = kzalloc(sizeof(*region), GFP_KERNEL); + region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL); if (!region) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(®ion->chunk_list); region->mlxsw_sp = mlxsw_sp; - region->parman = parman_create(&mlxsw_sp_acl_tcam_region_parman_ops, - region); - if (!region->parman) { - err = -ENOMEM; - goto err_parman_create; - } - region->key_info = mlxsw_afk_key_info_get(afk, elusage); if (IS_ERR(region->key_info)) { err = PTR_ERR(region->key_info); @@ -717,7 +547,7 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_region_id_get; - region->key_type = MLXSW_REG_PTAR_KEY_TYPE_FLEX; + region->key_type = ops->key_type; err = mlxsw_sp_acl_tcam_region_alloc(mlxsw_sp, region); if (err) goto err_tcam_region_alloc; @@ -726,13 +556,13 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_tcam_region_enable; - err = mlxsw_sp_acl_tcam_region_catchall_add(mlxsw_sp, region); + err = ops->region_init(mlxsw_sp, region->priv, region); if (err) - goto err_tcam_region_catchall_add; + goto err_tcam_region_init; return region; -err_tcam_region_catchall_add: +err_tcam_region_init: mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region); err_tcam_region_enable: mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region); @@ -741,8 +571,6 @@ err_tcam_region_alloc: err_region_id_get: mlxsw_afk_key_info_put(region->key_info); err_key_info_get: - parman_destroy(region->parman); -err_parman_create: kfree(region); return ERR_PTR(err); } @@ -751,12 +579,13 @@ static void mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region) { - mlxsw_sp_acl_tcam_region_catchall_del(mlxsw_sp, region); + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + ops->region_fini(mlxsw_sp, region->priv); mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region); mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region); mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id); mlxsw_afk_key_info_put(region->key_info); - parman_destroy(region->parman); kfree(region); } @@ -831,13 +660,14 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp, unsigned int priority, struct mlxsw_afk_element_usage *elusage) { + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_chunk *chunk; int err; if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO) return ERR_PTR(-EINVAL); - chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL); if (!chunk) return ERR_PTR(-ENOMEM); chunk->priority = priority; @@ -849,7 +679,7 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_chunk_assoc; - parman_prio_init(chunk->region->parman, &chunk->parman_prio, priority); + ops->chunk_init(chunk->region->priv, chunk->priv, priority); err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node, mlxsw_sp_acl_tcam_chunk_ht_params); @@ -859,7 +689,7 @@ mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp, return chunk; err_rhashtable_insert: - parman_prio_fini(&chunk->parman_prio); + ops->chunk_fini(chunk->priv); mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk); err_chunk_assoc: kfree(chunk); @@ -870,11 +700,12 @@ static void mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_chunk *chunk) { + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_group *group = chunk->group; rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node, mlxsw_sp_acl_tcam_chunk_ht_params); - parman_prio_fini(&chunk->parman_prio); + ops->chunk_fini(chunk->priv); mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk); kfree(chunk); } @@ -908,11 +739,19 @@ static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk); } +static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + return ops->entry_priv_size; +} + static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_group *group, struct mlxsw_sp_acl_tcam_entry *entry, struct mlxsw_sp_acl_rule_info *rulei) { + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_chunk *chunk; struct mlxsw_sp_acl_tcam_region *region; int err; @@ -923,24 +762,16 @@ static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(chunk); region = chunk->region; - err = parman_item_add(region->parman, &chunk->parman_prio, - &entry->parman_item); - if (err) - goto err_parman_item_add; - err = mlxsw_sp_acl_tcam_region_entry_insert(mlxsw_sp, region, - entry->parman_item.index, - rulei); + err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv, + entry->priv, rulei); if (err) - goto err_rule_insert; + goto err_entry_add; entry->chunk = chunk; return 0; -err_rule_insert: - parman_item_remove(region->parman, &chunk->parman_prio, - &entry->parman_item); -err_parman_item_add: +err_entry_add: mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); return err; } @@ -948,13 +779,11 @@ err_parman_item_add: static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_entry *entry) { + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; struct mlxsw_sp_acl_tcam_region *region = chunk->region; - mlxsw_sp_acl_tcam_region_entry_remove(mlxsw_sp, region, - entry->parman_item.index); - parman_item_remove(region->parman, &chunk->parman_prio, - &entry->parman_item); + ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv); mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); } @@ -963,12 +792,12 @@ mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_entry *entry, bool *activity) { + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; struct mlxsw_sp_acl_tcam_region *region = chunk->region; - return mlxsw_sp_acl_tcam_region_entry_activity_get(mlxsw_sp, region, - entry->parman_item.index, - activity); + return ops->entry_activity_get(mlxsw_sp, region->priv, + entry->priv, activity); } static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { @@ -1030,10 +859,10 @@ struct mlxsw_sp_acl_tcam_flower_rule { static int mlxsw_sp_acl_tcam_flower_ruleset_add(struct mlxsw_sp *mlxsw_sp, - void *priv, void *ruleset_priv) + struct mlxsw_sp_acl_tcam *tcam, + void *ruleset_priv) { struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; - struct mlxsw_sp_acl_tcam *tcam = priv; return mlxsw_sp_acl_tcam_group_add(mlxsw_sp, tcam, &ruleset->group, mlxsw_sp_acl_tcam_patterns, @@ -1081,6 +910,12 @@ mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv) return mlxsw_sp_acl_tcam_group_id(&ruleset->group); } +static size_t mlxsw_sp_acl_tcam_flower_rule_priv_size(struct mlxsw_sp *mlxsw_sp) +{ + return sizeof(struct mlxsw_sp_acl_tcam_flower_rule) + + mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp); +} + static int mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -1118,7 +953,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind, .ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind, .ruleset_group_id = mlxsw_sp_acl_tcam_flower_ruleset_group_id, - .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), + .rule_priv_size = mlxsw_sp_acl_tcam_flower_rule_priv_size, .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, .rule_activity_get = mlxsw_sp_acl_tcam_flower_rule_activity_get, @@ -1129,7 +964,7 @@ mlxsw_sp_acl_tcam_profile_ops_arr[] = { [MLXSW_SP_ACL_PROFILE_FLOWER] = &mlxsw_sp_acl_tcam_flower_ops, }; -static const struct mlxsw_sp_acl_profile_ops * +const struct mlxsw_sp_acl_profile_ops * mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_acl_profile profile) { @@ -1142,10 +977,3 @@ mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp, return NULL; return ops; } - -const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops = { - .priv_size = sizeof(struct mlxsw_sp_acl_tcam), - .init = mlxsw_sp_acl_tcam_init, - .fini = mlxsw_sp_acl_tcam_fini, - .profile_ops = mlxsw_sp_acl_tcam_profile_ops, -}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h new file mode 100644 index 000000000000..cef769764505 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -0,0 +1,146 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017-2018 Jiri Pirko <[email protected]> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_ACL_TCAM_H +#define _MLXSW_SPECTRUM_ACL_TCAM_H + +#include <linux/list.h> +#include <linux/parman.h> + +#include "reg.h" +#include "spectrum.h" +#include "core_acl_flex_keys.h" + +struct mlxsw_sp_acl_tcam { + unsigned long *used_regions; /* bit array */ + unsigned int max_regions; + unsigned long *used_groups; /* bit array */ + unsigned int max_groups; + unsigned int max_group_size; + unsigned long priv[0]; + /* priv has to be always the last item */ +}; + +size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam); +void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam); +int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 *priority, bool fillup_priority); + +struct mlxsw_sp_acl_profile_ops { + size_t ruleset_priv_size; + int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, void *ruleset_priv); + void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv); + int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress); + void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress); + u16 (*ruleset_group_id)(void *ruleset_priv); + size_t (*rule_priv_size)(struct mlxsw_sp *mlxsw_sp); + int (*rule_add)(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv, void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei); + void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv); + int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv, + bool *activity); +}; + +const struct mlxsw_sp_acl_profile_ops * +mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_acl_profile profile); + +#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16 +#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16 + +#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U) + +struct mlxsw_sp_acl_tcam_group; + +struct mlxsw_sp_acl_tcam_region { + struct list_head list; /* Member of a TCAM group */ + struct list_head chunk_list; /* List of chunks under this region */ + struct mlxsw_sp_acl_tcam_group *group; + enum mlxsw_reg_ptar_key_type key_type; + u16 id; /* ACL ID and region ID - they are same */ + char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN]; + struct mlxsw_afk_key_info *key_info; + struct mlxsw_sp *mlxsw_sp; + unsigned long priv[0]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_acl_ctcam_region { + struct parman *parman; + struct mlxsw_sp_acl_tcam_region *region; +}; + +struct mlxsw_sp_acl_ctcam_chunk { + struct parman_prio parman_prio; +}; + +struct mlxsw_sp_acl_ctcam_entry { + struct parman_item parman_item; +}; + +int mlxsw_sp_acl_ctcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_tcam_region *region); +void mlxsw_sp_acl_ctcam_region_fini(struct mlxsw_sp_acl_ctcam_region *cregion); +void mlxsw_sp_acl_ctcam_chunk_init(struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + unsigned int priority); +void mlxsw_sp_acl_ctcam_chunk_fini(struct mlxsw_sp_acl_ctcam_chunk *cchunk); +int mlxsw_sp_acl_ctcam_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_sp_acl_rule_info *rulei, + bool fillup_priority); +void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_chunk *cchunk, + struct mlxsw_sp_acl_ctcam_entry *centry); +static inline unsigned int +mlxsw_sp_acl_ctcam_entry_offset(struct mlxsw_sp_acl_ctcam_entry *centry) +{ + return centry->parman_item.index; +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index fe4327f547d2..fd557585514d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -1,7 +1,7 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c - * Copyright (c) 2016 Mellanox Technologies. All rights reserved. - * Copyright (c) 2016 Jiri Pirko <[email protected]> + * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2018 Jiri Pirko <[email protected]> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,422 +33,74 @@ */ #include <linux/kernel.h> -#include <linux/bitops.h> +#include <linux/slab.h> #include "spectrum.h" -#define MLXSW_SP_KVDL_SINGLE_BASE 0 -#define MLXSW_SP_KVDL_SINGLE_SIZE 16384 -#define MLXSW_SP_KVDL_SINGLE_END \ - (MLXSW_SP_KVDL_SINGLE_SIZE + MLXSW_SP_KVDL_SINGLE_BASE - 1) - -#define MLXSW_SP_KVDL_CHUNKS_BASE \ - (MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE) -#define MLXSW_SP_KVDL_CHUNKS_SIZE 49152 -#define MLXSW_SP_KVDL_CHUNKS_END \ - (MLXSW_SP_KVDL_CHUNKS_SIZE + MLXSW_SP_KVDL_CHUNKS_BASE - 1) - -#define MLXSW_SP_KVDL_LARGE_CHUNKS_BASE \ - (MLXSW_SP_KVDL_CHUNKS_BASE + MLXSW_SP_KVDL_CHUNKS_SIZE) -#define MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE \ - (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_LARGE_CHUNKS_BASE) -#define MLXSW_SP_KVDL_LARGE_CHUNKS_END \ - (MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP_KVDL_LARGE_CHUNKS_BASE - 1) - -#define MLXSW_SP_KVDL_SINGLE_ALLOC_SIZE 1 -#define MLXSW_SP_KVDL_CHUNKS_ALLOC_SIZE 32 -#define MLXSW_SP_KVDL_LARGE_CHUNKS_ALLOC_SIZE 512 - -struct mlxsw_sp_kvdl_part_info { - unsigned int part_index; - unsigned int start_index; - unsigned int end_index; - unsigned int alloc_size; - enum mlxsw_sp_resource_id resource_id; -}; - -enum mlxsw_sp_kvdl_part_id { - MLXSW_SP_KVDL_PART_ID_SINGLE, - MLXSW_SP_KVDL_PART_ID_CHUNKS, - MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS, -}; - -#define MLXSW_SP_KVDL_PART_INFO(id) \ -[MLXSW_SP_KVDL_PART_ID_##id] = { \ - .start_index = MLXSW_SP_KVDL_##id##_BASE, \ - .end_index = MLXSW_SP_KVDL_##id##_END, \ - .alloc_size = MLXSW_SP_KVDL_##id##_ALLOC_SIZE, \ - .resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_##id, \ -} - -static const struct mlxsw_sp_kvdl_part_info mlxsw_sp_kvdl_parts_info[] = { - MLXSW_SP_KVDL_PART_INFO(SINGLE), - MLXSW_SP_KVDL_PART_INFO(CHUNKS), - MLXSW_SP_KVDL_PART_INFO(LARGE_CHUNKS), -}; - -#define MLXSW_SP_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp_kvdl_parts_info) - -struct mlxsw_sp_kvdl_part { - struct mlxsw_sp_kvdl_part_info info; - unsigned long usage[0]; /* Entries */ -}; - struct mlxsw_sp_kvdl { - struct mlxsw_sp_kvdl_part *parts[MLXSW_SP_KVDL_PARTS_INFO_LEN]; + const struct mlxsw_sp_kvdl_ops *kvdl_ops; + unsigned long priv[0]; + /* priv has to be always the last item */ }; -static struct mlxsw_sp_kvdl_part * -mlxsw_sp_kvdl_alloc_size_part(struct mlxsw_sp_kvdl *kvdl, - unsigned int alloc_size) -{ - struct mlxsw_sp_kvdl_part *part, *min_part = NULL; - int i; - - for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) { - part = kvdl->parts[i]; - if (alloc_size <= part->info.alloc_size && - (!min_part || - part->info.alloc_size <= min_part->info.alloc_size)) - min_part = part; - } - - return min_part ?: ERR_PTR(-ENOBUFS); -} - -static struct mlxsw_sp_kvdl_part * -mlxsw_sp_kvdl_index_part(struct mlxsw_sp_kvdl *kvdl, u32 kvdl_index) -{ - struct mlxsw_sp_kvdl_part *part; - int i; - - for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) { - part = kvdl->parts[i]; - if (kvdl_index >= part->info.start_index && - kvdl_index <= part->info.end_index) - return part; - } - - return ERR_PTR(-EINVAL); -} - -static u32 -mlxsw_sp_entry_index_kvdl_index(const struct mlxsw_sp_kvdl_part_info *info, - unsigned int entry_index) -{ - return info->start_index + entry_index * info->alloc_size; -} - -static unsigned int -mlxsw_sp_kvdl_index_entry_index(const struct mlxsw_sp_kvdl_part_info *info, - u32 kvdl_index) -{ - return (kvdl_index - info->start_index) / info->alloc_size; -} - -static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part, - u32 *p_kvdl_index) -{ - const struct mlxsw_sp_kvdl_part_info *info = &part->info; - unsigned int entry_index, nr_entries; - - nr_entries = (info->end_index - info->start_index + 1) / - info->alloc_size; - entry_index = find_first_zero_bit(part->usage, nr_entries); - if (entry_index == nr_entries) - return -ENOBUFS; - __set_bit(entry_index, part->usage); - - *p_kvdl_index = mlxsw_sp_entry_index_kvdl_index(info, entry_index); - - return 0; -} - -static void mlxsw_sp_kvdl_part_free(struct mlxsw_sp_kvdl_part *part, - u32 kvdl_index) -{ - const struct mlxsw_sp_kvdl_part_info *info = &part->info; - unsigned int entry_index; - - entry_index = mlxsw_sp_kvdl_index_entry_index(info, kvdl_index); - __clear_bit(entry_index, part->usage); -} - -int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, - u32 *p_entry_index) -{ - struct mlxsw_sp_kvdl_part *part; - - /* Find partition with smallest allocation size satisfying the - * requested size. - */ - part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count); - if (IS_ERR(part)) - return PTR_ERR(part); - - return mlxsw_sp_kvdl_part_alloc(part, p_entry_index); -} - -void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index) -{ - struct mlxsw_sp_kvdl_part *part; - - part = mlxsw_sp_kvdl_index_part(mlxsw_sp->kvdl, entry_index); - if (IS_ERR(part)) - return; - mlxsw_sp_kvdl_part_free(part, entry_index); -} - -int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp, - unsigned int entry_count, - unsigned int *p_alloc_size) -{ - struct mlxsw_sp_kvdl_part *part; - - part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count); - if (IS_ERR(part)) - return PTR_ERR(part); - - *p_alloc_size = part->info.alloc_size; - - return 0; -} - -static void mlxsw_sp_kvdl_part_update(struct mlxsw_sp_kvdl_part *part, - struct mlxsw_sp_kvdl_part *part_prev, - unsigned int size) -{ - - if (!part_prev) { - part->info.end_index = size - 1; - } else { - part->info.start_index = part_prev->info.end_index + 1; - part->info.end_index = part->info.start_index + size - 1; - } -} - -static struct mlxsw_sp_kvdl_part * -mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_kvdl_part_info *info, - struct mlxsw_sp_kvdl_part *part_prev) +int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp) { - struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); - struct mlxsw_sp_kvdl_part *part; - bool need_update = true; - unsigned int nr_entries; - size_t usage_size; - u64 resource_size; + const struct mlxsw_sp_kvdl_ops *kvdl_ops = mlxsw_sp->kvdl_ops; + struct mlxsw_sp_kvdl *kvdl; int err; - err = devlink_resource_size_get(devlink, info->resource_id, - &resource_size); - if (err) { - need_update = false; - resource_size = info->end_index - info->start_index + 1; - } - - nr_entries = div_u64(resource_size, info->alloc_size); - usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long); - part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL); - if (!part) - return ERR_PTR(-ENOMEM); - - memcpy(&part->info, info, sizeof(part->info)); - - if (need_update) - mlxsw_sp_kvdl_part_update(part, part_prev, resource_size); - return part; -} - -static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp_kvdl_part *part) -{ - kfree(part); -} - -static int mlxsw_sp_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; - const struct mlxsw_sp_kvdl_part_info *info; - struct mlxsw_sp_kvdl_part *part_prev = NULL; - int err, i; + kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl) + kvdl_ops->priv_size, + GFP_KERNEL); + if (!kvdl) + return -ENOMEM; + kvdl->kvdl_ops = kvdl_ops; + mlxsw_sp->kvdl = kvdl; - for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) { - info = &mlxsw_sp_kvdl_parts_info[i]; - kvdl->parts[i] = mlxsw_sp_kvdl_part_init(mlxsw_sp, info, - part_prev); - if (IS_ERR(kvdl->parts[i])) { - err = PTR_ERR(kvdl->parts[i]); - goto err_kvdl_part_init; - } - part_prev = kvdl->parts[i]; - } + err = kvdl_ops->init(mlxsw_sp, kvdl->priv); + if (err) + goto err_init; return 0; -err_kvdl_part_init: - for (i--; i >= 0; i--) - mlxsw_sp_kvdl_part_fini(kvdl->parts[i]); +err_init: + kfree(kvdl); return err; } -static void mlxsw_sp_kvdl_parts_fini(struct mlxsw_sp *mlxsw_sp) +void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; - int i; - - for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) - mlxsw_sp_kvdl_part_fini(kvdl->parts[i]); -} - -static u64 mlxsw_sp_kvdl_part_occ(struct mlxsw_sp_kvdl_part *part) -{ - const struct mlxsw_sp_kvdl_part_info *info = &part->info; - unsigned int nr_entries; - int bit = -1; - u64 occ = 0; - - nr_entries = (info->end_index - - info->start_index + 1) / - info->alloc_size; - while ((bit = find_next_bit(part->usage, nr_entries, bit + 1)) - < nr_entries) - occ += info->alloc_size; - return occ; -} - -static u64 mlxsw_sp_kvdl_occ_get(void *priv) -{ - const struct mlxsw_sp *mlxsw_sp = priv; - u64 occ = 0; - int i; - - for (i = 0; i < MLXSW_SP_KVDL_PARTS_INFO_LEN; i++) - occ += mlxsw_sp_kvdl_part_occ(mlxsw_sp->kvdl->parts[i]); - - return occ; -} - -static u64 mlxsw_sp_kvdl_single_occ_get(void *priv) -{ - const struct mlxsw_sp *mlxsw_sp = priv; - struct mlxsw_sp_kvdl_part *part; - - part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_SINGLE]; - return mlxsw_sp_kvdl_part_occ(part); -} - -static u64 mlxsw_sp_kvdl_chunks_occ_get(void *priv) -{ - const struct mlxsw_sp *mlxsw_sp = priv; - struct mlxsw_sp_kvdl_part *part; - - part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_CHUNKS]; - return mlxsw_sp_kvdl_part_occ(part); -} - -static u64 mlxsw_sp_kvdl_large_chunks_occ_get(void *priv) -{ - const struct mlxsw_sp *mlxsw_sp = priv; - struct mlxsw_sp_kvdl_part *part; - part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS]; - return mlxsw_sp_kvdl_part_occ(part); + kvdl->kvdl_ops->fini(mlxsw_sp, kvdl->priv); + kfree(kvdl); } -int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core) +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, u32 *p_entry_index) { - struct devlink *devlink = priv_to_devlink(mlxsw_core); - static struct devlink_resource_size_params size_params; - u32 kvdl_max_size; - int err; - - kvdl_max_size = MLXSW_CORE_RES_GET(mlxsw_core, KVD_SIZE) - - MLXSW_CORE_RES_GET(mlxsw_core, KVD_SINGLE_MIN_SIZE) - - MLXSW_CORE_RES_GET(mlxsw_core, KVD_DOUBLE_MIN_SIZE); - - devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, - MLXSW_SP_KVDL_SINGLE_ALLOC_SIZE, - DEVLINK_RESOURCE_UNIT_ENTRY); - err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_SINGLES, - MLXSW_SP_KVDL_SINGLE_SIZE, - MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE, - MLXSW_SP_RESOURCE_KVD_LINEAR, - &size_params); - if (err) - return err; - - devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, - MLXSW_SP_KVDL_CHUNKS_ALLOC_SIZE, - DEVLINK_RESOURCE_UNIT_ENTRY); - err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_CHUNKS, - MLXSW_SP_KVDL_CHUNKS_SIZE, - MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, - MLXSW_SP_RESOURCE_KVD_LINEAR, - &size_params); - if (err) - return err; + struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; - devlink_resource_size_params_init(&size_params, 0, kvdl_max_size, - MLXSW_SP_KVDL_LARGE_CHUNKS_ALLOC_SIZE, - DEVLINK_RESOURCE_UNIT_ENTRY); - err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD_LINEAR_LARGE_CHUNKS, - MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE, - MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, - MLXSW_SP_RESOURCE_KVD_LINEAR, - &size_params); - return err; + return kvdl->kvdl_ops->alloc(mlxsw_sp, kvdl->priv, type, + entry_count, p_entry_index); } -int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp) +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, int entry_index) { - struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); - struct mlxsw_sp_kvdl *kvdl; - int err; - - kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl), GFP_KERNEL); - if (!kvdl) - return -ENOMEM; - mlxsw_sp->kvdl = kvdl; - - err = mlxsw_sp_kvdl_parts_init(mlxsw_sp); - if (err) - goto err_kvdl_parts_init; - - devlink_resource_occ_get_register(devlink, - MLXSW_SP_RESOURCE_KVD_LINEAR, - mlxsw_sp_kvdl_occ_get, - mlxsw_sp); - devlink_resource_occ_get_register(devlink, - MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE, - mlxsw_sp_kvdl_single_occ_get, - mlxsw_sp); - devlink_resource_occ_get_register(devlink, - MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS, - mlxsw_sp_kvdl_chunks_occ_get, - mlxsw_sp); - devlink_resource_occ_get_register(devlink, - MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS, - mlxsw_sp_kvdl_large_chunks_occ_get, - mlxsw_sp); - - return 0; + struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; -err_kvdl_parts_init: - kfree(mlxsw_sp->kvdl); - return err; + kvdl->kvdl_ops->free(mlxsw_sp, kvdl->priv, type, + entry_count, entry_index); } -void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp) +int mlxsw_sp_kvdl_alloc_count_query(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_kvdl_entry_type type, + unsigned int entry_count, + unsigned int *p_alloc_count) { - struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_kvdl *kvdl = mlxsw_sp->kvdl; - devlink_resource_occ_get_unregister(devlink, - MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS); - devlink_resource_occ_get_unregister(devlink, - MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS); - devlink_resource_occ_get_unregister(devlink, - MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE); - devlink_resource_occ_get_unregister(devlink, - MLXSW_SP_RESOURCE_KVD_LINEAR); - mlxsw_sp_kvdl_parts_fini(mlxsw_sp); - kfree(mlxsw_sp->kvdl); + return kvdl->kvdl_ops->alloc_size_query(mlxsw_sp, kvdl->priv, type, + entry_count, p_alloc_count); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index a82539609d49..98dcaf78365c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -1075,6 +1075,6 @@ void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp) struct mlxsw_sp_mr *mr = mlxsw_sp->mr; cancel_delayed_work_sync(&mr->stats_update_dw); - mr->mr_ops->fini(mr->priv); + mr->mr_ops->fini(mlxsw_sp, mr->priv); kfree(mr); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h index 7c864a86811d..c92fa90dca31 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h @@ -46,15 +46,6 @@ enum mlxsw_sp_mr_route_action { MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD, }; -enum mlxsw_sp_mr_route_prio { - MLXSW_SP_MR_ROUTE_PRIO_SG, - MLXSW_SP_MR_ROUTE_PRIO_STARG, - MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, - __MLXSW_SP_MR_ROUTE_PRIO_MAX -}; - -#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1) - struct mlxsw_sp_mr_route_key { int vrid; enum mlxsw_sp_l3proto proto; @@ -101,7 +92,7 @@ struct mlxsw_sp_mr_ops { u16 erif_index); void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv, void *route_priv); - void (*fini)(void *priv); + void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv); }; struct mlxsw_sp_mr; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index 4f4c0d311883..e9c9f1f45b9d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -1,7 +1,8 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. * Copyright (c) 2017 Yotam Gigi <[email protected]> + * Copyright (c) 2018 Jiri Pirko <[email protected]> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +36,6 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/netdevice.h> -#include <linux/parman.h> #include "spectrum_mr_tcam.h" #include "reg.h" @@ -43,15 +43,8 @@ #include "core_acl_flex_actions.h" #include "spectrum_mr.h" -struct mlxsw_sp_mr_tcam_region { - struct mlxsw_sp *mlxsw_sp; - enum mlxsw_reg_rtar_key_type rtar_key_type; - struct parman *parman; - struct parman_prio *parman_prios; -}; - struct mlxsw_sp_mr_tcam { - struct mlxsw_sp_mr_tcam_region tcam_regions[MLXSW_SP_L3_PROTO_MAX]; + void *priv; }; /* This struct maps to one RIGR2 register entry */ @@ -84,8 +77,6 @@ mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list) INIT_LIST_HEAD(&erif_list->erif_sublists); } -#define MLXSW_SP_KVDL_RIGR2_SIZE 1 - static struct mlxsw_sp_mr_erif_sublist * mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mr_tcam_erif_list *erif_list) @@ -96,8 +87,8 @@ mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp, erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL); if (!erif_sublist) return ERR_PTR(-ENOMEM); - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_RIGR2_SIZE, - &erif_sublist->rigr2_kvdl_index); + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR, + 1, &erif_sublist->rigr2_kvdl_index); if (err) { kfree(erif_sublist); return ERR_PTR(err); @@ -112,7 +103,8 @@ mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mr_erif_sublist *erif_sublist) { list_del(&erif_sublist->list); - mlxsw_sp_kvdl_free(mlxsw_sp, erif_sublist->rigr2_kvdl_index); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR, + 1, erif_sublist->rigr2_kvdl_index); kfree(erif_sublist); } @@ -221,12 +213,11 @@ struct mlxsw_sp_mr_tcam_route { struct mlxsw_sp_mr_tcam_erif_list erif_list; struct mlxsw_afa_block *afa_block; u32 counter_index; - struct parman_item parman_item; - struct parman_prio *parman_prio; enum mlxsw_sp_mr_route_action action; struct mlxsw_sp_mr_route_key key; u16 irif_index; u16 min_mtu; + void *priv; }; static struct mlxsw_afa_block * @@ -297,60 +288,6 @@ mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block) mlxsw_afa_block_destroy(afa_block); } -static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp, - struct parman_item *parman_item, - struct mlxsw_sp_mr_route_key *key, - struct mlxsw_afa_block *afa_block) -{ - char rmft2_pl[MLXSW_REG_RMFT2_LEN]; - - switch (key->proto) { - case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index, - key->vrid, - MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0, - ntohl(key->group.addr4), - ntohl(key->group_mask.addr4), - ntohl(key->source.addr4), - ntohl(key->source_mask.addr4), - mlxsw_afa_block_first_set(afa_block)); - break; - case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, true, parman_item->index, - key->vrid, - MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0, - key->group.addr6, - key->group_mask.addr6, - key->source.addr6, - key->source_mask.addr6, - mlxsw_afa_block_first_set(afa_block)); - } - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); -} - -static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid, - struct mlxsw_sp_mr_route_key *key, - struct parman_item *parman_item) -{ - struct in6_addr zero_addr = IN6ADDR_ANY_INIT; - char rmft2_pl[MLXSW_REG_RMFT2_LEN]; - - switch (key->proto) { - case MLXSW_SP_L3_PROTO_IPV4: - mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, - vrid, 0, 0, 0, 0, 0, 0, NULL); - break; - case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_reg_rmft2_ipv6_pack(rmft2_pl, false, parman_item->index, - vrid, 0, 0, zero_addr, zero_addr, - zero_addr, zero_addr, NULL); - break; - } - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); -} - static int mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mr_tcam_erif_list *erif_list, @@ -370,51 +307,12 @@ mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp, return 0; } -static struct mlxsw_sp_mr_tcam_region * -mlxsw_sp_mr_tcam_protocol_region(struct mlxsw_sp_mr_tcam *mr_tcam, - enum mlxsw_sp_l3proto proto) -{ - return &mr_tcam->tcam_regions[proto]; -} - -static int -mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam, - struct mlxsw_sp_mr_tcam_route *route, - enum mlxsw_sp_mr_route_prio prio) -{ - struct mlxsw_sp_mr_tcam_region *tcam_region; - int err; - - tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam, - route->key.proto); - err = parman_item_add(tcam_region->parman, - &tcam_region->parman_prios[prio], - &route->parman_item); - if (err) - return err; - - route->parman_prio = &tcam_region->parman_prios[prio]; - return 0; -} - -static void -mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam, - struct mlxsw_sp_mr_tcam_route *route) -{ - struct mlxsw_sp_mr_tcam_region *tcam_region; - - tcam_region = mlxsw_sp_mr_tcam_protocol_region(mr_tcam, - route->key.proto); - - parman_item_remove(tcam_region->parman, - route->parman_prio, &route->parman_item); -} - static int mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, void *route_priv, struct mlxsw_sp_mr_route_params *route_params) { + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; struct mlxsw_sp_mr_tcam_route *route = route_priv; struct mlxsw_sp_mr_tcam *mr_tcam = priv; int err; @@ -448,22 +346,23 @@ mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, goto err_afa_block_create; } - /* Allocate place in the TCAM */ - err = mlxsw_sp_mr_tcam_route_parman_item_add(mr_tcam, route, - route_params->prio); - if (err) - goto err_parman_item_add; + route->priv = kzalloc(ops->route_priv_size, GFP_KERNEL); + if (!route->priv) { + err = -ENOMEM; + goto err_route_priv_alloc; + } /* Write the route to the TCAM */ - err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, - &route->key, route->afa_block); + err = ops->route_create(mlxsw_sp, mr_tcam->priv, route->priv, + &route->key, route->afa_block, + route_params->prio); if (err) - goto err_route_replace; + goto err_route_create; return 0; -err_route_replace: - mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route); -err_parman_item_add: +err_route_create: + kfree(route->priv); +err_route_priv_alloc: mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); err_afa_block_create: mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); @@ -476,12 +375,12 @@ err_counter_alloc: static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv, void *route_priv) { + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; struct mlxsw_sp_mr_tcam_route *route = route_priv; struct mlxsw_sp_mr_tcam *mr_tcam = priv; - mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid, - &route->key, &route->parman_item); - mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route); + ops->route_destroy(mlxsw_sp, mr_tcam->priv, route->priv, &route->key); + kfree(route->priv); mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); @@ -502,6 +401,7 @@ mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp, void *route_priv, enum mlxsw_sp_mr_route_action route_action) { + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; struct mlxsw_sp_mr_tcam_route *route = route_priv; struct mlxsw_afa_block *afa_block; int err; @@ -516,8 +416,7 @@ mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(afa_block); /* Update the TCAM route entry */ - err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, - &route->key, afa_block); + err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block); if (err) goto err; @@ -534,6 +433,7 @@ err: static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp, void *route_priv, u16 min_mtu) { + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; struct mlxsw_sp_mr_tcam_route *route = route_priv; struct mlxsw_afa_block *afa_block; int err; @@ -549,8 +449,7 @@ static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(afa_block); /* Update the TCAM route entry */ - err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, - &route->key, afa_block); + err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block); if (err) goto err; @@ -596,6 +495,7 @@ static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp, void *route_priv, u16 erif_index) { + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; struct mlxsw_sp_mr_tcam_route *route = route_priv; struct mlxsw_sp_mr_erif_sublist *erif_sublist; struct mlxsw_sp_mr_tcam_erif_list erif_list; @@ -630,8 +530,7 @@ static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp, } /* Update the TCAM route entry */ - err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, - &route->key, afa_block); + err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block); if (err) goto err_route_write; @@ -653,6 +552,7 @@ static int mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv, struct mlxsw_sp_mr_route_info *route_info) { + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; struct mlxsw_sp_mr_tcam_route *route = route_priv; struct mlxsw_sp_mr_tcam_erif_list erif_list; struct mlxsw_afa_block *afa_block; @@ -677,8 +577,7 @@ mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv, } /* Update the TCAM route entry */ - err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, - &route->key, afa_block); + err = ops->route_update(mlxsw_sp, route->priv, &route->key, afa_block); if (err) goto err_route_write; @@ -699,167 +598,36 @@ err_erif_populate: return err; } -#define MLXSW_SP_MR_TCAM_REGION_BASE_COUNT 16 -#define MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP 16 - -static int -mlxsw_sp_mr_tcam_region_alloc(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) -{ - struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; - char rtar_pl[MLXSW_REG_RTAR_LEN]; - - mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE, - mr_tcam_region->rtar_key_type, - MLXSW_SP_MR_TCAM_REGION_BASE_COUNT); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); -} - -static void -mlxsw_sp_mr_tcam_region_free(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) -{ - struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; - char rtar_pl[MLXSW_REG_RTAR_LEN]; - - mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE, - mr_tcam_region->rtar_key_type, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); -} - -static int mlxsw_sp_mr_tcam_region_parman_resize(void *priv, - unsigned long new_count) -{ - struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv; - struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; - char rtar_pl[MLXSW_REG_RTAR_LEN]; - u64 max_tcam_rules; - - max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); - if (new_count > max_tcam_rules) - return -EINVAL; - mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE, - mr_tcam_region->rtar_key_type, new_count); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); -} - -static void mlxsw_sp_mr_tcam_region_parman_move(void *priv, - unsigned long from_index, - unsigned long to_index, - unsigned long count) -{ - struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv; - struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; - char rrcr_pl[MLXSW_REG_RRCR_LEN]; - - mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE, - from_index, count, - mr_tcam_region->rtar_key_type, to_index); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl); -} - -static const struct parman_ops mlxsw_sp_mr_tcam_region_parman_ops = { - .base_count = MLXSW_SP_MR_TCAM_REGION_BASE_COUNT, - .resize_step = MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP, - .resize = mlxsw_sp_mr_tcam_region_parman_resize, - .move = mlxsw_sp_mr_tcam_region_parman_move, - .algo = PARMAN_ALGO_TYPE_LSORT, -}; - -static int -mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_mr_tcam_region *mr_tcam_region, - enum mlxsw_reg_rtar_key_type rtar_key_type) -{ - struct parman_prio *parman_prios; - struct parman *parman; - int err; - int i; - - mr_tcam_region->rtar_key_type = rtar_key_type; - mr_tcam_region->mlxsw_sp = mlxsw_sp; - - err = mlxsw_sp_mr_tcam_region_alloc(mr_tcam_region); - if (err) - return err; - - parman = parman_create(&mlxsw_sp_mr_tcam_region_parman_ops, - mr_tcam_region); - if (!parman) { - err = -ENOMEM; - goto err_parman_create; - } - mr_tcam_region->parman = parman; - - parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1, - sizeof(*parman_prios), GFP_KERNEL); - if (!parman_prios) { - err = -ENOMEM; - goto err_parman_prios_alloc; - } - mr_tcam_region->parman_prios = parman_prios; - - for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) - parman_prio_init(mr_tcam_region->parman, - &mr_tcam_region->parman_prios[i], i); - return 0; - -err_parman_prios_alloc: - parman_destroy(parman); -err_parman_create: - mlxsw_sp_mr_tcam_region_free(mr_tcam_region); - return err; -} - -static void -mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) -{ - int i; - - for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) - parman_prio_fini(&mr_tcam_region->parman_prios[i]); - kfree(mr_tcam_region->parman_prios); - parman_destroy(mr_tcam_region->parman); - mlxsw_sp_mr_tcam_region_free(mr_tcam_region); -} - static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) { + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; struct mlxsw_sp_mr_tcam *mr_tcam = priv; - struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0]; - u32 rtar_key; int err; - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) || - !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES)) + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES)) return -EIO; - rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST; - err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp, - ®ion[MLXSW_SP_L3_PROTO_IPV4], - rtar_key); - if (err) - return err; + mr_tcam->priv = kzalloc(ops->priv_size, GFP_KERNEL); + if (!mr_tcam->priv) + return -ENOMEM; - rtar_key = MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST; - err = mlxsw_sp_mr_tcam_region_init(mlxsw_sp, - ®ion[MLXSW_SP_L3_PROTO_IPV6], - rtar_key); + err = ops->init(mlxsw_sp, mr_tcam->priv); if (err) - goto err_ipv6_region_init; - + goto err_init; return 0; -err_ipv6_region_init: - mlxsw_sp_mr_tcam_region_fini(®ion[MLXSW_SP_L3_PROTO_IPV4]); +err_init: + kfree(mr_tcam->priv); return err; } -static void mlxsw_sp_mr_tcam_fini(void *priv) +static void mlxsw_sp_mr_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv) { + const struct mlxsw_sp_mr_tcam_ops *ops = mlxsw_sp->mr_tcam_ops; struct mlxsw_sp_mr_tcam *mr_tcam = priv; - struct mlxsw_sp_mr_tcam_region *region = &mr_tcam->tcam_regions[0]; - mlxsw_sp_mr_tcam_region_fini(®ion[MLXSW_SP_L3_PROTO_IPV6]); - mlxsw_sp_mr_tcam_region_fini(®ion[MLXSW_SP_L3_PROTO_IPV4]); + ops->fini(mr_tcam->priv); + kfree(mr_tcam->priv); } const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 79dcadb40857..b4126db695dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1106,7 +1106,8 @@ mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp, u32 tunnel_index; int err; - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index); + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + 1, &tunnel_index); if (err) return err; @@ -1122,7 +1123,8 @@ static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp, /* Unlink this node from the IPIP entry that it's the decap entry of. */ fib_entry->decap.ipip_entry->decap_fib_entry = NULL; fib_entry->decap.ipip_entry = NULL; - mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + 1, fib_entry->decap.tunnel_index); } static struct mlxsw_sp_fib_node * @@ -3162,8 +3164,9 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, * by the device and make sure the request can be satisfied. */ mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size); - err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size, - &alloc_size); + err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp, + MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + *p_adj_grp_size, &alloc_size); if (err) return err; /* It is possible the allocation results in more allocated @@ -3275,7 +3278,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, /* No valid allocation size available. */ goto set_trap; - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index); + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + ecmp_size, &adj_index); if (err) { /* We ran out of KVD linear space, just set the * trap and let everything flow through kernel. @@ -3310,7 +3314,8 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp, old_adj_index, old_ecmp_size); - mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + old_ecmp_size, old_adj_index); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n"); goto set_trap; @@ -3332,7 +3337,8 @@ set_trap: if (err) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); if (old_adj_index_valid) - mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + nh_grp->ecmp_size, nh_grp->adj_index); } static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c index 4c417903e9be..094cea775d0c 100644 --- a/drivers/net/wimax/i2400m/control.c +++ b/drivers/net/wimax/i2400m/control.c @@ -566,13 +566,12 @@ static void i2400m_msg_ack_hook(struct i2400m *i2400m, { int result; struct device *dev = i2400m_dev(i2400m); - unsigned ack_type, ack_status; + unsigned int ack_type; char strerr[32]; /* Chew on the message, we might need some information from * here */ ack_type = le16_to_cpu(l3l4_hdr->type); - ack_status = le16_to_cpu(l3l4_hdr->status); switch (ack_type) { case I2400M_MT_CMD_ENTER_POWERSAVE: /* This is just left here for the sake of example, as diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index a89b5685e68b..e9fc168bb734 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c @@ -1552,7 +1552,6 @@ int i2400m_dev_bootstrap(struct i2400m *i2400m, enum i2400m_bri flags) int ret, itr; struct device *dev = i2400m_dev(i2400m); struct i2400m_fw *i2400m_fw; - const struct i2400m_bcf_hdr *bcf; /* Firmware data */ const struct firmware *fw; const char *fw_name; @@ -1574,7 +1573,7 @@ int i2400m_dev_bootstrap(struct i2400m *i2400m, enum i2400m_bri flags) } /* Load firmware files to memory. */ - for (itr = 0, bcf = NULL, ret = -ENOENT; ; itr++) { + for (itr = 0, ret = -ENOENT; ; itr++) { fw_name = i2400m->bus_fw_names[itr]; if (fw_name == NULL) { dev_err(dev, "Could not find a usable firmware image\n"); diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c index a654687b5fa2..9ab3f0fdfea4 100644 --- a/drivers/net/wimax/i2400m/netdev.c +++ b/drivers/net/wimax/i2400m/netdev.c @@ -535,14 +535,12 @@ void i2400m_net_erx(struct i2400m *i2400m, struct sk_buff *skb, { struct net_device *net_dev = i2400m->wimax_dev.net_dev; struct device *dev = i2400m_dev(i2400m); - int protocol; d_fnstart(2, dev, "(i2400m %p skb %p [%u] cs %d)\n", i2400m, skb, skb->len, cs); switch(cs) { case I2400M_CS_IPV4_0: case I2400M_CS_IPV4: - protocol = ETH_P_IP; i2400m_rx_fake_eth_header(i2400m->wimax_dev.net_dev, skb->data - ETH_HLEN, cpu_to_be16(ETH_P_IP)); diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 5a5e0a2ab2a3..07efffd0c759 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -294,12 +294,16 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *skb, *next; + struct list_head sublist; + INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { - int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); - if (ret != 1) - list_del(&skb->list); + list_del(&skb->list); + if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1) + list_add_tail(&skb->list, &sublist); } + /* Put passed packets back on main list */ + list_splice(&sublist, head); } /* Call setsockopt() */ @@ -410,8 +414,17 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu; void nf_ct_attach(struct sk_buff *, const struct sk_buff *); +struct nf_conntrack_tuple; +bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +struct nf_conntrack_tuple; +static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb) +{ + return false; +} #endif struct nf_conn; @@ -420,6 +433,8 @@ enum ip_conntrack_info; struct nf_ct_hook { int (*update)(struct net *net, struct sk_buff *skb); void (*destroy)(struct nf_conntrack *); + bool (*get_tuple_skb)(struct nf_conntrack_tuple *, + const struct sk_buff *); }; extern struct nf_ct_hook __rcu *nf_ct_hook; diff --git a/include/net/tcp.h b/include/net/tcp.h index cce37694776e..f6cb20e6e524 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -954,6 +954,8 @@ struct rate_sample { u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ s32 delivered; /* number of packets delivered over interval */ long interval_us; /* time for tp->delivered to incr "delivered" */ + u32 snd_interval_us; /* snd interval for delivered packets */ + u32 rcv_interval_us; /* rcv interval for delivered packets */ long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ int losses; /* number of packets marked lost upon ACK */ u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index f8f4539f1135..97ff3c17ec4d 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -155,8 +155,8 @@ enum txtime_flags { }; struct sock_txtime { - clockid_t clockid; /* reference clockid */ - u32 flags; /* flags defined by enum txtime_flags */ + clockid_t clockid; /* reference clockid */ + __u32 flags; /* as defined by enum txtime_flags */ }; #endif /* _NET_TIMESTAMPING_H */ diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 949118461009..d9cc9dc4f547 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -955,4 +955,118 @@ enum { #define TCA_ETF_MAX (__TCA_ETF_MAX - 1) + +/* CAKE */ +enum { + TCA_CAKE_UNSPEC, + TCA_CAKE_PAD, + TCA_CAKE_BASE_RATE64, + TCA_CAKE_DIFFSERV_MODE, + TCA_CAKE_ATM, + TCA_CAKE_FLOW_MODE, + TCA_CAKE_OVERHEAD, + TCA_CAKE_RTT, + TCA_CAKE_TARGET, + TCA_CAKE_AUTORATE, + TCA_CAKE_MEMORY, + TCA_CAKE_NAT, + TCA_CAKE_RAW, + TCA_CAKE_WASH, + TCA_CAKE_MPU, + TCA_CAKE_INGRESS, + TCA_CAKE_ACK_FILTER, + TCA_CAKE_SPLIT_GSO, + __TCA_CAKE_MAX +}; +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) + +enum { + __TCA_CAKE_STATS_INVALID, + TCA_CAKE_STATS_PAD, + TCA_CAKE_STATS_CAPACITY_ESTIMATE64, + TCA_CAKE_STATS_MEMORY_LIMIT, + TCA_CAKE_STATS_MEMORY_USED, + TCA_CAKE_STATS_AVG_NETOFF, + TCA_CAKE_STATS_MIN_NETLEN, + TCA_CAKE_STATS_MAX_NETLEN, + TCA_CAKE_STATS_MIN_ADJLEN, + TCA_CAKE_STATS_MAX_ADJLEN, + TCA_CAKE_STATS_TIN_STATS, + TCA_CAKE_STATS_DEFICIT, + TCA_CAKE_STATS_COBALT_COUNT, + TCA_CAKE_STATS_DROPPING, + TCA_CAKE_STATS_DROP_NEXT_US, + TCA_CAKE_STATS_P_DROP, + TCA_CAKE_STATS_BLUE_TIMER_US, + __TCA_CAKE_STATS_MAX +}; +#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) + +enum { + __TCA_CAKE_TIN_STATS_INVALID, + TCA_CAKE_TIN_STATS_PAD, + TCA_CAKE_TIN_STATS_SENT_PACKETS, + TCA_CAKE_TIN_STATS_SENT_BYTES64, + TCA_CAKE_TIN_STATS_DROPPED_PACKETS, + TCA_CAKE_TIN_STATS_DROPPED_BYTES64, + TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS, + TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64, + TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS, + TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64, + TCA_CAKE_TIN_STATS_BACKLOG_PACKETS, + TCA_CAKE_TIN_STATS_BACKLOG_BYTES, + TCA_CAKE_TIN_STATS_THRESHOLD_RATE64, + TCA_CAKE_TIN_STATS_TARGET_US, + TCA_CAKE_TIN_STATS_INTERVAL_US, + TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS, + TCA_CAKE_TIN_STATS_WAY_MISSES, + TCA_CAKE_TIN_STATS_WAY_COLLISIONS, + TCA_CAKE_TIN_STATS_PEAK_DELAY_US, + TCA_CAKE_TIN_STATS_AVG_DELAY_US, + TCA_CAKE_TIN_STATS_BASE_DELAY_US, + TCA_CAKE_TIN_STATS_SPARSE_FLOWS, + TCA_CAKE_TIN_STATS_BULK_FLOWS, + TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS, + TCA_CAKE_TIN_STATS_MAX_SKBLEN, + TCA_CAKE_TIN_STATS_FLOW_QUANTUM, + __TCA_CAKE_TIN_STATS_MAX +}; +#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1) +#define TC_CAKE_MAX_TINS (8) + +enum { + CAKE_FLOW_NONE = 0, + CAKE_FLOW_SRC_IP, + CAKE_FLOW_DST_IP, + CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */ + CAKE_FLOW_FLOWS, + CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */ + CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */ + CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */ + CAKE_FLOW_MAX, +}; + +enum { + CAKE_DIFFSERV_DIFFSERV3 = 0, + CAKE_DIFFSERV_DIFFSERV4, + CAKE_DIFFSERV_DIFFSERV8, + CAKE_DIFFSERV_BESTEFFORT, + CAKE_DIFFSERV_PRECEDENCE, + CAKE_DIFFSERV_MAX +}; + +enum { + CAKE_ACK_NONE = 0, + CAKE_ACK_FILTER, + CAKE_ACK_AGGRESSIVE, + CAKE_ACK_MAX +}; + +enum { + CAKE_ATM_NONE = 0, + CAKE_ATM_ATM, + CAKE_ATM_PTM, + CAKE_ATM_MAX +}; + #endif diff --git a/net/core/dev.c b/net/core/dev.c index e18d81837a6c..1c3f0997e857 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4941,23 +4941,28 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo struct list_head sublist; struct sk_buff *skb, *next; + INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { struct net_device *orig_dev = skb->dev; struct packet_type *pt_prev = NULL; + list_del(&skb->list); __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); + if (!pt_prev) + continue; if (pt_curr != pt_prev || od_curr != orig_dev) { /* dispatch old sublist */ - list_cut_before(&sublist, head, &skb->list); __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr); /* start new sublist */ + INIT_LIST_HEAD(&sublist); pt_curr = pt_prev; od_curr = orig_dev; } + list_add_tail(&skb->list, &sublist); } /* dispatch final sublist */ - __netif_receive_skb_list_ptype(head, pt_curr, od_curr); + __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr); } static int __netif_receive_skb(struct sk_buff *skb) @@ -5093,25 +5098,30 @@ static void netif_receive_skb_list_internal(struct list_head *head) { struct bpf_prog *xdp_prog = NULL; struct sk_buff *skb, *next; + struct list_head sublist; + INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { net_timestamp_check(netdev_tstamp_prequeue, skb); - if (skb_defer_rx_timestamp(skb)) - /* Handled, remove from list */ - list_del(&skb->list); + list_del(&skb->list); + if (!skb_defer_rx_timestamp(skb)) + list_add_tail(&skb->list, &sublist); } + list_splice_init(&sublist, head); if (static_branch_unlikely(&generic_xdp_needed_key)) { preempt_disable(); rcu_read_lock(); list_for_each_entry_safe(skb, next, head, list) { xdp_prog = rcu_dereference(skb->dev->xdp_prog); - if (do_xdp_generic(xdp_prog, skb) != XDP_PASS) - /* Dropped, remove from list */ - list_del(&skb->list); + list_del(&skb->list); + if (do_xdp_generic(xdp_prog, skb) == XDP_PASS) + list_add_tail(&skb->list, &sublist); } rcu_read_unlock(); preempt_enable(); + /* Put passed packets back on main list */ + list_splice_init(&sublist, head); } rcu_read_lock(); @@ -5122,9 +5132,9 @@ static void netif_receive_skb_list_internal(struct list_head *head) int cpu = get_rps_cpu(skb->dev, skb, &rflow); if (cpu >= 0) { - enqueue_to_backlog(skb, cpu, &rflow->last_qtail); - /* Handled, remove from list */ + /* Will be handled, remove from list */ list_del(&skb->list); + enqueue_to_backlog(skb, cpu, &rflow->last_qtail); } } } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c4082cd50257..e3704a49164b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1274,9 +1274,6 @@ restart: int linear; new_segment: - /* Allocate new segment. If the interface is SG, - * allocate skb fitting to single page. - */ if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index c61240e43923..4dff40dad4dc 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -146,6 +146,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); + /* Record both segment send and ack receive intervals */ + rs->snd_interval_us = snd_us; + rs->rcv_interval_us = ack_us; + /* Normally we expect interval_us >= min-rtt. * Note that rate may still be over-estimated when a spuriously * retransmistted skb was first (s)acked because "interval_us" diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 91580c62bb86..1659a6b3cf42 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -385,8 +385,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (ndev->cnf.stable_secret.initialized) ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; - else - ndev->cnf.addr_gen_mode = ipv6_devconf_dflt.addr_gen_mode; ndev->cnf.mtu6 = dev->mtu; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); @@ -5210,7 +5208,9 @@ static inline size_t inet6_ifla6_size(void) + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ - + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */ + + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */ + + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */ + + 0; } static inline size_t inet6_if_nlmsg_size(void) @@ -5892,32 +5892,31 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, loff_t *ppos) { int ret = 0; - int new_val; + u32 new_val; struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; struct net *net = (struct net *)ctl->extra2; + struct ctl_table tmp = { + .data = &new_val, + .maxlen = sizeof(new_val), + .mode = ctl->mode, + }; if (!rtnl_trylock()) return restart_syscall(); - ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + new_val = *((u32 *)ctl->data); - if (write) { - new_val = *((int *)ctl->data); + ret = proc_douintvec(&tmp, write, buffer, lenp, ppos); + if (ret != 0) + goto out; + if (write) { if (check_addr_gen_mode(new_val) < 0) { ret = -EINVAL; goto out; } - /* request for default */ - if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) { - ipv6_devconf_dflt.addr_gen_mode = new_val; - - /* request for individual net device */ - } else { - if (!idev) - goto out; - + if (idev) { if (check_stable_privacy(idev, net, new_val) < 0) { ret = -EINVAL; goto out; @@ -5927,7 +5926,21 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, idev->cnf.addr_gen_mode = new_val; addrconf_dev_config(idev->dev); } + } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { + struct net_device *dev; + + net->ipv6.devconf_dflt->addr_gen_mode = new_val; + for_each_netdev(net, dev) { + idev = __in6_dev_get(dev); + if (idev && + idev->cnf.addr_gen_mode != new_val) { + idev->cnf.addr_gen_mode = new_val; + addrconf_dev_config(idev->dev); + } + } } + + *((u32 *)ctl->data) = new_val; } out: diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 168af54db975..dc240cb47ddf 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -603,6 +603,21 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) } EXPORT_SYMBOL(nf_conntrack_destroy); +bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb) +{ + struct nf_ct_hook *ct_hook; + bool ret = false; + + rcu_read_lock(); + ct_hook = rcu_dereference(nf_ct_hook); + if (ct_hook) + ret = ct_hook->get_tuple_skb(dst_tuple, skb); + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL(nf_ct_get_tuple_skb); + /* Built-in default zone used e.g. by modules. */ const struct nf_conntrack_zone nf_ct_zone_dflt = { .id = NF_CT_DEFAULT_ZONE_ID, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3465da2a98bd..85ab2fd6a665 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1683,6 +1683,41 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb) return 0; } +static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, + const struct sk_buff *skb) +{ + const struct nf_conntrack_tuple *src_tuple; + const struct nf_conntrack_tuple_hash *hash; + struct nf_conntrack_tuple srctuple; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (ct) { + src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo)); + memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple)); + return true; + } + + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + NFPROTO_IPV4, dev_net(skb->dev), + &srctuple)) + return false; + + hash = nf_conntrack_find_get(dev_net(skb->dev), + &nf_ct_zone_dflt, + &srctuple); + if (!hash) + return false; + + ct = nf_ct_tuplehash_to_ctrack(hash); + src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir); + memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple)); + nf_ct_put(ct); + + return true; +} + /* Bring out ya dead! */ static struct nf_conn * get_next_corpse(int (*iter)(struct nf_conn *i, void *data), @@ -2204,6 +2239,7 @@ err_cachep: static struct nf_ct_hook nf_conntrack_hook = { .update = nf_conntrack_update, .destroy = destroy_conntrack, + .get_tuple_skb = nf_conntrack_get_tuple_skb, }; void nf_conntrack_init_end(void) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index fcc89706745b..7af246764a35 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -295,6 +295,17 @@ config NET_SCH_FQ_CODEL If unsure, say N. +config NET_SCH_CAKE + tristate "Common Applications Kept Enhanced (CAKE)" + help + Say Y here if you want to use the Common Applications Kept Enhanced + (CAKE) queue management algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_cake. + + If unsure, say N. + config NET_SCH_FQ tristate "Fair Queue" help diff --git a/net/sched/Makefile b/net/sched/Makefile index 9a5a7077d217..673ee7d26ff2 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -50,6 +50,7 @@ obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o +obj-$(CONFIG_NET_SCH_CAKE) += sch_cake.o obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 66dc19746c63..148a89ab789b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -149,10 +149,15 @@ EXPORT_SYMBOL(__tcf_idr_release); static size_t tcf_action_shared_attrs_size(const struct tc_action *act) { + struct tc_cookie *act_cookie; u32 cookie_len = 0; - if (act->act_cookie) - cookie_len = nla_total_size(act->act_cookie->len); + rcu_read_lock(); + act_cookie = rcu_dereference(act->act_cookie); + + if (act_cookie) + cookie_len = nla_total_size(act_cookie->len); + rcu_read_unlock(); return nla_total_size(0) /* action number nested */ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index a3eef00cd711..3d6e265758c0 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -485,8 +485,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, return -ENOMEM; err = tcf_idr_check_alloc(tn, &parm->index, a, bind); - if (err < 0) + if (err < 0) { + kfree(p); return err; + } exists = err; if (exists && bind) { kfree(p); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 487a152a852c..8b2474293db1 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -605,20 +605,22 @@ static int fl_set_key(struct net *net, struct nlattr **tb, TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, &mask->vlan); - ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]); - if (eth_type_vlan(ethertype)) { - fl_set_key_vlan(tb, ethertype, - TCA_FLOWER_KEY_CVLAN_ID, - TCA_FLOWER_KEY_CVLAN_PRIO, - &key->cvlan, &mask->cvlan); - fl_set_key_val(tb, &key->basic.n_proto, - TCA_FLOWER_KEY_CVLAN_ETH_TYPE, - &mask->basic.n_proto, - TCA_FLOWER_UNSPEC, - sizeof(key->basic.n_proto)); - } else { - key->basic.n_proto = ethertype; - mask->basic.n_proto = cpu_to_be16(~0); + if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { + ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]); + if (eth_type_vlan(ethertype)) { + fl_set_key_vlan(tb, ethertype, + TCA_FLOWER_KEY_CVLAN_ID, + TCA_FLOWER_KEY_CVLAN_PRIO, + &key->cvlan, &mask->cvlan); + fl_set_key_val(tb, &key->basic.n_proto, + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, + &mask->basic.n_proto, + TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); + } else { + key->basic.n_proto = ethertype; + mask->basic.n_proto = cpu_to_be16(~0); + } } } else { key->basic.n_proto = ethertype; @@ -1344,14 +1346,16 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, key->cvlan.vlan_tpid))) goto nla_put_failure; - if (mask->cvlan.vlan_tpid) { - if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, - key->basic.n_proto)) - goto nla_put_failure; - } else if (mask->vlan.vlan_tpid) { - if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, - key->basic.n_proto)) - goto nla_put_failure; + if (mask->basic.n_proto) { + if (mask->cvlan.vlan_tpid) { + if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, + key->basic.n_proto)) + goto nla_put_failure; + } else if (mask->vlan.vlan_tpid) { + if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, + key->basic.n_proto)) + goto nla_put_failure; + } } if ((key->basic.n_proto == htons(ETH_P_IP) || diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c new file mode 100644 index 000000000000..30695691e9ff --- /dev/null +++ b/net/sched/sch_cake.c @@ -0,0 +1,3019 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* COMMON Applications Kept Enhanced (CAKE) discipline + * + * Copyright (C) 2014-2018 Jonathan Morton <[email protected]> + * Copyright (C) 2015-2018 Toke Høiland-Jørgensen <[email protected]> + * Copyright (C) 2014-2018 Dave Täht <[email protected]> + * Copyright (C) 2015-2018 Sebastian Moeller <[email protected]> + * (C) 2015-2018 Kevin Darbyshire-Bryant <[email protected]> + * Copyright (C) 2017-2018 Ryan Mounce <[email protected]> + * + * The CAKE Principles: + * (or, how to have your cake and eat it too) + * + * This is a combination of several shaping, AQM and FQ techniques into one + * easy-to-use package: + * + * - An overall bandwidth shaper, to move the bottleneck away from dumb CPE + * equipment and bloated MACs. This operates in deficit mode (as in sch_fq), + * eliminating the need for any sort of burst parameter (eg. token bucket + * depth). Burst support is limited to that necessary to overcome scheduling + * latency. + * + * - A Diffserv-aware priority queue, giving more priority to certain classes, + * up to a specified fraction of bandwidth. Above that bandwidth threshold, + * the priority is reduced to avoid starving other tins. + * + * - Each priority tin has a separate Flow Queue system, to isolate traffic + * flows from each other. This prevents a burst on one flow from increasing + * the delay to another. Flows are distributed to queues using a + * set-associative hash function. + * + * - Each queue is actively managed by Cobalt, which is a combination of the + * Codel and Blue AQM algorithms. This serves flows fairly, and signals + * congestion early via ECN (if available) and/or packet drops, to keep + * latency low. The codel parameters are auto-tuned based on the bandwidth + * setting, as is necessary at low bandwidths. + * + * The configuration parameters are kept deliberately simple for ease of use. + * Everything has sane defaults. Complete generality of configuration is *not* + * a goal. + * + * The priority queue operates according to a weighted DRR scheme, combined with + * a bandwidth tracker which reuses the shaper logic to detect which side of the + * bandwidth sharing threshold the tin is operating. This determines whether a + * priority-based weight (high) or a bandwidth-based weight (low) is used for + * that tin in the current pass. + * + * This qdisc was inspired by Eric Dumazet's fq_codel code, which he kindly + * granted us permission to leverage. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/jiffies.h> +#include <linux/string.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <linux/jhash.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/reciprocal_div.h> +#include <net/netlink.h> +#include <linux/version.h> +#include <linux/if_vlan.h> +#include <net/pkt_sched.h> +#include <net/pkt_cls.h> +#include <net/tcp.h> +#include <net/flow_dissector.h> + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack_core.h> +#endif + +#define CAKE_SET_WAYS (8) +#define CAKE_MAX_TINS (8) +#define CAKE_QUEUES (1024) +#define CAKE_FLOW_MASK 63 +#define CAKE_FLOW_NAT_FLAG 64 +#define CAKE_SPLIT_GSO_THRESHOLD (125000000) /* 1Gbps */ + +/* struct cobalt_params - contains codel and blue parameters + * @interval: codel initial drop rate + * @target: maximum persistent sojourn time & blue update rate + * @mtu_time: serialisation delay of maximum-size packet + * @p_inc: increment of blue drop probability (0.32 fxp) + * @p_dec: decrement of blue drop probability (0.32 fxp) + */ +struct cobalt_params { + u64 interval; + u64 target; + u64 mtu_time; + u32 p_inc; + u32 p_dec; +}; + +/* struct cobalt_vars - contains codel and blue variables + * @count: codel dropping frequency + * @rec_inv_sqrt: reciprocal value of sqrt(count) >> 1 + * @drop_next: time to drop next packet, or when we dropped last + * @blue_timer: Blue time to next drop + * @p_drop: BLUE drop probability (0.32 fxp) + * @dropping: set if in dropping state + * @ecn_marked: set if marked + */ +struct cobalt_vars { + u32 count; + u32 rec_inv_sqrt; + ktime_t drop_next; + ktime_t blue_timer; + u32 p_drop; + bool dropping; + bool ecn_marked; +}; + +enum { + CAKE_SET_NONE = 0, + CAKE_SET_SPARSE, + CAKE_SET_SPARSE_WAIT, /* counted in SPARSE, actually in BULK */ + CAKE_SET_BULK, + CAKE_SET_DECAYING +}; + +struct cake_flow { + /* this stuff is all needed per-flow at dequeue time */ + struct sk_buff *head; + struct sk_buff *tail; + struct list_head flowchain; + s32 deficit; + u32 dropped; + struct cobalt_vars cvars; + u16 srchost; /* index into cake_host table */ + u16 dsthost; + u8 set; +}; /* please try to keep this structure <= 64 bytes */ + +struct cake_host { + u32 srchost_tag; + u32 dsthost_tag; + u16 srchost_refcnt; + u16 dsthost_refcnt; +}; + +struct cake_heap_entry { + u16 t:3, b:10; +}; + +struct cake_tin_data { + struct cake_flow flows[CAKE_QUEUES]; + u32 backlogs[CAKE_QUEUES]; + u32 tags[CAKE_QUEUES]; /* for set association */ + u16 overflow_idx[CAKE_QUEUES]; + struct cake_host hosts[CAKE_QUEUES]; /* for triple isolation */ + u16 flow_quantum; + + struct cobalt_params cparams; + u32 drop_overlimit; + u16 bulk_flow_count; + u16 sparse_flow_count; + u16 decaying_flow_count; + u16 unresponsive_flow_count; + + u32 max_skblen; + + struct list_head new_flows; + struct list_head old_flows; + struct list_head decaying_flows; + + /* time_next = time_this + ((len * rate_ns) >> rate_shft) */ + ktime_t time_next_packet; + u64 tin_rate_ns; + u64 tin_rate_bps; + u16 tin_rate_shft; + + u16 tin_quantum_prio; + u16 tin_quantum_band; + s32 tin_deficit; + u32 tin_backlog; + u32 tin_dropped; + u32 tin_ecn_mark; + + u32 packets; + u64 bytes; + + u32 ack_drops; + + /* moving averages */ + u64 avge_delay; + u64 peak_delay; + u64 base_delay; + + /* hash function stats */ + u32 way_directs; + u32 way_hits; + u32 way_misses; + u32 way_collisions; +}; /* number of tins is small, so size of this struct doesn't matter much */ + +struct cake_sched_data { + struct tcf_proto __rcu *filter_list; /* optional external classifier */ + struct tcf_block *block; + struct cake_tin_data *tins; + + struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS]; + u16 overflow_timeout; + + u16 tin_cnt; + u8 tin_mode; + u8 flow_mode; + u8 ack_filter; + u8 atm_mode; + + /* time_next = time_this + ((len * rate_ns) >> rate_shft) */ + u16 rate_shft; + ktime_t time_next_packet; + ktime_t failsafe_next_packet; + u64 rate_ns; + u64 rate_bps; + u16 rate_flags; + s16 rate_overhead; + u16 rate_mpu; + u64 interval; + u64 target; + + /* resource tracking */ + u32 buffer_used; + u32 buffer_max_used; + u32 buffer_limit; + u32 buffer_config_limit; + + /* indices for dequeue */ + u16 cur_tin; + u16 cur_flow; + + struct qdisc_watchdog watchdog; + const u8 *tin_index; + const u8 *tin_order; + + /* bandwidth capacity estimate */ + ktime_t last_packet_time; + ktime_t avg_window_begin; + u64 avg_packet_interval; + u64 avg_window_bytes; + u64 avg_peak_bandwidth; + ktime_t last_reconfig_time; + + /* packet length stats */ + u32 avg_netoff; + u16 max_netlen; + u16 max_adjlen; + u16 min_netlen; + u16 min_adjlen; +}; + +enum { + CAKE_FLAG_OVERHEAD = BIT(0), + CAKE_FLAG_AUTORATE_INGRESS = BIT(1), + CAKE_FLAG_INGRESS = BIT(2), + CAKE_FLAG_WASH = BIT(3), + CAKE_FLAG_SPLIT_GSO = BIT(4) +}; + +/* COBALT operates the Codel and BLUE algorithms in parallel, in order to + * obtain the best features of each. Codel is excellent on flows which + * respond to congestion signals in a TCP-like way. BLUE is more effective on + * unresponsive flows. + */ + +struct cobalt_skb_cb { + ktime_t enqueue_time; + u32 adjusted_len; +}; + +static u64 us_to_ns(u64 us) +{ + return us * NSEC_PER_USEC; +} + +static struct cobalt_skb_cb *get_cobalt_cb(const struct sk_buff *skb) +{ + qdisc_cb_private_validate(skb, sizeof(struct cobalt_skb_cb)); + return (struct cobalt_skb_cb *)qdisc_skb_cb(skb)->data; +} + +static ktime_t cobalt_get_enqueue_time(const struct sk_buff *skb) +{ + return get_cobalt_cb(skb)->enqueue_time; +} + +static void cobalt_set_enqueue_time(struct sk_buff *skb, + ktime_t now) +{ + get_cobalt_cb(skb)->enqueue_time = now; +} + +static u16 quantum_div[CAKE_QUEUES + 1] = {0}; + +/* Diffserv lookup tables */ + +static const u8 precedence[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, +}; + +static const u8 diffserv8[] = { + 2, 5, 1, 2, 4, 2, 2, 2, + 0, 2, 1, 2, 1, 2, 1, 2, + 5, 2, 4, 2, 4, 2, 4, 2, + 3, 2, 3, 2, 3, 2, 3, 2, + 6, 2, 3, 2, 3, 2, 3, 2, + 6, 2, 2, 2, 6, 2, 6, 2, + 7, 2, 2, 2, 2, 2, 2, 2, + 7, 2, 2, 2, 2, 2, 2, 2, +}; + +static const u8 diffserv4[] = { + 0, 2, 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 2, 0, 2, 0, 2, 0, + 2, 0, 2, 0, 2, 0, 2, 0, + 3, 0, 2, 0, 2, 0, 2, 0, + 3, 0, 0, 0, 3, 0, 3, 0, + 3, 0, 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, +}; + +static const u8 diffserv3[] = { + 0, 0, 0, 0, 2, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 0, 2, 0, + 2, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, 0, 0, +}; + +static const u8 besteffort[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* tin priority order for stats dumping */ + +static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7}; +static const u8 bulk_order[] = {1, 0, 2, 3}; + +#define REC_INV_SQRT_CACHE (16) +static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0}; + +/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots + * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) + * + * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 + */ + +static void cobalt_newton_step(struct cobalt_vars *vars) +{ + u32 invsqrt, invsqrt2; + u64 val; + + invsqrt = vars->rec_inv_sqrt; + invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; + val = (3LL << 32) - ((u64)vars->count * invsqrt2); + + val >>= 2; /* avoid overflow in following multiply */ + val = (val * invsqrt) >> (32 - 2 + 1); + + vars->rec_inv_sqrt = val; +} + +static void cobalt_invsqrt(struct cobalt_vars *vars) +{ + if (vars->count < REC_INV_SQRT_CACHE) + vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count]; + else + cobalt_newton_step(vars); +} + +/* There is a big difference in timing between the accurate values placed in + * the cache and the approximations given by a single Newton step for small + * count values, particularly when stepping from count 1 to 2 or vice versa. + * Above 16, a single Newton step gives sufficient accuracy in either + * direction, given the precision stored. + * + * The magnitude of the error when stepping up to count 2 is such as to give + * the value that *should* have been produced at count 4. + */ + +static void cobalt_cache_init(void) +{ + struct cobalt_vars v; + + memset(&v, 0, sizeof(v)); + v.rec_inv_sqrt = ~0U; + cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt; + + for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) { + cobalt_newton_step(&v); + cobalt_newton_step(&v); + cobalt_newton_step(&v); + cobalt_newton_step(&v); + + cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt; + } +} + +static void cobalt_vars_init(struct cobalt_vars *vars) +{ + memset(vars, 0, sizeof(*vars)); + + if (!cobalt_rec_inv_sqrt_cache[0]) { + cobalt_cache_init(); + cobalt_rec_inv_sqrt_cache[0] = ~0; + } +} + +/* CoDel control_law is t + interval/sqrt(count) + * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid + * both sqrt() and divide operation. + */ +static ktime_t cobalt_control(ktime_t t, + u64 interval, + u32 rec_inv_sqrt) +{ + return ktime_add_ns(t, reciprocal_scale(interval, + rec_inv_sqrt)); +} + +/* Call this when a packet had to be dropped due to queue overflow. Returns + * true if the BLUE state was quiescent before but active after this call. + */ +static bool cobalt_queue_full(struct cobalt_vars *vars, + struct cobalt_params *p, + ktime_t now) +{ + bool up = false; + + if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { + up = !vars->p_drop; + vars->p_drop += p->p_inc; + if (vars->p_drop < p->p_inc) + vars->p_drop = ~0; + vars->blue_timer = now; + } + vars->dropping = true; + vars->drop_next = now; + if (!vars->count) + vars->count = 1; + + return up; +} + +/* Call this when the queue was serviced but turned out to be empty. Returns + * true if the BLUE state was active before but quiescent after this call. + */ +static bool cobalt_queue_empty(struct cobalt_vars *vars, + struct cobalt_params *p, + ktime_t now) +{ + bool down = false; + + if (vars->p_drop && + ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { + if (vars->p_drop < p->p_dec) + vars->p_drop = 0; + else + vars->p_drop -= p->p_dec; + vars->blue_timer = now; + down = !vars->p_drop; + } + vars->dropping = false; + + if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) { + vars->count--; + cobalt_invsqrt(vars); + vars->drop_next = cobalt_control(vars->drop_next, + p->interval, + vars->rec_inv_sqrt); + } + + return down; +} + +/* Call this with a freshly dequeued packet for possible congestion marking. + * Returns true as an instruction to drop the packet, false for delivery. + */ +static bool cobalt_should_drop(struct cobalt_vars *vars, + struct cobalt_params *p, + ktime_t now, + struct sk_buff *skb, + u32 bulk_flows) +{ + bool next_due, over_target, drop = false; + ktime_t schedule; + u64 sojourn; + +/* The 'schedule' variable records, in its sign, whether 'now' is before or + * after 'drop_next'. This allows 'drop_next' to be updated before the next + * scheduling decision is actually branched, without destroying that + * information. Similarly, the first 'schedule' value calculated is preserved + * in the boolean 'next_due'. + * + * As for 'drop_next', we take advantage of the fact that 'interval' is both + * the delay between first exceeding 'target' and the first signalling event, + * *and* the scaling factor for the signalling frequency. It's therefore very + * natural to use a single mechanism for both purposes, and eliminates a + * significant amount of reference Codel's spaghetti code. To help with this, + * both the '0' and '1' entries in the invsqrt cache are 0xFFFFFFFF, as close + * as possible to 1.0 in fixed-point. + */ + + sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb))); + schedule = ktime_sub(now, vars->drop_next); + over_target = sojourn > p->target && + sojourn > p->mtu_time * bulk_flows * 2 && + sojourn > p->mtu_time * 4; + next_due = vars->count && ktime_to_ns(schedule) >= 0; + + vars->ecn_marked = false; + + if (over_target) { + if (!vars->dropping) { + vars->dropping = true; + vars->drop_next = cobalt_control(now, + p->interval, + vars->rec_inv_sqrt); + } + if (!vars->count) + vars->count = 1; + } else if (vars->dropping) { + vars->dropping = false; + } + + if (next_due && vars->dropping) { + /* Use ECN mark if possible, otherwise drop */ + drop = !(vars->ecn_marked = INET_ECN_set_ce(skb)); + + vars->count++; + if (!vars->count) + vars->count--; + cobalt_invsqrt(vars); + vars->drop_next = cobalt_control(vars->drop_next, + p->interval, + vars->rec_inv_sqrt); + schedule = ktime_sub(now, vars->drop_next); + } else { + while (next_due) { + vars->count--; + cobalt_invsqrt(vars); + vars->drop_next = cobalt_control(vars->drop_next, + p->interval, + vars->rec_inv_sqrt); + schedule = ktime_sub(now, vars->drop_next); + next_due = vars->count && ktime_to_ns(schedule) >= 0; + } + } + + /* Simple BLUE implementation. Lack of ECN is deliberate. */ + if (vars->p_drop) + drop |= (prandom_u32() < vars->p_drop); + + /* Overload the drop_next field as an activity timeout */ + if (!vars->count) + vars->drop_next = ktime_add_ns(now, p->interval); + else if (ktime_to_ns(schedule) > 0 && !drop) + vars->drop_next = now; + + return drop; +} + +static void cake_update_flowkeys(struct flow_keys *keys, + const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + struct nf_conntrack_tuple tuple = {}; + bool rev = !skb->_nfct; + + if (tc_skb_protocol(skb) != htons(ETH_P_IP)) + return; + + if (!nf_ct_get_tuple_skb(&tuple, skb)) + return; + + keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip; + keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip; + + if (keys->ports.ports) { + keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all; + keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all; + } +#endif +} + +/* Cake has several subtle multiple bit settings. In these cases you + * would be matching triple isolate mode as well. + */ + +static bool cake_dsrc(int flow_mode) +{ + return (flow_mode & CAKE_FLOW_DUAL_SRC) == CAKE_FLOW_DUAL_SRC; +} + +static bool cake_ddst(int flow_mode) +{ + return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST; +} + +static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, + int flow_mode) +{ + u32 flow_hash = 0, srchost_hash, dsthost_hash; + u16 reduced_hash, srchost_idx, dsthost_idx; + struct flow_keys keys, host_keys; + + if (unlikely(flow_mode == CAKE_FLOW_NONE)) + return 0; + + skb_flow_dissect_flow_keys(skb, &keys, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + + if (flow_mode & CAKE_FLOW_NAT_FLAG) + cake_update_flowkeys(&keys, skb); + + /* flow_hash_from_keys() sorts the addresses by value, so we have + * to preserve their order in a separate data structure to treat + * src and dst host addresses as independently selectable. + */ + host_keys = keys; + host_keys.ports.ports = 0; + host_keys.basic.ip_proto = 0; + host_keys.keyid.keyid = 0; + host_keys.tags.flow_label = 0; + + switch (host_keys.control.addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + host_keys.addrs.v4addrs.src = 0; + dsthost_hash = flow_hash_from_keys(&host_keys); + host_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + host_keys.addrs.v4addrs.dst = 0; + srchost_hash = flow_hash_from_keys(&host_keys); + break; + + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + memset(&host_keys.addrs.v6addrs.src, 0, + sizeof(host_keys.addrs.v6addrs.src)); + dsthost_hash = flow_hash_from_keys(&host_keys); + host_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + memset(&host_keys.addrs.v6addrs.dst, 0, + sizeof(host_keys.addrs.v6addrs.dst)); + srchost_hash = flow_hash_from_keys(&host_keys); + break; + + default: + dsthost_hash = 0; + srchost_hash = 0; + } + + /* This *must* be after the above switch, since as a + * side-effect it sorts the src and dst addresses. + */ + if (flow_mode & CAKE_FLOW_FLOWS) + flow_hash = flow_hash_from_keys(&keys); + + if (!(flow_mode & CAKE_FLOW_FLOWS)) { + if (flow_mode & CAKE_FLOW_SRC_IP) + flow_hash ^= srchost_hash; + + if (flow_mode & CAKE_FLOW_DST_IP) + flow_hash ^= dsthost_hash; + } + + reduced_hash = flow_hash % CAKE_QUEUES; + + /* set-associative hashing */ + /* fast path if no hash collision (direct lookup succeeds) */ + if (likely(q->tags[reduced_hash] == flow_hash && + q->flows[reduced_hash].set)) { + q->way_directs++; + } else { + u32 inner_hash = reduced_hash % CAKE_SET_WAYS; + u32 outer_hash = reduced_hash - inner_hash; + bool allocate_src = false; + bool allocate_dst = false; + u32 i, k; + + /* check if any active queue in the set is reserved for + * this flow. + */ + for (i = 0, k = inner_hash; i < CAKE_SET_WAYS; + i++, k = (k + 1) % CAKE_SET_WAYS) { + if (q->tags[outer_hash + k] == flow_hash) { + if (i) + q->way_hits++; + + if (!q->flows[outer_hash + k].set) { + /* need to increment host refcnts */ + allocate_src = cake_dsrc(flow_mode); + allocate_dst = cake_ddst(flow_mode); + } + + goto found; + } + } + + /* no queue is reserved for this flow, look for an + * empty one. + */ + for (i = 0; i < CAKE_SET_WAYS; + i++, k = (k + 1) % CAKE_SET_WAYS) { + if (!q->flows[outer_hash + k].set) { + q->way_misses++; + allocate_src = cake_dsrc(flow_mode); + allocate_dst = cake_ddst(flow_mode); + goto found; + } + } + + /* With no empty queues, default to the original + * queue, accept the collision, update the host tags. + */ + q->way_collisions++; + q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--; + q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--; + allocate_src = cake_dsrc(flow_mode); + allocate_dst = cake_ddst(flow_mode); +found: + /* reserve queue for future packets in same flow */ + reduced_hash = outer_hash + k; + q->tags[reduced_hash] = flow_hash; + + if (allocate_src) { + srchost_idx = srchost_hash % CAKE_QUEUES; + inner_hash = srchost_idx % CAKE_SET_WAYS; + outer_hash = srchost_idx - inner_hash; + for (i = 0, k = inner_hash; i < CAKE_SET_WAYS; + i++, k = (k + 1) % CAKE_SET_WAYS) { + if (q->hosts[outer_hash + k].srchost_tag == + srchost_hash) + goto found_src; + } + for (i = 0; i < CAKE_SET_WAYS; + i++, k = (k + 1) % CAKE_SET_WAYS) { + if (!q->hosts[outer_hash + k].srchost_refcnt) + break; + } + q->hosts[outer_hash + k].srchost_tag = srchost_hash; +found_src: + srchost_idx = outer_hash + k; + q->hosts[srchost_idx].srchost_refcnt++; + q->flows[reduced_hash].srchost = srchost_idx; + } + + if (allocate_dst) { + dsthost_idx = dsthost_hash % CAKE_QUEUES; + inner_hash = dsthost_idx % CAKE_SET_WAYS; + outer_hash = dsthost_idx - inner_hash; + for (i = 0, k = inner_hash; i < CAKE_SET_WAYS; + i++, k = (k + 1) % CAKE_SET_WAYS) { + if (q->hosts[outer_hash + k].dsthost_tag == + dsthost_hash) + goto found_dst; + } + for (i = 0; i < CAKE_SET_WAYS; + i++, k = (k + 1) % CAKE_SET_WAYS) { + if (!q->hosts[outer_hash + k].dsthost_refcnt) + break; + } + q->hosts[outer_hash + k].dsthost_tag = dsthost_hash; +found_dst: + dsthost_idx = outer_hash + k; + q->hosts[dsthost_idx].dsthost_refcnt++; + q->flows[reduced_hash].dsthost = dsthost_idx; + } + } + + return reduced_hash; +} + +/* helper functions : might be changed when/if skb use a standard list_head */ +/* remove one skb from head of slot queue */ + +static struct sk_buff *dequeue_head(struct cake_flow *flow) +{ + struct sk_buff *skb = flow->head; + + if (skb) { + flow->head = skb->next; + skb->next = NULL; + } + + return skb; +} + +/* add skb to flow queue (tail add) */ + +static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb) +{ + if (!flow->head) + flow->head = skb; + else + flow->tail->next = skb; + flow->tail = skb; + skb->next = NULL; +} + +static struct iphdr *cake_get_iphdr(const struct sk_buff *skb, + struct ipv6hdr *buf) +{ + unsigned int offset = skb_network_offset(skb); + struct iphdr *iph; + + iph = skb_header_pointer(skb, offset, sizeof(struct iphdr), buf); + + if (!iph) + return NULL; + + if (iph->version == 4 && iph->protocol == IPPROTO_IPV6) + return skb_header_pointer(skb, offset + iph->ihl * 4, + sizeof(struct ipv6hdr), buf); + + else if (iph->version == 4) + return iph; + + else if (iph->version == 6) + return skb_header_pointer(skb, offset, sizeof(struct ipv6hdr), + buf); + + return NULL; +} + +static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb, + void *buf, unsigned int bufsize) +{ + unsigned int offset = skb_network_offset(skb); + const struct ipv6hdr *ipv6h; + const struct tcphdr *tcph; + const struct iphdr *iph; + struct ipv6hdr _ipv6h; + struct tcphdr _tcph; + + ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h); + + if (!ipv6h) + return NULL; + + if (ipv6h->version == 4) { + iph = (struct iphdr *)ipv6h; + offset += iph->ihl * 4; + + /* special-case 6in4 tunnelling, as that is a common way to get + * v6 connectivity in the home + */ + if (iph->protocol == IPPROTO_IPV6) { + ipv6h = skb_header_pointer(skb, offset, + sizeof(_ipv6h), &_ipv6h); + + if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP) + return NULL; + + offset += sizeof(struct ipv6hdr); + + } else if (iph->protocol != IPPROTO_TCP) { + return NULL; + } + + } else if (ipv6h->version == 6) { + if (ipv6h->nexthdr != IPPROTO_TCP) + return NULL; + + offset += sizeof(struct ipv6hdr); + } else { + return NULL; + } + + tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (!tcph) + return NULL; + + return skb_header_pointer(skb, offset, + min(__tcp_hdrlen(tcph), bufsize), buf); +} + +static const void *cake_get_tcpopt(const struct tcphdr *tcph, + int code, int *oplen) +{ + /* inspired by tcp_parse_options in tcp_input.c */ + int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr); + const u8 *ptr = (const u8 *)(tcph + 1); + + while (length > 0) { + int opcode = *ptr++; + int opsize; + + if (opcode == TCPOPT_EOL) + break; + if (opcode == TCPOPT_NOP) { + length--; + continue; + } + opsize = *ptr++; + if (opsize < 2 || opsize > length) + break; + + if (opcode == code) { + *oplen = opsize; + return ptr; + } + + ptr += opsize - 2; + length -= opsize; + } + + return NULL; +} + +/* Compare two SACK sequences. A sequence is considered greater if it SACKs more + * bytes than the other. In the case where both sequences ACKs bytes that the + * other doesn't, A is considered greater. DSACKs in A also makes A be + * considered greater. + * + * @return -1, 0 or 1 as normal compare functions + */ +static int cake_tcph_sack_compare(const struct tcphdr *tcph_a, + const struct tcphdr *tcph_b) +{ + const struct tcp_sack_block_wire *sack_a, *sack_b; + u32 ack_seq_a = ntohl(tcph_a->ack_seq); + u32 bytes_a = 0, bytes_b = 0; + int oplen_a, oplen_b; + bool first = true; + + sack_a = cake_get_tcpopt(tcph_a, TCPOPT_SACK, &oplen_a); + sack_b = cake_get_tcpopt(tcph_b, TCPOPT_SACK, &oplen_b); + + /* pointers point to option contents */ + oplen_a -= TCPOLEN_SACK_BASE; + oplen_b -= TCPOLEN_SACK_BASE; + + if (sack_a && oplen_a >= sizeof(*sack_a) && + (!sack_b || oplen_b < sizeof(*sack_b))) + return -1; + else if (sack_b && oplen_b >= sizeof(*sack_b) && + (!sack_a || oplen_a < sizeof(*sack_a))) + return 1; + else if ((!sack_a || oplen_a < sizeof(*sack_a)) && + (!sack_b || oplen_b < sizeof(*sack_b))) + return 0; + + while (oplen_a >= sizeof(*sack_a)) { + const struct tcp_sack_block_wire *sack_tmp = sack_b; + u32 start_a = get_unaligned_be32(&sack_a->start_seq); + u32 end_a = get_unaligned_be32(&sack_a->end_seq); + int oplen_tmp = oplen_b; + bool found = false; + + /* DSACK; always considered greater to prevent dropping */ + if (before(start_a, ack_seq_a)) + return -1; + + bytes_a += end_a - start_a; + + while (oplen_tmp >= sizeof(*sack_tmp)) { + u32 start_b = get_unaligned_be32(&sack_tmp->start_seq); + u32 end_b = get_unaligned_be32(&sack_tmp->end_seq); + + /* first time through we count the total size */ + if (first) + bytes_b += end_b - start_b; + + if (!after(start_b, start_a) && !before(end_b, end_a)) { + found = true; + if (!first) + break; + } + oplen_tmp -= sizeof(*sack_tmp); + sack_tmp++; + } + + if (!found) + return -1; + + oplen_a -= sizeof(*sack_a); + sack_a++; + first = false; + } + + /* If we made it this far, all ranges SACKed by A are covered by B, so + * either the SACKs are equal, or B SACKs more bytes. + */ + return bytes_b > bytes_a ? 1 : 0; +} + +static void cake_tcph_get_tstamp(const struct tcphdr *tcph, + u32 *tsval, u32 *tsecr) +{ + const u8 *ptr; + int opsize; + + ptr = cake_get_tcpopt(tcph, TCPOPT_TIMESTAMP, &opsize); + + if (ptr && opsize == TCPOLEN_TIMESTAMP) { + *tsval = get_unaligned_be32(ptr); + *tsecr = get_unaligned_be32(ptr + 4); + } +} + +static bool cake_tcph_may_drop(const struct tcphdr *tcph, + u32 tstamp_new, u32 tsecr_new) +{ + /* inspired by tcp_parse_options in tcp_input.c */ + int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr); + const u8 *ptr = (const u8 *)(tcph + 1); + u32 tstamp, tsecr; + + /* 3 reserved flags must be unset to avoid future breakage + * ACK must be set + * ECE/CWR are handled separately + * All other flags URG/PSH/RST/SYN/FIN must be unset + * 0x0FFF0000 = all TCP flags (confirm ACK=1, others zero) + * 0x00C00000 = CWR/ECE (handled separately) + * 0x0F3F0000 = 0x0FFF0000 & ~0x00C00000 + */ + if (((tcp_flag_word(tcph) & + cpu_to_be32(0x0F3F0000)) != TCP_FLAG_ACK)) + return false; + + while (length > 0) { + int opcode = *ptr++; + int opsize; + + if (opcode == TCPOPT_EOL) + break; + if (opcode == TCPOPT_NOP) { + length--; + continue; + } + opsize = *ptr++; + if (opsize < 2 || opsize > length) + break; + + switch (opcode) { + case TCPOPT_MD5SIG: /* doesn't influence state */ + break; + + case TCPOPT_SACK: /* stricter checking performed later */ + if (opsize % 8 != 2) + return false; + break; + + case TCPOPT_TIMESTAMP: + /* only drop timestamps lower than new */ + if (opsize != TCPOLEN_TIMESTAMP) + return false; + tstamp = get_unaligned_be32(ptr); + tsecr = get_unaligned_be32(ptr + 4); + if (after(tstamp, tstamp_new) || + after(tsecr, tsecr_new)) + return false; + break; + + case TCPOPT_MSS: /* these should only be set on SYN */ + case TCPOPT_WINDOW: + case TCPOPT_SACK_PERM: + case TCPOPT_FASTOPEN: + case TCPOPT_EXP: + default: /* don't drop if any unknown options are present */ + return false; + } + + ptr += opsize - 2; + length -= opsize; + } + + return true; +} + +static struct sk_buff *cake_ack_filter(struct cake_sched_data *q, + struct cake_flow *flow) +{ + bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE; + struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL; + struct sk_buff *skb_check, *skb_prev = NULL; + const struct ipv6hdr *ipv6h, *ipv6h_check; + unsigned char _tcph[64], _tcph_check[64]; + const struct tcphdr *tcph, *tcph_check; + const struct iphdr *iph, *iph_check; + struct ipv6hdr _iph, _iph_check; + const struct sk_buff *skb; + int seglen, num_found = 0; + u32 tstamp = 0, tsecr = 0; + __be32 elig_flags = 0; + int sack_comp; + + /* no other possible ACKs to filter */ + if (flow->head == flow->tail) + return NULL; + + skb = flow->tail; + tcph = cake_get_tcphdr(skb, _tcph, sizeof(_tcph)); + iph = cake_get_iphdr(skb, &_iph); + if (!tcph) + return NULL; + + cake_tcph_get_tstamp(tcph, &tstamp, &tsecr); + + /* the 'triggering' packet need only have the ACK flag set. + * also check that SYN is not set, as there won't be any previous ACKs. + */ + if ((tcp_flag_word(tcph) & + (TCP_FLAG_ACK | TCP_FLAG_SYN)) != TCP_FLAG_ACK) + return NULL; + + /* the 'triggering' ACK is at the tail of the queue, we have already + * returned if it is the only packet in the flow. loop through the rest + * of the queue looking for pure ACKs with the same 5-tuple as the + * triggering one. + */ + for (skb_check = flow->head; + skb_check && skb_check != skb; + skb_prev = skb_check, skb_check = skb_check->next) { + iph_check = cake_get_iphdr(skb_check, &_iph_check); + tcph_check = cake_get_tcphdr(skb_check, &_tcph_check, + sizeof(_tcph_check)); + + /* only TCP packets with matching 5-tuple are eligible, and only + * drop safe headers + */ + if (!tcph_check || iph->version != iph_check->version || + tcph_check->source != tcph->source || + tcph_check->dest != tcph->dest) + continue; + + if (iph_check->version == 4) { + if (iph_check->saddr != iph->saddr || + iph_check->daddr != iph->daddr) + continue; + + seglen = ntohs(iph_check->tot_len) - + (4 * iph_check->ihl); + } else if (iph_check->version == 6) { + ipv6h = (struct ipv6hdr *)iph; + ipv6h_check = (struct ipv6hdr *)iph_check; + + if (ipv6_addr_cmp(&ipv6h_check->saddr, &ipv6h->saddr) || + ipv6_addr_cmp(&ipv6h_check->daddr, &ipv6h->daddr)) + continue; + + seglen = ntohs(ipv6h_check->payload_len); + } else { + WARN_ON(1); /* shouldn't happen */ + continue; + } + + /* If the ECE/CWR flags changed from the previous eligible + * packet in the same flow, we should no longer be dropping that + * previous packet as this would lose information. + */ + if (elig_ack && (tcp_flag_word(tcph_check) & + (TCP_FLAG_ECE | TCP_FLAG_CWR)) != elig_flags) { + elig_ack = NULL; + elig_ack_prev = NULL; + num_found--; + } + + /* Check TCP options and flags, don't drop ACKs with segment + * data, and don't drop ACKs with a higher cumulative ACK + * counter than the triggering packet. Check ACK seqno here to + * avoid parsing SACK options of packets we are going to exclude + * anyway. + */ + if (!cake_tcph_may_drop(tcph_check, tstamp, tsecr) || + (seglen - __tcp_hdrlen(tcph_check)) != 0 || + after(ntohl(tcph_check->ack_seq), ntohl(tcph->ack_seq))) + continue; + + /* Check SACK options. The triggering packet must SACK more data + * than the ACK under consideration, or SACK the same range but + * have a larger cumulative ACK counter. The latter is a + * pathological case, but is contained in the following check + * anyway, just to be safe. + */ + sack_comp = cake_tcph_sack_compare(tcph_check, tcph); + + if (sack_comp < 0 || + (ntohl(tcph_check->ack_seq) == ntohl(tcph->ack_seq) && + sack_comp == 0)) + continue; + + /* At this point we have found an eligible pure ACK to drop; if + * we are in aggressive mode, we are done. Otherwise, keep + * searching unless this is the second eligible ACK we + * found. + * + * Since we want to drop ACK closest to the head of the queue, + * save the first eligible ACK we find, even if we need to loop + * again. + */ + if (!elig_ack) { + elig_ack = skb_check; + elig_ack_prev = skb_prev; + elig_flags = (tcp_flag_word(tcph_check) + & (TCP_FLAG_ECE | TCP_FLAG_CWR)); + } + + if (num_found++ > 0) + goto found; + } + + /* We made it through the queue without finding two eligible ACKs . If + * we found a single eligible ACK we can drop it in aggressive mode if + * we can guarantee that this does not interfere with ECN flag + * information. We ensure this by dropping it only if the enqueued + * packet is consecutive with the eligible ACK, and their flags match. + */ + if (elig_ack && aggressive && elig_ack->next == skb && + (elig_flags == (tcp_flag_word(tcph) & + (TCP_FLAG_ECE | TCP_FLAG_CWR)))) + goto found; + + return NULL; + +found: + if (elig_ack_prev) + elig_ack_prev->next = elig_ack->next; + else + flow->head = elig_ack->next; + + elig_ack->next = NULL; + + return elig_ack; +} + +static u64 cake_ewma(u64 avg, u64 sample, u32 shift) +{ + avg -= avg >> shift; + avg += sample >> shift; + return avg; +} + +static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off) +{ + if (q->rate_flags & CAKE_FLAG_OVERHEAD) + len -= off; + + if (q->max_netlen < len) + q->max_netlen = len; + if (q->min_netlen > len) + q->min_netlen = len; + + len += q->rate_overhead; + + if (len < q->rate_mpu) + len = q->rate_mpu; + + if (q->atm_mode == CAKE_ATM_ATM) { + len += 47; + len /= 48; + len *= 53; + } else if (q->atm_mode == CAKE_ATM_PTM) { + /* Add one byte per 64 bytes or part thereof. + * This is conservative and easier to calculate than the + * precise value. + */ + len += (len + 63) / 64; + } + + if (q->max_adjlen < len) + q->max_adjlen = len; + if (q->min_adjlen > len) + q->min_adjlen = len; + + return len; +} + +static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int hdr_len, last_len = 0; + u32 off = skb_network_offset(skb); + u32 len = qdisc_pkt_len(skb); + u16 segs = 1; + + q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8); + + if (!shinfo->gso_size) + return cake_calc_overhead(q, len, off); + + /* borrowed from qdisc_pkt_len_init() */ + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + + /* + transport layer */ + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | + SKB_GSO_TCPV6))) { + const struct tcphdr *th; + struct tcphdr _tcphdr; + + th = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_tcphdr), &_tcphdr); + if (likely(th)) + hdr_len += __tcp_hdrlen(th); + } else { + struct udphdr _udphdr; + + if (skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_udphdr), &_udphdr)) + hdr_len += sizeof(struct udphdr); + } + + if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) + segs = DIV_ROUND_UP(skb->len - hdr_len, + shinfo->gso_size); + else + segs = shinfo->gso_segs; + + len = shinfo->gso_size + hdr_len; + last_len = skb->len - shinfo->gso_size * (segs - 1); + + return (cake_calc_overhead(q, len, off) * (segs - 1) + + cake_calc_overhead(q, last_len, off)); +} + +static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j) +{ + struct cake_heap_entry ii = q->overflow_heap[i]; + struct cake_heap_entry jj = q->overflow_heap[j]; + + q->overflow_heap[i] = jj; + q->overflow_heap[j] = ii; + + q->tins[ii.t].overflow_idx[ii.b] = j; + q->tins[jj.t].overflow_idx[jj.b] = i; +} + +static u32 cake_heap_get_backlog(const struct cake_sched_data *q, u16 i) +{ + struct cake_heap_entry ii = q->overflow_heap[i]; + + return q->tins[ii.t].backlogs[ii.b]; +} + +static void cake_heapify(struct cake_sched_data *q, u16 i) +{ + static const u32 a = CAKE_MAX_TINS * CAKE_QUEUES; + u32 mb = cake_heap_get_backlog(q, i); + u32 m = i; + + while (m < a) { + u32 l = m + m + 1; + u32 r = l + 1; + + if (l < a) { + u32 lb = cake_heap_get_backlog(q, l); + + if (lb > mb) { + m = l; + mb = lb; + } + } + + if (r < a) { + u32 rb = cake_heap_get_backlog(q, r); + + if (rb > mb) { + m = r; + mb = rb; + } + } + + if (m != i) { + cake_heap_swap(q, i, m); + i = m; + } else { + break; + } + } +} + +static void cake_heapify_up(struct cake_sched_data *q, u16 i) +{ + while (i > 0 && i < CAKE_MAX_TINS * CAKE_QUEUES) { + u16 p = (i - 1) >> 1; + u32 ib = cake_heap_get_backlog(q, i); + u32 pb = cake_heap_get_backlog(q, p); + + if (ib > pb) { + cake_heap_swap(q, i, p); + i = p; + } else { + break; + } + } +} + +static int cake_advance_shaper(struct cake_sched_data *q, + struct cake_tin_data *b, + struct sk_buff *skb, + ktime_t now, bool drop) +{ + u32 len = get_cobalt_cb(skb)->adjusted_len; + + /* charge packet bandwidth to this tin + * and to the global shaper. + */ + if (q->rate_ns) { + u64 tin_dur = (len * b->tin_rate_ns) >> b->tin_rate_shft; + u64 global_dur = (len * q->rate_ns) >> q->rate_shft; + u64 failsafe_dur = global_dur + (global_dur >> 1); + + if (ktime_before(b->time_next_packet, now)) + b->time_next_packet = ktime_add_ns(b->time_next_packet, + tin_dur); + + else if (ktime_before(b->time_next_packet, + ktime_add_ns(now, tin_dur))) + b->time_next_packet = ktime_add_ns(now, tin_dur); + + q->time_next_packet = ktime_add_ns(q->time_next_packet, + global_dur); + if (!drop) + q->failsafe_next_packet = \ + ktime_add_ns(q->failsafe_next_packet, + failsafe_dur); + } + return len; +} + +static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) +{ + struct cake_sched_data *q = qdisc_priv(sch); + ktime_t now = ktime_get(); + u32 idx = 0, tin = 0, len; + struct cake_heap_entry qq; + struct cake_tin_data *b; + struct cake_flow *flow; + struct sk_buff *skb; + + if (!q->overflow_timeout) { + int i; + /* Build fresh max-heap */ + for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--) + cake_heapify(q, i); + } + q->overflow_timeout = 65535; + + /* select longest queue for pruning */ + qq = q->overflow_heap[0]; + tin = qq.t; + idx = qq.b; + + b = &q->tins[tin]; + flow = &b->flows[idx]; + skb = dequeue_head(flow); + if (unlikely(!skb)) { + /* heap has gone wrong, rebuild it next time */ + q->overflow_timeout = 0; + return idx + (tin << 16); + } + + if (cobalt_queue_full(&flow->cvars, &b->cparams, now)) + b->unresponsive_flow_count++; + + len = qdisc_pkt_len(skb); + q->buffer_used -= skb->truesize; + b->backlogs[idx] -= len; + b->tin_backlog -= len; + sch->qstats.backlog -= len; + qdisc_tree_reduce_backlog(sch, 1, len); + + flow->dropped++; + b->tin_dropped++; + sch->qstats.drops++; + + if (q->rate_flags & CAKE_FLAG_INGRESS) + cake_advance_shaper(q, b, skb, now, true); + + __qdisc_drop(skb, to_free); + sch->q.qlen--; + + cake_heapify(q, 0); + + return idx + (tin << 16); +} + +static void cake_wash_diffserv(struct sk_buff *skb) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); + break; + case htons(ETH_P_IPV6): + ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); + break; + default: + break; + } +} + +static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) +{ + u8 dscp; + + switch (skb->protocol) { + case htons(ETH_P_IP): + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + if (wash && dscp) + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); + return dscp; + + case htons(ETH_P_IPV6): + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + if (wash && dscp) + ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); + return dscp; + + case htons(ETH_P_ARP): + return 0x38; /* CS7 - Net Control */ + + default: + /* If there is no Diffserv field, treat as best-effort */ + return 0; + } +} + +static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, + struct sk_buff *skb) +{ + struct cake_sched_data *q = qdisc_priv(sch); + u32 tin; + + if (TC_H_MAJ(skb->priority) == sch->handle && + TC_H_MIN(skb->priority) > 0 && + TC_H_MIN(skb->priority) <= q->tin_cnt) { + tin = TC_H_MIN(skb->priority) - 1; + + if (q->rate_flags & CAKE_FLAG_WASH) + cake_wash_diffserv(skb); + } else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) { + /* extract the Diffserv Precedence field, if it exists */ + /* and clear DSCP bits if washing */ + tin = q->tin_index[cake_handle_diffserv(skb, + q->rate_flags & CAKE_FLAG_WASH)]; + if (unlikely(tin >= q->tin_cnt)) + tin = 0; + } else { + tin = 0; + if (q->rate_flags & CAKE_FLAG_WASH) + cake_wash_diffserv(skb); + } + + return &q->tins[tin]; +} + +static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, + struct sk_buff *skb, int flow_mode, int *qerr) +{ + struct cake_sched_data *q = qdisc_priv(sch); + struct tcf_proto *filter; + struct tcf_result res; + u32 flow = 0; + int result; + + filter = rcu_dereference_bh(q->filter_list); + if (!filter) + goto hash; + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + result = tcf_classify(skb, filter, &res, false); + + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + case TC_ACT_TRAP: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ + case TC_ACT_SHOT: + return 0; + } +#endif + if (TC_H_MIN(res.classid) <= CAKE_QUEUES) + flow = TC_H_MIN(res.classid); + } +hash: + *t = cake_select_tin(sch, skb); + return flow ?: cake_hash(*t, skb, flow_mode) + 1; +} + +static void cake_reconfigure(struct Qdisc *sch); + +static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + struct cake_sched_data *q = qdisc_priv(sch); + int len = qdisc_pkt_len(skb); + int uninitialized_var(ret); + struct sk_buff *ack = NULL; + ktime_t now = ktime_get(); + struct cake_tin_data *b; + struct cake_flow *flow; + u32 idx; + + /* choose flow to insert into */ + idx = cake_classify(sch, &b, skb, q->flow_mode, &ret); + if (idx == 0) { + if (ret & __NET_XMIT_BYPASS) + qdisc_qstats_drop(sch); + __qdisc_drop(skb, to_free); + return ret; + } + idx--; + flow = &b->flows[idx]; + + /* ensure shaper state isn't stale */ + if (!b->tin_backlog) { + if (ktime_before(b->time_next_packet, now)) + b->time_next_packet = now; + + if (!sch->q.qlen) { + if (ktime_before(q->time_next_packet, now)) { + q->failsafe_next_packet = now; + q->time_next_packet = now; + } else if (ktime_after(q->time_next_packet, now) && + ktime_after(q->failsafe_next_packet, now)) { + u64 next = \ + min(ktime_to_ns(q->time_next_packet), + ktime_to_ns( + q->failsafe_next_packet)); + sch->qstats.overlimits++; + qdisc_watchdog_schedule_ns(&q->watchdog, next); + } + } + } + + if (unlikely(len > b->max_skblen)) + b->max_skblen = len; + + if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) { + struct sk_buff *segs, *nskb; + netdev_features_t features = netif_skb_features(skb); + unsigned int slen = 0; + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR_OR_NULL(segs)) + return qdisc_drop(skb, sch, to_free); + + while (segs) { + nskb = segs->next; + segs->next = NULL; + qdisc_skb_cb(segs)->pkt_len = segs->len; + cobalt_set_enqueue_time(segs, now); + get_cobalt_cb(segs)->adjusted_len = cake_overhead(q, + segs); + flow_queue_add(flow, segs); + + sch->q.qlen++; + slen += segs->len; + q->buffer_used += segs->truesize; + b->packets++; + segs = nskb; + } + + /* stats */ + b->bytes += slen; + b->backlogs[idx] += slen; + b->tin_backlog += slen; + sch->qstats.backlog += slen; + q->avg_window_bytes += slen; + + qdisc_tree_reduce_backlog(sch, 1, len); + consume_skb(skb); + } else { + /* not splitting */ + cobalt_set_enqueue_time(skb, now); + get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb); + flow_queue_add(flow, skb); + + if (q->ack_filter) + ack = cake_ack_filter(q, flow); + + if (ack) { + b->ack_drops++; + sch->qstats.drops++; + b->bytes += qdisc_pkt_len(ack); + len -= qdisc_pkt_len(ack); + q->buffer_used += skb->truesize - ack->truesize; + if (q->rate_flags & CAKE_FLAG_INGRESS) + cake_advance_shaper(q, b, ack, now, true); + + qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack)); + consume_skb(ack); + } else { + sch->q.qlen++; + q->buffer_used += skb->truesize; + } + + /* stats */ + b->packets++; + b->bytes += len; + b->backlogs[idx] += len; + b->tin_backlog += len; + sch->qstats.backlog += len; + q->avg_window_bytes += len; + } + + if (q->overflow_timeout) + cake_heapify_up(q, b->overflow_idx[idx]); + + /* incoming bandwidth capacity estimate */ + if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) { + u64 packet_interval = \ + ktime_to_ns(ktime_sub(now, q->last_packet_time)); + + if (packet_interval > NSEC_PER_SEC) + packet_interval = NSEC_PER_SEC; + + /* filter out short-term bursts, eg. wifi aggregation */ + q->avg_packet_interval = \ + cake_ewma(q->avg_packet_interval, + packet_interval, + (packet_interval > q->avg_packet_interval ? + 2 : 8)); + + q->last_packet_time = now; + + if (packet_interval > q->avg_packet_interval) { + u64 window_interval = \ + ktime_to_ns(ktime_sub(now, + q->avg_window_begin)); + u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC; + + do_div(b, window_interval); + q->avg_peak_bandwidth = + cake_ewma(q->avg_peak_bandwidth, b, + b > q->avg_peak_bandwidth ? 2 : 8); + q->avg_window_bytes = 0; + q->avg_window_begin = now; + + if (ktime_after(now, + ktime_add_ms(q->last_reconfig_time, + 250))) { + q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4; + cake_reconfigure(sch); + } + } + } else { + q->avg_window_bytes = 0; + q->last_packet_time = now; + } + + /* flowchain */ + if (!flow->set || flow->set == CAKE_SET_DECAYING) { + struct cake_host *srchost = &b->hosts[flow->srchost]; + struct cake_host *dsthost = &b->hosts[flow->dsthost]; + u16 host_load = 1; + + if (!flow->set) { + list_add_tail(&flow->flowchain, &b->new_flows); + } else { + b->decaying_flow_count--; + list_move_tail(&flow->flowchain, &b->new_flows); + } + flow->set = CAKE_SET_SPARSE; + b->sparse_flow_count++; + + if (cake_dsrc(q->flow_mode)) + host_load = max(host_load, srchost->srchost_refcnt); + + if (cake_ddst(q->flow_mode)) + host_load = max(host_load, dsthost->dsthost_refcnt); + + flow->deficit = (b->flow_quantum * + quantum_div[host_load]) >> 16; + } else if (flow->set == CAKE_SET_SPARSE_WAIT) { + /* this flow was empty, accounted as a sparse flow, but actually + * in the bulk rotation. + */ + flow->set = CAKE_SET_BULK; + b->sparse_flow_count--; + b->bulk_flow_count++; + } + + if (q->buffer_used > q->buffer_max_used) + q->buffer_max_used = q->buffer_used; + + if (q->buffer_used > q->buffer_limit) { + u32 dropped = 0; + + while (q->buffer_used > q->buffer_limit) { + dropped++; + cake_drop(sch, to_free); + } + b->drop_overlimit += dropped; + } + return NET_XMIT_SUCCESS; +} + +static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) +{ + struct cake_sched_data *q = qdisc_priv(sch); + struct cake_tin_data *b = &q->tins[q->cur_tin]; + struct cake_flow *flow = &b->flows[q->cur_flow]; + struct sk_buff *skb = NULL; + u32 len; + + if (flow->head) { + skb = dequeue_head(flow); + len = qdisc_pkt_len(skb); + b->backlogs[q->cur_flow] -= len; + b->tin_backlog -= len; + sch->qstats.backlog -= len; + q->buffer_used -= skb->truesize; + sch->q.qlen--; + + if (q->overflow_timeout) + cake_heapify(q, b->overflow_idx[q->cur_flow]); + } + return skb; +} + +/* Discard leftover packets from a tin no longer in use. */ +static void cake_clear_tin(struct Qdisc *sch, u16 tin) +{ + struct cake_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb; + + q->cur_tin = tin; + for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++) + while (!!(skb = cake_dequeue_one(sch))) + kfree_skb(skb); +} + +static struct sk_buff *cake_dequeue(struct Qdisc *sch) +{ + struct cake_sched_data *q = qdisc_priv(sch); + struct cake_tin_data *b = &q->tins[q->cur_tin]; + struct cake_host *srchost, *dsthost; + ktime_t now = ktime_get(); + struct cake_flow *flow; + struct list_head *head; + bool first_flow = true; + struct sk_buff *skb; + u16 host_load; + u64 delay; + u32 len; + +begin: + if (!sch->q.qlen) + return NULL; + + /* global hard shaper */ + if (ktime_after(q->time_next_packet, now) && + ktime_after(q->failsafe_next_packet, now)) { + u64 next = min(ktime_to_ns(q->time_next_packet), + ktime_to_ns(q->failsafe_next_packet)); + + sch->qstats.overlimits++; + qdisc_watchdog_schedule_ns(&q->watchdog, next); + return NULL; + } + + /* Choose a class to work on. */ + if (!q->rate_ns) { + /* In unlimited mode, can't rely on shaper timings, just balance + * with DRR + */ + bool wrapped = false, empty = true; + + while (b->tin_deficit < 0 || + !(b->sparse_flow_count + b->bulk_flow_count)) { + if (b->tin_deficit <= 0) + b->tin_deficit += b->tin_quantum_band; + if (b->sparse_flow_count + b->bulk_flow_count) + empty = false; + + q->cur_tin++; + b++; + if (q->cur_tin >= q->tin_cnt) { + q->cur_tin = 0; + b = q->tins; + + if (wrapped) { + /* It's possible for q->qlen to be + * nonzero when we actually have no + * packets anywhere. + */ + if (empty) + return NULL; + } else { + wrapped = true; + } + } + } + } else { + /* In shaped mode, choose: + * - Highest-priority tin with queue and meeting schedule, or + * - The earliest-scheduled tin with queue. + */ + ktime_t best_time = KTIME_MAX; + int tin, best_tin = 0; + + for (tin = 0; tin < q->tin_cnt; tin++) { + b = q->tins + tin; + if ((b->sparse_flow_count + b->bulk_flow_count) > 0) { + ktime_t time_to_pkt = \ + ktime_sub(b->time_next_packet, now); + + if (ktime_to_ns(time_to_pkt) <= 0 || + ktime_compare(time_to_pkt, + best_time) <= 0) { + best_time = time_to_pkt; + best_tin = tin; + } + } + } + + q->cur_tin = best_tin; + b = q->tins + best_tin; + + /* No point in going further if no packets to deliver. */ + if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count))) + return NULL; + } + +retry: + /* service this class */ + head = &b->decaying_flows; + if (!first_flow || list_empty(head)) { + head = &b->new_flows; + if (list_empty(head)) { + head = &b->old_flows; + if (unlikely(list_empty(head))) { + head = &b->decaying_flows; + if (unlikely(list_empty(head))) + goto begin; + } + } + } + flow = list_first_entry(head, struct cake_flow, flowchain); + q->cur_flow = flow - b->flows; + first_flow = false; + + /* triple isolation (modified DRR++) */ + srchost = &b->hosts[flow->srchost]; + dsthost = &b->hosts[flow->dsthost]; + host_load = 1; + + if (cake_dsrc(q->flow_mode)) + host_load = max(host_load, srchost->srchost_refcnt); + + if (cake_ddst(q->flow_mode)) + host_load = max(host_load, dsthost->dsthost_refcnt); + + WARN_ON(host_load > CAKE_QUEUES); + + /* flow isolation (DRR++) */ + if (flow->deficit <= 0) { + /* The shifted prandom_u32() is a way to apply dithering to + * avoid accumulating roundoff errors + */ + flow->deficit += (b->flow_quantum * quantum_div[host_load] + + (prandom_u32() >> 16)) >> 16; + list_move_tail(&flow->flowchain, &b->old_flows); + + /* Keep all flows with deficits out of the sparse and decaying + * rotations. No non-empty flow can go into the decaying + * rotation, so they can't get deficits + */ + if (flow->set == CAKE_SET_SPARSE) { + if (flow->head) { + b->sparse_flow_count--; + b->bulk_flow_count++; + flow->set = CAKE_SET_BULK; + } else { + /* we've moved it to the bulk rotation for + * correct deficit accounting but we still want + * to count it as a sparse flow, not a bulk one. + */ + flow->set = CAKE_SET_SPARSE_WAIT; + } + } + goto retry; + } + + /* Retrieve a packet via the AQM */ + while (1) { + skb = cake_dequeue_one(sch); + if (!skb) { + /* this queue was actually empty */ + if (cobalt_queue_empty(&flow->cvars, &b->cparams, now)) + b->unresponsive_flow_count--; + + if (flow->cvars.p_drop || flow->cvars.count || + ktime_before(now, flow->cvars.drop_next)) { + /* keep in the flowchain until the state has + * decayed to rest + */ + list_move_tail(&flow->flowchain, + &b->decaying_flows); + if (flow->set == CAKE_SET_BULK) { + b->bulk_flow_count--; + b->decaying_flow_count++; + } else if (flow->set == CAKE_SET_SPARSE || + flow->set == CAKE_SET_SPARSE_WAIT) { + b->sparse_flow_count--; + b->decaying_flow_count++; + } + flow->set = CAKE_SET_DECAYING; + } else { + /* remove empty queue from the flowchain */ + list_del_init(&flow->flowchain); + if (flow->set == CAKE_SET_SPARSE || + flow->set == CAKE_SET_SPARSE_WAIT) + b->sparse_flow_count--; + else if (flow->set == CAKE_SET_BULK) + b->bulk_flow_count--; + else + b->decaying_flow_count--; + + flow->set = CAKE_SET_NONE; + srchost->srchost_refcnt--; + dsthost->dsthost_refcnt--; + } + goto begin; + } + + /* Last packet in queue may be marked, shouldn't be dropped */ + if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb, + (b->bulk_flow_count * + !!(q->rate_flags & + CAKE_FLAG_INGRESS))) || + !flow->head) + break; + + /* drop this packet, get another one */ + if (q->rate_flags & CAKE_FLAG_INGRESS) { + len = cake_advance_shaper(q, b, skb, + now, true); + flow->deficit -= len; + b->tin_deficit -= len; + } + flow->dropped++; + b->tin_dropped++; + qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); + qdisc_qstats_drop(sch); + kfree_skb(skb); + if (q->rate_flags & CAKE_FLAG_INGRESS) + goto retry; + } + + b->tin_ecn_mark += !!flow->cvars.ecn_marked; + qdisc_bstats_update(sch, skb); + + /* collect delay stats */ + delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb))); + b->avge_delay = cake_ewma(b->avge_delay, delay, 8); + b->peak_delay = cake_ewma(b->peak_delay, delay, + delay > b->peak_delay ? 2 : 8); + b->base_delay = cake_ewma(b->base_delay, delay, + delay < b->base_delay ? 2 : 8); + + len = cake_advance_shaper(q, b, skb, now, false); + flow->deficit -= len; + b->tin_deficit -= len; + + if (ktime_after(q->time_next_packet, now) && sch->q.qlen) { + u64 next = min(ktime_to_ns(q->time_next_packet), + ktime_to_ns(q->failsafe_next_packet)); + + qdisc_watchdog_schedule_ns(&q->watchdog, next); + } else if (!sch->q.qlen) { + int i; + + for (i = 0; i < q->tin_cnt; i++) { + if (q->tins[i].decaying_flow_count) { + ktime_t next = \ + ktime_add_ns(now, + q->tins[i].cparams.target); + + qdisc_watchdog_schedule_ns(&q->watchdog, + ktime_to_ns(next)); + break; + } + } + } + + if (q->overflow_timeout) + q->overflow_timeout--; + + return skb; +} + +static void cake_reset(struct Qdisc *sch) +{ + u32 c; + + for (c = 0; c < CAKE_MAX_TINS; c++) + cake_clear_tin(sch, c); +} + +static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = { + [TCA_CAKE_BASE_RATE64] = { .type = NLA_U64 }, + [TCA_CAKE_DIFFSERV_MODE] = { .type = NLA_U32 }, + [TCA_CAKE_ATM] = { .type = NLA_U32 }, + [TCA_CAKE_FLOW_MODE] = { .type = NLA_U32 }, + [TCA_CAKE_OVERHEAD] = { .type = NLA_S32 }, + [TCA_CAKE_RTT] = { .type = NLA_U32 }, + [TCA_CAKE_TARGET] = { .type = NLA_U32 }, + [TCA_CAKE_AUTORATE] = { .type = NLA_U32 }, + [TCA_CAKE_MEMORY] = { .type = NLA_U32 }, + [TCA_CAKE_NAT] = { .type = NLA_U32 }, + [TCA_CAKE_RAW] = { .type = NLA_U32 }, + [TCA_CAKE_WASH] = { .type = NLA_U32 }, + [TCA_CAKE_MPU] = { .type = NLA_U32 }, + [TCA_CAKE_INGRESS] = { .type = NLA_U32 }, + [TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 }, +}; + +static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, + u64 target_ns, u64 rtt_est_ns) +{ + /* convert byte-rate into time-per-byte + * so it will always unwedge in reasonable time. + */ + static const u64 MIN_RATE = 64; + u32 byte_target = mtu; + u64 byte_target_ns; + u8 rate_shft = 0; + u64 rate_ns = 0; + + b->flow_quantum = 1514; + if (rate) { + b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL); + rate_shft = 34; + rate_ns = ((u64)NSEC_PER_SEC) << rate_shft; + rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate)); + while (!!(rate_ns >> 34)) { + rate_ns >>= 1; + rate_shft--; + } + } /* else unlimited, ie. zero delay */ + + b->tin_rate_bps = rate; + b->tin_rate_ns = rate_ns; + b->tin_rate_shft = rate_shft; + + byte_target_ns = (byte_target * rate_ns) >> rate_shft; + + b->cparams.target = max((byte_target_ns * 3) / 2, target_ns); + b->cparams.interval = max(rtt_est_ns + + b->cparams.target - target_ns, + b->cparams.target * 2); + b->cparams.mtu_time = byte_target_ns; + b->cparams.p_inc = 1 << 24; /* 1/256 */ + b->cparams.p_dec = 1 << 20; /* 1/4096 */ +} + +static int cake_config_besteffort(struct Qdisc *sch) +{ + struct cake_sched_data *q = qdisc_priv(sch); + struct cake_tin_data *b = &q->tins[0]; + u32 mtu = psched_mtu(qdisc_dev(sch)); + u64 rate = q->rate_bps; + + q->tin_cnt = 1; + + q->tin_index = besteffort; + q->tin_order = normal_order; + + cake_set_rate(b, rate, mtu, + us_to_ns(q->target), us_to_ns(q->interval)); + b->tin_quantum_band = 65535; + b->tin_quantum_prio = 65535; + + return 0; +} + +static int cake_config_precedence(struct Qdisc *sch) +{ + /* convert high-level (user visible) parameters into internal format */ + struct cake_sched_data *q = qdisc_priv(sch); + u32 mtu = psched_mtu(qdisc_dev(sch)); + u64 rate = q->rate_bps; + u32 quantum1 = 256; + u32 quantum2 = 256; + u32 i; + + q->tin_cnt = 8; + q->tin_index = precedence; + q->tin_order = normal_order; + + for (i = 0; i < q->tin_cnt; i++) { + struct cake_tin_data *b = &q->tins[i]; + + cake_set_rate(b, rate, mtu, us_to_ns(q->target), + us_to_ns(q->interval)); + + b->tin_quantum_prio = max_t(u16, 1U, quantum1); + b->tin_quantum_band = max_t(u16, 1U, quantum2); + + /* calculate next class's parameters */ + rate *= 7; + rate >>= 3; + + quantum1 *= 3; + quantum1 >>= 1; + + quantum2 *= 7; + quantum2 >>= 3; + } + + return 0; +} + +/* List of known Diffserv codepoints: + * + * Least Effort (CS1) + * Best Effort (CS0) + * Max Reliability & LLT "Lo" (TOS1) + * Max Throughput (TOS2) + * Min Delay (TOS4) + * LLT "La" (TOS5) + * Assured Forwarding 1 (AF1x) - x3 + * Assured Forwarding 2 (AF2x) - x3 + * Assured Forwarding 3 (AF3x) - x3 + * Assured Forwarding 4 (AF4x) - x3 + * Precedence Class 2 (CS2) + * Precedence Class 3 (CS3) + * Precedence Class 4 (CS4) + * Precedence Class 5 (CS5) + * Precedence Class 6 (CS6) + * Precedence Class 7 (CS7) + * Voice Admit (VA) + * Expedited Forwarding (EF) + + * Total 25 codepoints. + */ + +/* List of traffic classes in RFC 4594: + * (roughly descending order of contended priority) + * (roughly ascending order of uncontended throughput) + * + * Network Control (CS6,CS7) - routing traffic + * Telephony (EF,VA) - aka. VoIP streams + * Signalling (CS5) - VoIP setup + * Multimedia Conferencing (AF4x) - aka. video calls + * Realtime Interactive (CS4) - eg. games + * Multimedia Streaming (AF3x) - eg. YouTube, NetFlix, Twitch + * Broadcast Video (CS3) + * Low Latency Data (AF2x,TOS4) - eg. database + * Ops, Admin, Management (CS2,TOS1) - eg. ssh + * Standard Service (CS0 & unrecognised codepoints) + * High Throughput Data (AF1x,TOS2) - eg. web traffic + * Low Priority Data (CS1) - eg. BitTorrent + + * Total 12 traffic classes. + */ + +static int cake_config_diffserv8(struct Qdisc *sch) +{ +/* Pruned list of traffic classes for typical applications: + * + * Network Control (CS6, CS7) + * Minimum Latency (EF, VA, CS5, CS4) + * Interactive Shell (CS2, TOS1) + * Low Latency Transactions (AF2x, TOS4) + * Video Streaming (AF4x, AF3x, CS3) + * Bog Standard (CS0 etc.) + * High Throughput (AF1x, TOS2) + * Background Traffic (CS1) + * + * Total 8 traffic classes. + */ + + struct cake_sched_data *q = qdisc_priv(sch); + u32 mtu = psched_mtu(qdisc_dev(sch)); + u64 rate = q->rate_bps; + u32 quantum1 = 256; + u32 quantum2 = 256; + u32 i; + + q->tin_cnt = 8; + + /* codepoint to class mapping */ + q->tin_index = diffserv8; + q->tin_order = normal_order; + + /* class characteristics */ + for (i = 0; i < q->tin_cnt; i++) { + struct cake_tin_data *b = &q->tins[i]; + + cake_set_rate(b, rate, mtu, us_to_ns(q->target), + us_to_ns(q->interval)); + + b->tin_quantum_prio = max_t(u16, 1U, quantum1); + b->tin_quantum_band = max_t(u16, 1U, quantum2); + + /* calculate next class's parameters */ + rate *= 7; + rate >>= 3; + + quantum1 *= 3; + quantum1 >>= 1; + + quantum2 *= 7; + quantum2 >>= 3; + } + + return 0; +} + +static int cake_config_diffserv4(struct Qdisc *sch) +{ +/* Further pruned list of traffic classes for four-class system: + * + * Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4) + * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1) + * Best Effort (CS0, AF1x, TOS2, and those not specified) + * Background Traffic (CS1) + * + * Total 4 traffic classes. + */ + + struct cake_sched_data *q = qdisc_priv(sch); + u32 mtu = psched_mtu(qdisc_dev(sch)); + u64 rate = q->rate_bps; + u32 quantum = 1024; + + q->tin_cnt = 4; + + /* codepoint to class mapping */ + q->tin_index = diffserv4; + q->tin_order = bulk_order; + + /* class characteristics */ + cake_set_rate(&q->tins[0], rate, mtu, + us_to_ns(q->target), us_to_ns(q->interval)); + cake_set_rate(&q->tins[1], rate >> 4, mtu, + us_to_ns(q->target), us_to_ns(q->interval)); + cake_set_rate(&q->tins[2], rate >> 1, mtu, + us_to_ns(q->target), us_to_ns(q->interval)); + cake_set_rate(&q->tins[3], rate >> 2, mtu, + us_to_ns(q->target), us_to_ns(q->interval)); + + /* priority weights */ + q->tins[0].tin_quantum_prio = quantum; + q->tins[1].tin_quantum_prio = quantum >> 4; + q->tins[2].tin_quantum_prio = quantum << 2; + q->tins[3].tin_quantum_prio = quantum << 4; + + /* bandwidth-sharing weights */ + q->tins[0].tin_quantum_band = quantum; + q->tins[1].tin_quantum_band = quantum >> 4; + q->tins[2].tin_quantum_band = quantum >> 1; + q->tins[3].tin_quantum_band = quantum >> 2; + + return 0; +} + +static int cake_config_diffserv3(struct Qdisc *sch) +{ +/* Simplified Diffserv structure with 3 tins. + * Low Priority (CS1) + * Best Effort + * Latency Sensitive (TOS4, VA, EF, CS6, CS7) + */ + struct cake_sched_data *q = qdisc_priv(sch); + u32 mtu = psched_mtu(qdisc_dev(sch)); + u64 rate = q->rate_bps; + u32 quantum = 1024; + + q->tin_cnt = 3; + + /* codepoint to class mapping */ + q->tin_index = diffserv3; + q->tin_order = bulk_order; + + /* class characteristics */ + cake_set_rate(&q->tins[0], rate, mtu, + us_to_ns(q->target), us_to_ns(q->interval)); + cake_set_rate(&q->tins[1], rate >> 4, mtu, + us_to_ns(q->target), us_to_ns(q->interval)); + cake_set_rate(&q->tins[2], rate >> 2, mtu, + us_to_ns(q->target), us_to_ns(q->interval)); + + /* priority weights */ + q->tins[0].tin_quantum_prio = quantum; + q->tins[1].tin_quantum_prio = quantum >> 4; + q->tins[2].tin_quantum_prio = quantum << 4; + + /* bandwidth-sharing weights */ + q->tins[0].tin_quantum_band = quantum; + q->tins[1].tin_quantum_band = quantum >> 4; + q->tins[2].tin_quantum_band = quantum >> 2; + + return 0; +} + +static void cake_reconfigure(struct Qdisc *sch) +{ + struct cake_sched_data *q = qdisc_priv(sch); + int c, ft; + + switch (q->tin_mode) { + case CAKE_DIFFSERV_BESTEFFORT: + ft = cake_config_besteffort(sch); + break; + + case CAKE_DIFFSERV_PRECEDENCE: + ft = cake_config_precedence(sch); + break; + + case CAKE_DIFFSERV_DIFFSERV8: + ft = cake_config_diffserv8(sch); + break; + + case CAKE_DIFFSERV_DIFFSERV4: + ft = cake_config_diffserv4(sch); + break; + + case CAKE_DIFFSERV_DIFFSERV3: + default: + ft = cake_config_diffserv3(sch); + break; + } + + for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) { + cake_clear_tin(sch, c); + q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time; + } + + q->rate_ns = q->tins[ft].tin_rate_ns; + q->rate_shft = q->tins[ft].tin_rate_shft; + + if (q->buffer_config_limit) { + q->buffer_limit = q->buffer_config_limit; + } else if (q->rate_bps) { + u64 t = q->rate_bps * q->interval; + + do_div(t, USEC_PER_SEC / 4); + q->buffer_limit = max_t(u32, t, 4U << 20); + } else { + q->buffer_limit = ~0; + } + + sch->flags &= ~TCQ_F_CAN_BYPASS; + + q->buffer_limit = min(q->buffer_limit, + max(sch->limit * psched_mtu(qdisc_dev(sch)), + q->buffer_config_limit)); +} + +static int cake_change(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct cake_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_CAKE_MAX + 1]; + int err; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack); + if (err < 0) + return err; + + if (tb[TCA_CAKE_NAT]) { +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + q->flow_mode &= ~CAKE_FLOW_NAT_FLAG; + q->flow_mode |= CAKE_FLOW_NAT_FLAG * + !!nla_get_u32(tb[TCA_CAKE_NAT]); +#else + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT], + "No conntrack support in kernel"); + return -EOPNOTSUPP; +#endif + } + + if (tb[TCA_CAKE_BASE_RATE64]) + q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]); + + if (tb[TCA_CAKE_DIFFSERV_MODE]) + q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]); + + if (tb[TCA_CAKE_WASH]) { + if (!!nla_get_u32(tb[TCA_CAKE_WASH])) + q->rate_flags |= CAKE_FLAG_WASH; + else + q->rate_flags &= ~CAKE_FLAG_WASH; + } + + if (tb[TCA_CAKE_FLOW_MODE]) + q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) | + (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) & + CAKE_FLOW_MASK)); + + if (tb[TCA_CAKE_ATM]) + q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]); + + if (tb[TCA_CAKE_OVERHEAD]) { + q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]); + q->rate_flags |= CAKE_FLAG_OVERHEAD; + + q->max_netlen = 0; + q->max_adjlen = 0; + q->min_netlen = ~0; + q->min_adjlen = ~0; + } + + if (tb[TCA_CAKE_RAW]) { + q->rate_flags &= ~CAKE_FLAG_OVERHEAD; + + q->max_netlen = 0; + q->max_adjlen = 0; + q->min_netlen = ~0; + q->min_adjlen = ~0; + } + + if (tb[TCA_CAKE_MPU]) + q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]); + + if (tb[TCA_CAKE_RTT]) { + q->interval = nla_get_u32(tb[TCA_CAKE_RTT]); + + if (!q->interval) + q->interval = 1; + } + + if (tb[TCA_CAKE_TARGET]) { + q->target = nla_get_u32(tb[TCA_CAKE_TARGET]); + + if (!q->target) + q->target = 1; + } + + if (tb[TCA_CAKE_AUTORATE]) { + if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE])) + q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS; + else + q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS; + } + + if (tb[TCA_CAKE_INGRESS]) { + if (!!nla_get_u32(tb[TCA_CAKE_INGRESS])) + q->rate_flags |= CAKE_FLAG_INGRESS; + else + q->rate_flags &= ~CAKE_FLAG_INGRESS; + } + + if (tb[TCA_CAKE_ACK_FILTER]) + q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]); + + if (tb[TCA_CAKE_MEMORY]) + q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]); + + if (q->rate_bps && q->rate_bps <= CAKE_SPLIT_GSO_THRESHOLD) + q->rate_flags |= CAKE_FLAG_SPLIT_GSO; + else + q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO; + + if (q->tins) { + sch_tree_lock(sch); + cake_reconfigure(sch); + sch_tree_unlock(sch); + } + + return 0; +} + +static void cake_destroy(struct Qdisc *sch) +{ + struct cake_sched_data *q = qdisc_priv(sch); + + qdisc_watchdog_cancel(&q->watchdog); + tcf_block_put(q->block); + kvfree(q->tins); +} + +static int cake_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct cake_sched_data *q = qdisc_priv(sch); + int i, j, err; + + sch->limit = 10240; + q->tin_mode = CAKE_DIFFSERV_DIFFSERV3; + q->flow_mode = CAKE_FLOW_TRIPLE; + + q->rate_bps = 0; /* unlimited by default */ + + q->interval = 100000; /* 100ms default */ + q->target = 5000; /* 5ms: codel RFC argues + * for 5 to 10% of interval + */ + + q->cur_tin = 0; + q->cur_flow = 0; + + qdisc_watchdog_init(&q->watchdog, sch); + + if (opt) { + int err = cake_change(sch, opt, extack); + + if (err) + return err; + } + + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); + if (err) + return err; + + quantum_div[0] = ~0; + for (i = 1; i <= CAKE_QUEUES; i++) + quantum_div[i] = 65535 / i; + + q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data), + GFP_KERNEL); + if (!q->tins) + goto nomem; + + for (i = 0; i < CAKE_MAX_TINS; i++) { + struct cake_tin_data *b = q->tins + i; + + INIT_LIST_HEAD(&b->new_flows); + INIT_LIST_HEAD(&b->old_flows); + INIT_LIST_HEAD(&b->decaying_flows); + b->sparse_flow_count = 0; + b->bulk_flow_count = 0; + b->decaying_flow_count = 0; + + for (j = 0; j < CAKE_QUEUES; j++) { + struct cake_flow *flow = b->flows + j; + u32 k = j * CAKE_MAX_TINS + i; + + INIT_LIST_HEAD(&flow->flowchain); + cobalt_vars_init(&flow->cvars); + + q->overflow_heap[k].t = i; + q->overflow_heap[k].b = j; + b->overflow_idx[j] = k; + } + } + + cake_reconfigure(sch); + q->avg_peak_bandwidth = q->rate_bps; + q->min_netlen = ~0; + q->min_adjlen = ~0; + return 0; + +nomem: + cake_destroy(sch); + return -ENOMEM; +} + +static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct cake_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (!opts) + goto nla_put_failure; + + if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps, + TCA_CAKE_PAD)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE, + q->flow_mode & CAKE_FLOW_MASK)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_AUTORATE, + !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS))) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_INGRESS, + !!(q->rate_flags & CAKE_FLAG_INGRESS))) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_NAT, + !!(q->flow_mode & CAKE_FLOW_NAT_FLAG))) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_WASH, + !!(q->rate_flags & CAKE_FLAG_WASH))) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead)) + goto nla_put_failure; + + if (!(q->rate_flags & CAKE_FLAG_OVERHEAD)) + if (nla_put_u32(skb, TCA_CAKE_RAW, 0)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO, + !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO))) + goto nla_put_failure; + + return nla_nest_end(skb, opts); + +nla_put_failure: + return -1; +} + +static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP); + struct cake_sched_data *q = qdisc_priv(sch); + struct nlattr *tstats, *ts; + int i; + + if (!stats) + return -1; + +#define PUT_STAT_U32(attr, data) do { \ + if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \ + goto nla_put_failure; \ + } while (0) +#define PUT_STAT_U64(attr, data) do { \ + if (nla_put_u64_64bit(d->skb, TCA_CAKE_STATS_ ## attr, \ + data, TCA_CAKE_STATS_PAD)) \ + goto nla_put_failure; \ + } while (0) + + PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth); + PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit); + PUT_STAT_U32(MEMORY_USED, q->buffer_max_used); + PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16)); + PUT_STAT_U32(MAX_NETLEN, q->max_netlen); + PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen); + PUT_STAT_U32(MIN_NETLEN, q->min_netlen); + PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen); + +#undef PUT_STAT_U32 +#undef PUT_STAT_U64 + + tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS); + if (!tstats) + goto nla_put_failure; + +#define PUT_TSTAT_U32(attr, data) do { \ + if (nla_put_u32(d->skb, TCA_CAKE_TIN_STATS_ ## attr, data)) \ + goto nla_put_failure; \ + } while (0) +#define PUT_TSTAT_U64(attr, data) do { \ + if (nla_put_u64_64bit(d->skb, TCA_CAKE_TIN_STATS_ ## attr, \ + data, TCA_CAKE_TIN_STATS_PAD)) \ + goto nla_put_failure; \ + } while (0) + + for (i = 0; i < q->tin_cnt; i++) { + struct cake_tin_data *b = &q->tins[q->tin_order[i]]; + + ts = nla_nest_start(d->skb, i + 1); + if (!ts) + goto nla_put_failure; + + PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps); + PUT_TSTAT_U64(SENT_BYTES64, b->bytes); + PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog); + + PUT_TSTAT_U32(TARGET_US, + ktime_to_us(ns_to_ktime(b->cparams.target))); + PUT_TSTAT_U32(INTERVAL_US, + ktime_to_us(ns_to_ktime(b->cparams.interval))); + + PUT_TSTAT_U32(SENT_PACKETS, b->packets); + PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped); + PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark); + PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops); + + PUT_TSTAT_U32(PEAK_DELAY_US, + ktime_to_us(ns_to_ktime(b->peak_delay))); + PUT_TSTAT_U32(AVG_DELAY_US, + ktime_to_us(ns_to_ktime(b->avge_delay))); + PUT_TSTAT_U32(BASE_DELAY_US, + ktime_to_us(ns_to_ktime(b->base_delay))); + + PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits); + PUT_TSTAT_U32(WAY_MISSES, b->way_misses); + PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions); + + PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count + + b->decaying_flow_count); + PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count); + PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count); + PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen); + + PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum); + nla_nest_end(d->skb, ts); + } + +#undef PUT_TSTAT_U32 +#undef PUT_TSTAT_U64 + + nla_nest_end(d->skb, tstats); + return nla_nest_end(d->skb, stats); + +nla_put_failure: + nla_nest_cancel(d->skb, stats); + return -1; +} + +static struct Qdisc *cake_leaf(struct Qdisc *sch, unsigned long arg) +{ + return NULL; +} + +static unsigned long cake_find(struct Qdisc *sch, u32 classid) +{ + return 0; +} + +static unsigned long cake_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return 0; +} + +static void cake_unbind(struct Qdisc *q, unsigned long cl) +{ +} + +static struct tcf_block *cake_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) +{ + struct cake_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return q->block; +} + +static int cake_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + tcm->tcm_handle |= TC_H_MIN(cl); + return 0; +} + +static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct cake_sched_data *q = qdisc_priv(sch); + const struct cake_flow *flow = NULL; + struct gnet_stats_queue qs = { 0 }; + struct nlattr *stats; + u32 idx = cl - 1; + + if (idx < CAKE_QUEUES * q->tin_cnt) { + const struct cake_tin_data *b = \ + &q->tins[q->tin_order[idx / CAKE_QUEUES]]; + const struct sk_buff *skb; + + flow = &b->flows[idx % CAKE_QUEUES]; + + if (flow->head) { + sch_tree_lock(sch); + skb = flow->head; + while (skb) { + qs.qlen++; + skb = skb->next; + } + sch_tree_unlock(sch); + } + qs.backlog = b->backlogs[idx % CAKE_QUEUES]; + qs.drops = flow->dropped; + } + if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) + return -1; + if (flow) { + ktime_t now = ktime_get(); + + stats = nla_nest_start(d->skb, TCA_STATS_APP); + if (!stats) + return -1; + +#define PUT_STAT_U32(attr, data) do { \ + if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \ + goto nla_put_failure; \ + } while (0) +#define PUT_STAT_S32(attr, data) do { \ + if (nla_put_s32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \ + goto nla_put_failure; \ + } while (0) + + PUT_STAT_S32(DEFICIT, flow->deficit); + PUT_STAT_U32(DROPPING, flow->cvars.dropping); + PUT_STAT_U32(COBALT_COUNT, flow->cvars.count); + PUT_STAT_U32(P_DROP, flow->cvars.p_drop); + if (flow->cvars.p_drop) { + PUT_STAT_S32(BLUE_TIMER_US, + ktime_to_us( + ktime_sub(now, + flow->cvars.blue_timer))); + } + if (flow->cvars.dropping) { + PUT_STAT_S32(DROP_NEXT_US, + ktime_to_us( + ktime_sub(now, + flow->cvars.drop_next))); + } + + if (nla_nest_end(d->skb, stats) < 0) + return -1; + } + + return 0; + +nla_put_failure: + nla_nest_cancel(d->skb, stats); + return -1; +} + +static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct cake_sched_data *q = qdisc_priv(sch); + unsigned int i, j; + + if (arg->stop) + return; + + for (i = 0; i < q->tin_cnt; i++) { + struct cake_tin_data *b = &q->tins[q->tin_order[i]]; + + for (j = 0; j < CAKE_QUEUES; j++) { + if (list_empty(&b->flows[j].flowchain) || + arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } + } +} + +static const struct Qdisc_class_ops cake_class_ops = { + .leaf = cake_leaf, + .find = cake_find, + .tcf_block = cake_tcf_block, + .bind_tcf = cake_bind, + .unbind_tcf = cake_unbind, + .dump = cake_dump_class, + .dump_stats = cake_dump_class_stats, + .walk = cake_walk, +}; + +static struct Qdisc_ops cake_qdisc_ops __read_mostly = { + .cl_ops = &cake_class_ops, + .id = "cake", + .priv_size = sizeof(struct cake_sched_data), + .enqueue = cake_enqueue, + .dequeue = cake_dequeue, + .peek = qdisc_peek_dequeued, + .init = cake_init, + .reset = cake_reset, + .destroy = cake_destroy, + .change = cake_change, + .dump = cake_dump, + .dump_stats = cake_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init cake_module_init(void) +{ + return register_qdisc(&cake_qdisc_ops); +} + +static void __exit cake_module_exit(void) +{ + unregister_qdisc(&cake_qdisc_ops); +} + +module_init(cake_module_init) +module_exit(cake_module_exit) +MODULE_AUTHOR("Jonathan Morton"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("The CAKE shaper."); diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index d36dc26c6c51..07991e1025c7 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -105,7 +105,7 @@ do_test_span_vlan_dir_ips() # Install the capture as skip_hw to avoid double-counting of packets. # The traffic is meant for local box anyway, so will be trapped to # kernel. - vlan_capture_install $dev "skip_hw vlan_id $vid" + vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip" mirror_test v$h1 $ip1 $ip2 $dev 100 $expect mirror_test v$h2 $ip2 $ip1 $dev 100 $expect vlan_capture_uninstall $dev |