From 4ebe78e15b95e8baaf7c3686694b59319b215f38 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Thu, 29 Aug 2024 13:39:33 +0530 Subject: octeontx2-af: use dynamic interrupt vectors for CN10K This patch updates the driver to use a dynamic number of vectors instead of a hard-coded value. This change accommodates the CN10KB, which has 2 vectors, unlike the previously supported chips that have 3 vectors. Signed-off-by: Srujana Challa Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 5 +- .../net/ethernet/marvell/octeontx2/af/rvu_cpt.c | 89 ++++++++++++++++++---- .../net/ethernet/marvell/octeontx2/af/rvu_struct.h | 6 +- 3 files changed, 80 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index ed2160cc5acb..6ea2f3071fe8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1856,8 +1856,9 @@ struct cpt_flt_eng_info_req { struct cpt_flt_eng_info_rsp { struct mbox_msghdr hdr; - u64 flt_eng_map[CPT_10K_AF_INT_VEC_RVU]; - u64 rcvrd_eng_map[CPT_10K_AF_INT_VEC_RVU]; +#define CPT_AF_MAX_FLT_INT_VECS 3 + u64 flt_eng_map[CPT_AF_MAX_FLT_INT_VECS]; + u64 rcvrd_eng_map[CPT_AF_MAX_FLT_INT_VECS]; u64 rsvd; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index daf4b951e905..cd5b21cb0427 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -19,6 +19,9 @@ /* Length of initial context fetch in 128 byte words */ #define CPT_CTX_ILEN 1ULL +/* Interrupt vector count of CPT RVU and RAS interrupts */ +#define CPT_10K_AF_RVU_RAS_INT_VEC_CNT 2 + #define cpt_get_eng_sts(e_min, e_max, rsp, etype) \ ({ \ u64 free_sts = 0, busy_sts = 0; \ @@ -37,6 +40,41 @@ (_rsp)->free_sts_##etype = free_sts; \ }) +#define MAX_AE GENMASK_ULL(47, 32) 
+#define MAX_IE GENMASK_ULL(31, 16) +#define MAX_SE GENMASK_ULL(15, 0) + +static u16 cpt_max_engines_get(struct rvu *rvu) +{ + u16 max_ses, max_ies, max_aes; + u64 reg; + + reg = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_CONSTANTS1); + max_ses = FIELD_GET(MAX_SE, reg); + max_ies = FIELD_GET(MAX_IE, reg); + max_aes = FIELD_GET(MAX_AE, reg); + + return max_ses + max_ies + max_aes; +} + +/* Number of flt interrupt vectors depends on the number of engines that the + * chip has. Each flt vector represents 64 engines. + */ +static int cpt_10k_flt_nvecs_get(struct rvu *rvu, u16 max_engs) +{ + int flt_vecs; + + flt_vecs = DIV_ROUND_UP(max_engs, 64); + + if (flt_vecs > CPT_10K_AF_INT_VEC_FLT_MAX) { + dev_warn_once(rvu->dev, "flt_vecs:%d exceeds the max vectors:%d\n", + flt_vecs, CPT_10K_AF_INT_VEC_FLT_MAX); + flt_vecs = CPT_10K_AF_INT_VEC_FLT_MAX; + } + + return flt_vecs; +} + static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr) { struct rvu_block *block = ptr; @@ -150,17 +188,26 @@ static void cpt_10k_unregister_interrupts(struct rvu_block *block, int off) { struct rvu *rvu = block->rvu; int blkaddr = block->addr; - int i; + int i, flt_vecs; + u16 max_engs; + u8 nr; + + max_engs = cpt_max_engines_get(rvu); + flt_vecs = cpt_10k_flt_nvecs_get(rvu, max_engs); /* Disable all CPT AF interrupts */ - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(0), ~0ULL); - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(1), ~0ULL); - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(2), 0xFFFF); + for (i = CPT_10K_AF_INT_VEC_FLT0; i < flt_vecs; i++) { + nr = (max_engs > 64) ? 64 : max_engs; + max_engs -= nr; + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i), + INTR_MASK(nr)); + } rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1); rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1); - for (i = 0; i < CPT_10K_AF_INT_VEC_CNT; i++) + /* CPT AF interrupt vectors are flt_int, rvu_int and ras_int.
*/ + for (i = 0; i < flt_vecs + CPT_10K_AF_RVU_RAS_INT_VEC_CNT; i++) if (rvu->irq_allocated[off + i]) { free_irq(pci_irq_vector(rvu->pdev, off + i), block); rvu->irq_allocated[off + i] = false; @@ -206,12 +253,18 @@ void rvu_cpt_unregister_interrupts(struct rvu *rvu) static int cpt_10k_register_interrupts(struct rvu_block *block, int off) { + int rvu_intr_vec, ras_intr_vec; struct rvu *rvu = block->rvu; int blkaddr = block->addr; irq_handler_t flt_fn; - int i, ret; + int i, ret, flt_vecs; + u16 max_engs; + u8 nr; + + max_engs = cpt_max_engines_get(rvu); + flt_vecs = cpt_10k_flt_nvecs_get(rvu, max_engs); - for (i = CPT_10K_AF_INT_VEC_FLT0; i < CPT_10K_AF_INT_VEC_RVU; i++) { + for (i = CPT_10K_AF_INT_VEC_FLT0; i < flt_vecs; i++) { sprintf(&rvu->irq_name[(off + i) * NAME_SIZE], "CPTAF FLT%d", i); switch (i) { @@ -229,20 +282,24 @@ static int cpt_10k_register_interrupts(struct rvu_block *block, int off) flt_fn, &rvu->irq_name[(off + i) * NAME_SIZE]); if (ret) goto err; - if (i == CPT_10K_AF_INT_VEC_FLT2) - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0xFFFF); - else - rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), ~0ULL); + + nr = (max_engs > 64) ? 
64 : max_engs; + max_engs -= nr; + rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), + INTR_MASK(nr)); } - ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RVU, + rvu_intr_vec = flt_vecs; + ras_intr_vec = rvu_intr_vec + 1; + + ret = rvu_cpt_do_register_interrupt(block, off + rvu_intr_vec, rvu_cpt_af_rvu_intr_handler, "CPTAF RVU"); if (ret) goto err; rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1S, 0x1); - ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RAS, + ret = rvu_cpt_do_register_interrupt(block, off + ras_intr_vec, rvu_cpt_af_ras_intr_handler, "CPTAF RAS"); if (ret) @@ -921,13 +978,17 @@ int rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_r struct rvu_block *block; unsigned long flags; int blkaddr, vec; + int flt_vecs; + u16 max_engs; blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) return blkaddr; block = &rvu->hw->block[blkaddr]; - for (vec = 0; vec < CPT_10K_AF_INT_VEC_RVU; vec++) { + max_engs = cpt_max_engines_get(rvu); + flt_vecs = cpt_10k_flt_nvecs_get(rvu, max_engs); + for (vec = 0; vec < flt_vecs; vec++) { spin_lock_irqsave(&rvu->cpt_intr_lock, flags); rsp->flt_eng_map[vec] = block->cpt_flt_eng_map[vec]; rsp->rcvrd_eng_map[vec] = block->cpt_rcvrd_eng_map[vec]; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 5ef406c7e8a4..fc8da2090657 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -71,13 +71,11 @@ enum cpt_af_int_vec_e { CPT_AF_INT_VEC_CNT = 0x4, }; -enum cpt_10k_af_int_vec_e { +enum cpt_cn10k_flt_int_vec_e { CPT_10K_AF_INT_VEC_FLT0 = 0x0, CPT_10K_AF_INT_VEC_FLT1 = 0x1, CPT_10K_AF_INT_VEC_FLT2 = 0x2, - CPT_10K_AF_INT_VEC_RVU = 0x3, - CPT_10K_AF_INT_VEC_RAS = 0x4, - CPT_10K_AF_INT_VEC_CNT = 0x5, + CPT_10K_AF_INT_VEC_FLT_MAX = 0x3, }; /* NPA Admin function Interrupt Vector Enumeration */ -- cgit 
From 1652623291c50a9ec4db3c416b7d01701b4012ff Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Thu, 29 Aug 2024 13:39:34 +0530 Subject: octeontx2-af: avoid RXC register access for CN10KB This patch modifies the driver to prevent access to RXC hardware registers on the CN10KB, as RXC is not available on this chip. Signed-off-by: Srujana Challa Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 10 ++++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c | 17 ++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 03ee93fd9e94..64c9c9ee000d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -400,6 +400,7 @@ struct hw_cap { bool nix_multiple_dwrr_mtu; /* Multiple DWRR_MTU to choose from */ bool npc_hash_extract; /* Hash extract enabled ? */ bool npc_exact_match_enabled; /* Exact match supported ? 
*/ + bool cpt_rxc; /* Is CPT-RXC supported */ }; struct rvu_hwinfo { @@ -690,6 +691,15 @@ static inline bool is_cnf10ka_a0(struct rvu *rvu) return false; } +static inline bool is_cn10kb(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_B) + return true; + return false; +} + static inline bool is_rvu_npc_hash_extract_en(struct rvu *rvu) { u64 npc_const3; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index cd5b21cb0427..d44614a63a7b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -789,6 +789,8 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, static void get_ctx_pc(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) { + struct rvu_hwinfo *hw = rvu->hw; + if (is_rvu_otx2(rvu)) return; @@ -812,14 +814,16 @@ static void get_ctx_pc(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) rsp->ctx_err = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ERR); rsp->ctx_enc_id = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ENC_ID); rsp->ctx_flush_timer = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FLUSH_TIMER); + rsp->x2p_link_cfg0 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0)); + rsp->x2p_link_cfg1 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(1)); + if (!hw->cap.cpt_rxc) + return; rsp->rxc_time = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME); rsp->rxc_time_cfg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME_CFG); rsp->rxc_active_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ACTIVE_STS); rsp->rxc_zombie_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ZOMBIE_STS); rsp->rxc_dfrg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_DFRG); - rsp->x2p_link_cfg0 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0)); - rsp->x2p_link_cfg1 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(1)); } static void get_eng_sts(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) @@ -1004,10 +1008,11 @@ int 
rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_r static void cpt_rxc_teardown(struct rvu *rvu, int blkaddr) { struct cpt_rxc_time_cfg_req req, prev; + struct rvu_hwinfo *hw = rvu->hw; int timeout = 2000; u64 reg; - if (is_rvu_otx2(rvu)) + if (!hw->cap.cpt_rxc) return; /* Set time limit to minimum values, so that rxc entries will be @@ -1282,8 +1287,14 @@ unlock: int rvu_cpt_init(struct rvu *rvu) { + struct rvu_hwinfo *hw = rvu->hw; + /* Retrieve CPT PF number */ rvu->cpt_pf_num = get_cpt_pf_num(rvu); + if (is_block_implemented(rvu->hw, BLKADDR_CPT0) && !is_rvu_otx2(rvu) && + !is_cn10kb(rvu)) + hw->cap.cpt_rxc = true; + spin_lock_init(&rvu->cpt_intr_lock); return 0; -- cgit From 5da8de8cb3e3b01fd838536c75a36b667eca128b Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Thu, 29 Aug 2024 13:39:35 +0530 Subject: octeontx2-af: configure default CPT credits for CN10KA B0 The maximum CPT credits that RXC can use are now configurable on CN10KA B0 through a hardware CSR. This patch sets the default value to optimize peak performance, aligning it with other chip versions. Signed-off-by: Srujana Challa Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 20 ++++++++++++++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c | 18 ++++++++++++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h | 1 + 3 files changed, 39 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 64c9c9ee000d..43b1d83686d1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -691,6 +691,26 @@ static inline bool is_cnf10ka_a0(struct rvu *rvu) return false; } +static inline bool is_cn10ka_a0(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A && + (pdev->revision & 0x0F) == 0x0) + return true; + return false; +} + +static inline bool is_cn10ka_a1(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A && + (pdev->revision & 0x0F) == 0x1) + return true; + return false; +} + static inline bool is_cn10kb(struct rvu *rvu) { struct pci_dev *pdev = rvu->pdev; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index d44614a63a7b..3c5bbaf12e59 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -22,6 +22,9 @@ /* Interrupt vector count of CPT RVU and RAS interrupts */ #define CPT_10K_AF_RVU_RAS_INT_VEC_CNT 2 +/* Default CPT_AF_RXC_CFG1:max_rxc_icb_cnt */ +#define CPT_DFLT_MAX_RXC_ICB_CNT 0xC0ULL + #define cpt_get_eng_sts(e_min, e_max, rsp, etype) \ ({ \ u64 free_sts = 0, busy_sts = 0; \ @@ -737,6 +740,7 @@ static bool validate_and_update_reg_offset(struct rvu *rvu, case CPT_AF_BLK_RST: case CPT_AF_CONSTANTS1: case CPT_AF_CTX_FLUSH_TIMER: + case CPT_AF_RXC_CFG1: return true; } @@ -1285,9 +1289,12 @@ unlock: return 0; } +#define MAX_RXC_ICB_CNT GENMASK_ULL(40, 32) + int 
rvu_cpt_init(struct rvu *rvu) { struct rvu_hwinfo *hw = rvu->hw; + u64 reg_val; /* Retrieve CPT PF number */ rvu->cpt_pf_num = get_cpt_pf_num(rvu); @@ -1295,6 +1302,17 @@ int rvu_cpt_init(struct rvu *rvu) !is_cn10kb(rvu)) hw->cap.cpt_rxc = true; + if (hw->cap.cpt_rxc && !is_cn10ka_a0(rvu) && !is_cn10ka_a1(rvu)) { + /* Set CPT_AF_RXC_CFG1:max_rxc_icb_cnt to 0xc0 to not affect + * inline inbound peak performance + */ + reg_val = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_RXC_CFG1); + reg_val &= ~MAX_RXC_ICB_CNT; + reg_val |= FIELD_PREP(MAX_RXC_ICB_CNT, + CPT_DFLT_MAX_RXC_ICB_CNT); + rvu_write64(rvu, BLKADDR_CPT0, CPT_AF_RXC_CFG1, reg_val); + } + spin_lock_init(&rvu->cpt_intr_lock); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index d56be5fb7eb4..2b299fa85159 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -545,6 +545,7 @@ #define CPT_AF_CTX_PSH_PC (0x49450ull) #define CPT_AF_CTX_PSH_LATENCY_PC (0x49458ull) #define CPT_AF_CTX_CAM_DATA(a) (0x49800ull | (u64)(a) << 3) +#define CPT_AF_RXC_CFG1 (0x50000ull) #define CPT_AF_RXC_TIME (0x50010ull) #define CPT_AF_RXC_TIME_CFG (0x50018ull) #define CPT_AF_RXC_DFRG (0x50020ull) -- cgit