diff options
58 files changed, 1880 insertions, 786 deletions
diff --git a/Documentation/netlink/specs/tcp_metrics.yaml b/Documentation/netlink/specs/tcp_metrics.yaml new file mode 100644 index 000000000000..1bd94f43e526 --- /dev/null +++ b/Documentation/netlink/specs/tcp_metrics.yaml @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: tcp_metrics + +protocol: genetlink-legacy + +doc: | + Management interface for TCP metrics. + +c-family-name: tcp-metrics-genl-name +c-version-name: tcp-metrics-genl-version +max-by-define: true +kernel-policy: global + +definitions: + - + name: tcp-fastopen-cookie-max + type: const + value: 16 + +attribute-sets: + - + name: tcp-metrics + name-prefix: tcp-metrics-attr- + attributes: + - + name: addr-ipv4 + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: addr-ipv6 + type: binary + checks: + min-len: 16 + byte-order: big-endian + display-hint: ipv6 + - + name: age + type: u64 + - + name: tw-tsval + type: u32 + doc: unused + - + name: tw-ts-stamp + type: s32 + doc: unused + - + name: vals + type: nest + nested-attributes: metrics + - + name: fopen-mss + type: u16 + - + name: fopen-syn-drops + type: u16 + - + name: fopen-syn-drop-ts + type: u64 + - + name: fopen-cookie + type: binary + checks: + min-len: tcp-fastopen-cookie-max + - + name: saddr-ipv4 + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: saddr-ipv6 + type: binary + checks: + min-len: 16 + byte-order: big-endian + display-hint: ipv6 + - + name: pad + type: pad + + - + name: metrics + # Intentionally don't define the name-prefix, see below. + doc: | + Attributes with metrics. Note that the values here do not match + the TCP_METRIC_* defines in the kernel, because kernel defines + are off-by one (e.g. rtt is defined as enum 0, while netlink carries + attribute type 1). + attributes: + - + name: rtt + type: u32 + doc: | + Round Trip Time (RTT), in msecs with 3 bits fractional + (left-shift by 3 to get the msec value). + - + name: rttvar + type: u32 + doc: | + Round Trip Time VARiance (RTT), in msecs with 2 bits fractional + (left-shift by 2 to get the msec value). + - + name: ssthresh + type: u32 + doc: Slow Start THRESHold. + - + name: cwnd + type: u32 + doc: Congestion Window. + - + name: reodering + type: u32 + doc: Reodering metric. + - + name: rtt-us + type: u32 + doc: | + Round Trip Time (RTT), in usecs, with 3 bits fractional + (left-shift by 3 to get the msec value). + - + name: rttvar-us + type: u32 + doc: | + Round Trip Time (RTT), in usecs, with 2 bits fractional + (left-shift by 3 to get the msec value). + +operations: + list: + - + name: get + doc: Retrieve metrics. + attribute-set: tcp-metrics + + dont-validate: [ strict, dump ] + + do: + request: &sel_attrs + attributes: + - addr-ipv4 + - addr-ipv6 + - saddr-ipv4 + - saddr-ipv6 + reply: &all_attrs + attributes: + - addr-ipv4 + - addr-ipv6 + - saddr-ipv4 + - saddr-ipv6 + - age + - vals + - fopen-mss + - fopen-syn-drops + - fopen-syn-drop-ts + - fopen-cookie + dump: + reply: *all_attrs + + - + name: del + doc: Delete metrics. + attribute-set: tcp-metrics + + dont-validate: [ strict, dump ] + flags: [ admin-perm ] + + do: + request: *sel_attrs diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1bd0c5973252..a1690207d793 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -456,8 +456,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) dma_addr_t mapping; unsigned int length, pad = 0; u32 len, free_size, vlan_tag_flags, cfa_action, flags; - u16 prod, last_frag; + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; struct pci_dev *pdev = bp->pdev; + u16 prod, last_frag, txts_prod; struct bnxt_tx_ring_info *txr; struct bnxt_sw_tx_bd *tx_buf; __le32 lflags = 0; @@ -509,27 +510,29 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT; } - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && ptp && + ptp->tx_tstamp_en) { + if (bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP) { + lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP); + tx_buf->is_ts_pkt = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + } else if (!skb_is_gso(skb)) { + u16 seq_id, hdr_off; - if (ptp && ptp->tx_tstamp_en && !skb_is_gso(skb)) { - if (atomic_dec_if_positive(&ptp->tx_avail) < 0) { - atomic64_inc(&ptp->stats.ts_err); - goto tx_no_ts; - } - if (!bnxt_ptp_parse(skb, &ptp->tx_seqid, - &ptp->tx_hdr_off)) { + if (!bnxt_ptp_parse(skb, &seq_id, &hdr_off) && + !bnxt_ptp_get_txts_prod(ptp, &txts_prod)) { if (vlan_tag_flags) - ptp->tx_hdr_off += VLAN_HLEN; + hdr_off += VLAN_HLEN; lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP); + tx_buf->is_ts_pkt = 1; skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - } else { - atomic_inc(&bp->ptp_cfg->tx_avail); + + ptp->txts_req[txts_prod].tx_seqid = seq_id; + ptp->txts_req[txts_prod].tx_hdr_off = hdr_off; + tx_buf->txts_prod = txts_prod; } } } - -tx_no_ts: if (unlikely(skb->no_fcs)) lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC); @@ -758,8 +761,11 @@ tx_free: dev_kfree_skb_any(skb); tx_kick_pending: if (BNXT_TX_PTP_IS_SET(lflags)) { + txr->tx_buf_ring[txr->tx_prod].is_ts_pkt = 0; atomic64_inc(&bp->ptp_cfg->stats.ts_err); - atomic_inc(&bp->ptp_cfg->tx_avail); + if (!(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) + /* set SKB to err so PTP worker will clean up */ + ptp->txts_req[txts_prod].tx_skb = ERR_PTR(-EIO); } if (txr->kick_pending) bnxt_txr_db_kick(bp, txr, txr->tx_prod); @@ -768,7 +774,8 @@ tx_kick_pending: return NETDEV_TX_OK; } -static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, +/* Returns true if some remaining TX packets not processed. */ +static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, int budget) { struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index); @@ -777,24 +784,33 @@ static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, unsigned int tx_bytes = 0; u16 cons = txr->tx_cons; int tx_pkts = 0; + bool rc = false; while (RING_TX(bp, cons) != hw_cons) { struct bnxt_sw_tx_bd *tx_buf; struct sk_buff *skb; + bool is_ts_pkt; int j, last; tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)]; - cons = NEXT_TX(cons); skb = tx_buf->skb; - tx_buf->skb = NULL; if (unlikely(!skb)) { bnxt_sched_reset_txr(bp, txr, cons); - return; + return rc; } + is_ts_pkt = tx_buf->is_ts_pkt; + if (is_ts_pkt && (bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) { + rc = true; + break; + } + + cons = NEXT_TX(cons); tx_pkts++; tx_bytes += skb->len; + tx_buf->skb = NULL; + tx_buf->is_ts_pkt = 0; if (tx_buf->is_push) { tx_buf->is_push = 0; @@ -814,15 +830,11 @@ static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, skb_frag_size(&skb_shinfo(skb)->frags[j]), DMA_TO_DEVICE); } - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(is_ts_pkt)) { if (BNXT_CHIP_P5(bp)) { /* PTP worker takes ownership of the skb */ - if (!bnxt_get_tx_ts_p5(bp, skb)) { - skb = NULL; - } else { - atomic64_inc(&bp->ptp_cfg->stats.ts_err); - atomic_inc(&bp->ptp_cfg->tx_avail); - } + bnxt_get_tx_ts_p5(bp, skb, tx_buf->txts_prod); + skb = NULL; } } @@ -837,18 +849,22 @@ next_tx_int: __netif_txq_completed_wake(txq, tx_pkts, tx_bytes, bnxt_tx_avail(bp, txr), bp->tx_wake_thresh, READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING); + + return rc; } static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) { struct bnxt_tx_ring_info *txr; + bool more = false; int i; bnxt_for_each_napi_tx(i, bnapi, txr) { if (txr->tx_hw_cons != RING_TX(bp, txr->tx_cons)) - __bnxt_tx_int(bp, txr, budget); + more |= __bnxt_tx_int(bp, txr, budget); } - bnapi->events &= ~BNXT_TX_CMP_EVENT; + if (!more) + bnapi->events &= ~BNXT_TX_CMP_EVENT; } static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, @@ -2914,6 +2930,8 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, cpr->has_more_work = 1; break; } + } else if (cmp_type == CMP_TYPE_TX_L2_PKT_TS_CMP) { + bnxt_tx_ts_cmp(bp, bnapi, (struct tx_ts_cmp *)txcmp); } else if (cmp_type >= CMP_TYPE_RX_L2_CMP && cmp_type <= CMP_TYPE_RX_L2_TPA_START_V3_CMP) { if (likely(budget)) @@ -2945,8 +2963,10 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } - if (event & BNXT_REDIRECT_EVENT) + if (event & BNXT_REDIRECT_EVENT) { xdp_do_flush(); + event &= ~BNXT_REDIRECT_EVENT; + } if (event & BNXT_TX_EVENT) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring[0]; @@ -2956,6 +2976,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, wmb(); bnxt_db_write_relaxed(bp, &txr->tx_db, prod); + event &= ~BNXT_TX_EVENT; } cpr->cp_raw_cons = raw_cons; @@ -2973,13 +2994,14 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi, struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); + bnapi->events &= ~BNXT_RX_EVENT; } if (bnapi->events & BNXT_AGG_EVENT) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnapi->events &= ~BNXT_AGG_EVENT; } - bnapi->events &= BNXT_TX_CMP_EVENT; } static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, @@ -6788,6 +6810,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, switch (ring_type) { case HWRM_RING_ALLOC_TX: { struct bnxt_tx_ring_info *txr; + u16 flags = 0; txr = container_of(ring, struct bnxt_tx_ring_info, tx_ring_struct); @@ -6801,6 +6824,9 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, if (bp->flags & BNXT_FLAG_TX_COAL_CMPL) req->cmpl_coal_cnt = RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_64; + if ((bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP) && bp->ptp_cfg) + flags |= RING_ALLOC_REQ_FLAGS_TX_PKT_TS_CMPL_ENABLE; + req->flags = cpu_to_le16(flags); break; } case HWRM_RING_ALLOC_RX: @@ -8981,7 +9007,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) u8 flags; int rc; - if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5(bp)) { + if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5_PLUS(bp)) { rc = -ENODEV; goto no_ptp; } @@ -8997,7 +9023,8 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) goto exit; flags = resp->flags; - if (!(flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS)) { + if (BNXT_CHIP_P5_AND_MINUS(bp) && + !(flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS)) { rc = -ENODEV; goto exit; } @@ -9010,10 +9037,13 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) ptp->bp = bp; bp->ptp_cfg = ptp; } - if (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK) { + + if (flags & + (PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK | + PORT_MAC_PTP_QCFG_RESP_FLAGS_64B_PHC_TIME)) { ptp->refclk_regs[0] = le32_to_cpu(resp->ts_ref_clock_reg_lower); ptp->refclk_regs[1] = le32_to_cpu(resp->ts_ref_clock_reg_upper); - } else if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + } else if (BNXT_CHIP_P5(bp)) { ptp->refclk_regs[0] = BNXT_TS_REG_TIMESYNC_TS0_LOWER; ptp->refclk_regs[1] = BNXT_TS_REG_TIMESYNC_TS0_UPPER; } else { @@ -9095,6 +9125,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_RX_ALL_PKT_TS; if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_UDP_GSO_SUPPORTED) bp->flags |= BNXT_FLAG_UDP_GSO_CAP; + if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_TX_PKT_TS_CMPL_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_TX_TS_CMP; bp->tx_push_thresh = 0; if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) && @@ -12136,8 +12168,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); - if (bp->ptp_cfg) - atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); + if (bp->ptp_cfg && !(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) + WRITE_ONCE(bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 9cf0acfa04e5..e46bd11e52b0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -181,6 +181,32 @@ struct tx_cmp { #define TX_CMP_SQ_CONS_IDX(txcmp) \ (le32_to_cpu((txcmp)->sq_cons_idx) & TX_CMP_SQ_CONS_IDX_MASK) +struct tx_ts_cmp { + __le32 tx_ts_cmp_flags_type; + #define TX_TS_CMP_FLAGS_ERROR (1 << 6) + #define TX_TS_CMP_FLAGS_TS_TYPE (1 << 7) + #define TX_TS_CMP_FLAGS_TS_TYPE_PM (0 << 7) + #define TX_TS_CMP_FLAGS_TS_TYPE_PA (1 << 7) + #define TX_TS_CMP_FLAGS_TS_FALLBACK (1 << 8) + #define TX_TS_CMP_TS_SUB_NS (0xf << 12) + #define TX_TS_CMP_TS_NS_MID (0xffff << 16) + #define TX_TS_CMP_TS_NS_MID_SFT 16 + u32 tx_ts_cmp_opaque; + __le32 tx_ts_cmp_errors_v; + #define TX_TS_CMP_V (1 << 0) + #define TX_TS_CMP_TS_INVALID_ERR (1 << 10) + __le32 tx_ts_cmp_ts_ns_lo; +}; + +#define BNXT_GET_TX_TS_48B_NS(tscmp) \ + (le32_to_cpu((tscmp)->tx_ts_cmp_ts_ns_lo) | \ + ((u64)(le32_to_cpu((tscmp)->tx_ts_cmp_flags_type) & \ + TX_TS_CMP_TS_NS_MID) << TX_TS_CMP_TS_NS_MID_SFT)) + +#define BNXT_TX_TS_ERR(tscmp) \ + (((tscmp)->tx_ts_cmp_flags_type & cpu_to_le32(TX_TS_CMP_FLAGS_ERROR)) &&\ + ((tscmp)->tx_ts_cmp_errors_v & cpu_to_le32(TX_TS_CMP_TS_INVALID_ERR))) + struct rx_cmp { __le32 rx_cmp_len_flags_type; #define RX_CMP_CMP_TYPE (0x3f << 0) @@ -848,11 +874,14 @@ struct bnxt_sw_tx_bd { DEFINE_DMA_UNMAP_ADDR(mapping); DEFINE_DMA_UNMAP_LEN(len); struct page *page; - u8 is_gso; + u8 is_ts_pkt; u8 is_push; u8 action; unsigned short nr_frags; - u16 rx_prod; + union { + u16 rx_prod; + u16 txts_prod; + }; }; struct bnxt_sw_rx_bd { @@ -2237,9 +2266,17 @@ struct bnxt { (BNXT_CHIP_NUM_58700((bp)->chip_num) && \ !BNXT_CHIP_TYPE_NITRO_A0(bp))) +/* Chip class phase 3.x */ +#define BNXT_CHIP_P3(bp) \ + (BNXT_CHIP_NUM_57X0X((bp)->chip_num) || \ + BNXT_CHIP_TYPE_NITRO_A0(bp)) + #define BNXT_CHIP_P4_PLUS(bp) \ (BNXT_CHIP_P4(bp) || BNXT_CHIP_P5_PLUS(bp)) +#define BNXT_CHIP_P5_AND_MINUS(bp) \ + (BNXT_CHIP_P3(bp) || BNXT_CHIP_P4(bp) || BNXT_CHIP_P5(bp)) + struct bnxt_aux_priv *aux_priv; struct bnxt_en_dev *edev; @@ -2384,6 +2421,7 @@ struct bnxt { #define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2 BIT_ULL(16) #define BNXT_FW_CAP_PCIE_STATS_SUPPORTED BIT_ULL(17) #define BNXT_FW_CAP_EXT_STATS_SUPPORTED BIT_ULL(18) + #define BNXT_FW_CAP_TX_TS_CMP BIT_ULL(19) #define BNXT_FW_CAP_ERR_RECOVER_RELOAD BIT_ULL(20) #define BNXT_FW_CAP_HOT_RESET BIT_ULL(21) #define BNXT_FW_CAP_PTP_RTC BIT_ULL(22) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index a14d46b9bfdf..37d42423459c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -110,7 +110,7 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp) } static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts, - u32 txts_tmo) + u32 txts_tmo, int slot) { struct hwrm_port_ts_query_output *resp; struct hwrm_port_ts_query_input *req; @@ -123,11 +123,12 @@ static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts, req->flags = cpu_to_le32(flags); if ((flags & PORT_TS_QUERY_REQ_FLAGS_PATH) == PORT_TS_QUERY_REQ_FLAGS_PATH_TX) { + struct bnxt_ptp_tx_req *txts_req = &bp->ptp_cfg->txts_req[slot]; u32 tmo_us = txts_tmo * 1000; req->enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES); - req->ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid); - req->ptp_hdr_offset = cpu_to_le16(bp->ptp_cfg->tx_hdr_off); + req->ptp_seq_id = cpu_to_le32(txts_req->tx_seqid); + req->ptp_hdr_offset = cpu_to_le16(txts_req->tx_hdr_off); if (!tmo_us) tmo_us = BNXT_PTP_QTS_TIMEOUT; tmo_us = min(tmo_us, BNXT_PTP_QTS_MAX_TMO_US); @@ -656,6 +657,14 @@ static int bnxt_map_ptp_regs(struct bnxt *bp) (ptp->refclk_regs[i] & BNXT_GRC_OFFSET_MASK); return 0; } + if (bp->flags & BNXT_FLAG_CHIP_P7) { + for (i = 0; i < 2; i++) { + if (reg_arr[i] & BNXT_GRC_BASE_MASK) + return -EINVAL; + ptp->refclk_mapped_regs[i] = reg_arr[i]; + } + return 0; + } return -ENODEV; } @@ -674,43 +683,44 @@ static u64 bnxt_cc_read(const struct cyclecounter *cc) return ns; } -static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) +static int bnxt_stamp_tx_skb(struct bnxt *bp, int slot) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; struct skb_shared_hwtstamps timestamp; + struct bnxt_ptp_tx_req *txts_req; unsigned long now = jiffies; u64 ts = 0, ns = 0; u32 tmo = 0; int rc; - if (!ptp->txts_pending) - ptp->abs_txts_tmo = now + msecs_to_jiffies(ptp->txts_tmo); - if (!time_after_eq(now, ptp->abs_txts_tmo)) - tmo = jiffies_to_msecs(ptp->abs_txts_tmo - now); + txts_req = &ptp->txts_req[slot]; + /* make sure bnxt_get_tx_ts_p5() has updated abs_txts_tmo */ + smp_rmb(); + if (!time_after_eq(now, txts_req->abs_txts_tmo)) + tmo = jiffies_to_msecs(txts_req->abs_txts_tmo - now); rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX, &ts, - tmo); + tmo, slot); if (!rc) { memset(×tamp, 0, sizeof(timestamp)); spin_lock_bh(&ptp->ptp_lock); ns = timecounter_cyc2time(&ptp->tc, ts); spin_unlock_bh(&ptp->ptp_lock); timestamp.hwtstamp = ns_to_ktime(ns); - skb_tstamp_tx(ptp->tx_skb, ×tamp); + skb_tstamp_tx(txts_req->tx_skb, ×tamp); ptp->stats.ts_pkts++; } else { - if (!time_after_eq(jiffies, ptp->abs_txts_tmo)) { - ptp->txts_pending = true; - return; - } + if (!time_after_eq(jiffies, txts_req->abs_txts_tmo)) + return -EAGAIN; + ptp->stats.ts_lost++; netdev_warn_once(bp->dev, "TS query for TX timer failed rc = %x\n", rc); } - dev_kfree_skb_any(ptp->tx_skb); - ptp->tx_skb = NULL; - atomic_inc(&ptp->tx_avail); - ptp->txts_pending = false; + dev_kfree_skb_any(txts_req->tx_skb); + txts_req->tx_skb = NULL; + + return 0; } static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) @@ -719,12 +729,30 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) ptp_info); unsigned long now = jiffies; struct bnxt *bp = ptp->bp; + u16 cons = ptp->txts_cons; + u32 num_requests; + int rc = 0; + + num_requests = BNXT_MAX_TX_TS - READ_ONCE(ptp->tx_avail); + while (num_requests--) { + if (IS_ERR(ptp->txts_req[cons].tx_skb)) + goto next_slot; + if (!ptp->txts_req[cons].tx_skb) + break; + rc = bnxt_stamp_tx_skb(bp, cons); + if (rc == -EAGAIN) + break; +next_slot: + BNXT_PTP_INC_TX_AVAIL(ptp); + cons = NEXT_TXTS(cons); + } + ptp->txts_cons = cons; - if (ptp->tx_skb) - bnxt_stamp_tx_skb(bp, ptp->tx_skb); - - if (!time_after_eq(now, ptp->next_period)) + if (!time_after_eq(now, ptp->next_period)) { + if (rc == -EAGAIN) + return 0; return ptp->next_period - now; + } bnxt_ptp_get_current_time(bp); ptp->next_period = now + HZ; @@ -734,22 +762,37 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) spin_unlock_bh(&ptp->ptp_lock); ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD; } - if (ptp->txts_pending) + if (rc == -EAGAIN) return 0; return HZ; } -int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb) +int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod) +{ + spin_lock_bh(&ptp->ptp_tx_lock); + if (ptp->tx_avail) { + *prod = ptp->txts_prod; + ptp->txts_prod = NEXT_TXTS(*prod); + ptp->tx_avail--; + spin_unlock_bh(&ptp->ptp_tx_lock); + return 0; + } + spin_unlock_bh(&ptp->ptp_tx_lock); + atomic64_inc(&ptp->stats.ts_err); + return -ENOSPC; +} + +void bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb, u16 prod) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + struct bnxt_ptp_tx_req *txts_req; - if (ptp->tx_skb) { - netdev_err(bp->dev, "deferring skb:one SKB is still outstanding\n"); - return -EBUSY; - } - ptp->tx_skb = skb; + txts_req = &ptp->txts_req[prod]; + txts_req->abs_txts_tmo = jiffies + msecs_to_jiffies(ptp->txts_tmo); + /* make sure abs_txts_tmo is written first */ + smp_wmb(); + txts_req->tx_skb = skb; ptp_schedule_worker(ptp->ptp_clock, 0); - return 0; } int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts) @@ -768,6 +811,38 @@ int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts) return 0; } +void bnxt_tx_ts_cmp(struct bnxt *bp, struct bnxt_napi *bnapi, + struct tx_ts_cmp *tscmp) +{ + struct skb_shared_hwtstamps timestamp = {}; + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + u32 opaque = tscmp->tx_ts_cmp_opaque; + struct bnxt_tx_ring_info *txr; + struct bnxt_sw_tx_bd *tx_buf; + u64 ts, ns; + u16 cons; + + txr = bnapi->tx_ring[TX_OPAQUE_RING(opaque)]; + ts = BNXT_GET_TX_TS_48B_NS(tscmp); + cons = TX_OPAQUE_IDX(opaque); + tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)]; + if (tx_buf->is_ts_pkt) { + if (BNXT_TX_TS_ERR(tscmp)) { + netdev_err(bp->dev, + "timestamp completion error 0x%x 0x%x\n", + le32_to_cpu(tscmp->tx_ts_cmp_flags_type), + le32_to_cpu(tscmp->tx_ts_cmp_errors_v)); + } else { + spin_lock_bh(&ptp->ptp_lock); + ns = timecounter_cyc2time(&ptp->tc, ts); + spin_unlock_bh(&ptp->ptp_lock); + timestamp.hwtstamp = ns_to_ktime(ns); + skb_tstamp_tx(tx_buf->skb, ×tamp); + } + tx_buf->is_ts_pkt = 0; + } +} + static const struct ptp_clock_info bnxt_ptp_caps = { .owner = THIS_MODULE, .name = "bnxt clock", @@ -914,7 +989,7 @@ int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg) return rc; } else { rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, - &ns, 0); + &ns, 0, 0); if (rc) return rc; } @@ -954,8 +1029,9 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) bnxt_ptp_free(bp); - atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS); + WRITE_ONCE(ptp->tx_avail, BNXT_MAX_TX_TS); spin_lock_init(&ptp->ptp_lock); + spin_lock_init(&ptp->ptp_tx_lock); if (BNXT_PTP_USE_RTC(bp)) { bnxt_ptp_timecounter_init(bp, false); @@ -986,7 +1062,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) ptp->stats.ts_lost = 0; atomic64_set(&ptp->stats.ts_err, 0); - if (BNXT_CHIP_P5(bp)) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { spin_lock_bh(&ptp->ptp_lock); bnxt_refclk_read(bp, NULL, &ptp->current_time); WRITE_ONCE(ptp->old_time, ptp->current_time); @@ -1005,6 +1081,7 @@ out: void bnxt_ptp_clear(struct bnxt *bp) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + int i; if (!ptp) return; @@ -1016,9 +1093,11 @@ void bnxt_ptp_clear(struct bnxt *bp) kfree(ptp->ptp_info.pin_config); ptp->ptp_info.pin_config = NULL; - if (ptp->tx_skb) { - dev_kfree_skb_any(ptp->tx_skb); - ptp->tx_skb = NULL; + for (i = 0; i < BNXT_MAX_TX_TS; i++) { + if (ptp->txts_req[i].tx_skb) { + dev_kfree_skb_any(ptp->txts_req[i].tx_skb); + ptp->txts_req[i].tx_skb = NULL; + } } bnxt_unmap_ptp_regs(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 8c30b428a428..a9a2f9a18c9c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -85,6 +85,16 @@ struct bnxt_ptp_stats { atomic64_t ts_err; }; +#define BNXT_MAX_TX_TS 4 +#define NEXT_TXTS(idx) (((idx) + 1) & (BNXT_MAX_TX_TS - 1)) + +struct bnxt_ptp_tx_req { + struct sk_buff *tx_skb; + u16 tx_seqid; + u16 tx_hdr_off; + unsigned long abs_txts_tmo; +}; + struct bnxt_ptp_cfg { struct ptp_clock_info ptp_info; struct ptp_clock *ptp_clock; @@ -93,7 +103,8 @@ struct bnxt_ptp_cfg { struct bnxt_pps pps_info; /* serialize timecounter access */ spinlock_t ptp_lock; - struct sk_buff *tx_skb; + /* serialize ts tx request queuing */ + spinlock_t ptp_tx_lock; u64 current_time; u64 old_time; unsigned long next_period; @@ -102,11 +113,10 @@ struct bnxt_ptp_cfg { /* a 23b shift cyclecounter will overflow in ~36 mins. Check overflow every 18 mins. */ #define BNXT_PHC_OVERFLOW_PERIOD (18 * 60 * HZ) - u16 tx_seqid; - u16 tx_hdr_off; + struct bnxt_ptp_tx_req txts_req[BNXT_MAX_TX_TS]; + struct bnxt *bp; - atomic_t tx_avail; -#define BNXT_MAX_TX_TS 1 + u32 tx_avail; u16 rxctl; #define BNXT_PTP_MSG_SYNC (1 << 0) #define BNXT_PTP_MSG_DELAY_REQ (1 << 1) @@ -123,14 +133,14 @@ struct bnxt_ptp_cfg { BNXT_PTP_MSG_PDELAY_REQ | \ BNXT_PTP_MSG_PDELAY_RESP) u8 tx_tstamp_en:1; - u8 txts_pending:1; int rx_filter; u32 tstamp_filters; u32 refclk_regs[2]; u32 refclk_mapped_regs[2]; u32 txts_tmo; - unsigned long abs_txts_tmo; + u16 txts_prod; + u16 txts_cons; struct bnxt_ptp_stats stats; }; @@ -147,6 +157,13 @@ do { \ ((dst) = READ_ONCE(src)) #endif +#define BNXT_PTP_INC_TX_AVAIL(ptp) \ +do { \ + spin_lock_bh(&(ptp)->ptp_tx_lock); \ + (ptp)->tx_avail++; \ + spin_unlock_bh(&(ptp)->ptp_tx_lock); \ +} while (0) + int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off); void bnxt_ptp_update_current_time(struct bnxt *bp); void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2); @@ -154,8 +171,11 @@ int bnxt_ptp_cfg_tstamp_filters(struct bnxt *bp); void bnxt_ptp_reapply_pps(struct bnxt *bp); int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); -int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); +int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod); +void bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb, u16 prod); int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); +void bnxt_tx_ts_cmp(struct bnxt *bp, struct bnxt_napi *bnapi, + struct tx_ts_cmp *tscmp); void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns); int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg); int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg); diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 241906697019..a42f3f280f3e 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -608,6 +608,28 @@ static int enic_get_ts_info(struct net_device *netdev, return 0; } +static void enic_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct enic *enic = netdev_priv(netdev); + + switch (vnic_dev_get_intr_mode(enic->vdev)) { + case VNIC_DEV_INTR_MODE_MSIX: + channels->max_rx = ENIC_RQ_MAX; + channels->max_tx = ENIC_WQ_MAX; + channels->rx_count = enic->rq_count; + channels->tx_count = enic->wq_count; + break; + case VNIC_DEV_INTR_MODE_MSI: + case VNIC_DEV_INTR_MODE_INTX: + channels->max_combined = 1; + channels->combined_count = 1; + break; + default: + break; + } +} + static const struct ethtool_ops enic_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX | @@ -632,6 +654,7 @@ static const struct ethtool_ops enic_ethtool_ops = { .set_rxfh = enic_set_rxfh, .get_link_ksettings = enic_get_ksettings, .get_ts_info = enic_get_ts_info, + .get_channels = enic_get_channels, }; void enic_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 4a77f6fe2622..ebf84c240d6d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -139,6 +139,7 @@ M(MSIX_OFFSET, 0x005, msix_offset, msg_req, msix_offset_rsp) \ M(VF_FLR, 0x006, vf_flr, msg_req, msg_rsp) \ M(PTP_OP, 0x007, ptp_op, ptp_req, ptp_rsp) \ M(GET_HW_CAP, 0x008, get_hw_cap, msg_req, get_hw_cap_rsp) \ +M(NDC_SYNC_OP, 0x009, ndc_sync_op, ndc_sync_op, msg_rsp) \ M(LMTST_TBL_SETUP, 0x00a, lmtst_tbl_setup, lmtst_tbl_setup_req, \ msg_rsp) \ M(SET_VF_PERM, 0x00b, set_vf_perm, set_vf_perm, msg_rsp) \ @@ -1716,6 +1717,13 @@ struct lmtst_tbl_setup_req { u64 rsvd[4]; }; +struct ndc_sync_op { + struct mbox_msghdr hdr; + u8 nix_lf_tx_sync; + u8 nix_lf_rx_sync; + u8 npa_lf_sync; +}; + /* CPT mailbox error codes * Range 901 - 1000. */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index ff78251f92d4..5a4f774aed64 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2014,6 +2014,13 @@ int rvu_mbox_handler_vf_flr(struct rvu *rvu, struct msg_req *req, return 0; } +int rvu_ndc_sync(struct rvu *rvu, int lfblkaddr, int lfidx, u64 lfoffset) +{ + /* Sync cached info for this LF in NDC to LLC/DRAM */ + rvu_write64(rvu, lfblkaddr, lfoffset, BIT_ULL(12) | lfidx); + return rvu_poll_reg(rvu, lfblkaddr, lfoffset, BIT_ULL(12), true); +} + int rvu_mbox_handler_get_hw_cap(struct rvu *rvu, struct msg_req *req, struct get_hw_cap_rsp *rsp) { @@ -2068,6 +2075,65 @@ int rvu_mbox_handler_set_vf_perm(struct rvu *rvu, struct set_vf_perm *req, return 0; } +int rvu_mbox_handler_ndc_sync_op(struct rvu *rvu, + struct ndc_sync_op *req, + struct msg_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + int err, lfidx, lfblkaddr; + + if (req->npa_lf_sync) { + /* Get NPA LF data */ + lfblkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (lfblkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + lfidx = rvu_get_lf(rvu, &hw->block[lfblkaddr], pcifunc, 0); + if (lfidx < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + /* Sync NPA NDC */ + err = rvu_ndc_sync(rvu, lfblkaddr, + lfidx, NPA_AF_NDC_SYNC); + if (err) + dev_err(rvu->dev, + "NDC-NPA sync failed for LF %u\n", lfidx); + } + + if (!req->nix_lf_tx_sync && !req->nix_lf_rx_sync) + return 0; + + /* Get NIX LF data */ + lfblkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (lfblkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + lfidx = rvu_get_lf(rvu, &hw->block[lfblkaddr], pcifunc, 0); + if (lfidx < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + if (req->nix_lf_tx_sync) { + /* Sync NIX TX NDC */ + err = rvu_ndc_sync(rvu, lfblkaddr, + lfidx, NIX_AF_NDC_TX_SYNC); + if (err) + dev_err(rvu->dev, + "NDC-NIX-TX sync fail for LF %u\n", lfidx); + } + + if (req->nix_lf_rx_sync) { + /* Sync NIX RX NDC */ + err = rvu_ndc_sync(rvu, lfblkaddr, + lfidx, NIX_AF_NDC_RX_SYNC); + if (err) + dev_err(rvu->dev, + "NDC-NIX-RX sync failed for LF %u\n", lfidx); + } + + return 0; +} + static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid, struct mbox_msghdr *req) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 3063a84a45ef..03ee93fd9e94 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -800,6 +800,7 @@ int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf); int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc); int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero); int rvu_get_num_lbk_chans(void); +int rvu_ndc_sync(struct rvu *rvu, int lfblkid, int lfidx, u64 lfoffset); int rvu_get_blkaddr_from_slot(struct rvu *rvu, int blktype, u16 pcifunc, u16 global_slot, u16 *slot_in_block); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 00af8888e329..4dbbaa719cbf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2497,9 +2497,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc) } mutex_unlock(&rvu->rsrc_lock); - /* Sync cached info for this LF in NDC-TX to LLC/DRAM */ - rvu_write64(rvu, blkaddr, NIX_AF_NDC_TX_SYNC, BIT_ULL(12) | nixlf); - err = rvu_poll_reg(rvu, blkaddr, NIX_AF_NDC_TX_SYNC, BIT_ULL(12), true); + err = rvu_ndc_sync(rvu, blkaddr, nixlf, NIX_AF_NDC_TX_SYNC); if (err) dev_err(rvu->dev, "NDC-TX sync failed for NIXLF %d\n", nixlf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 5ec92654e7ad..d56be5fb7eb4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -121,6 +121,7 @@ #define NPA_AF_LF_RST (0x0020) #define NPA_AF_GEN_CFG (0x0030) #define NPA_AF_NDC_CFG (0x0040) +#define NPA_AF_NDC_SYNC (0x0050) #define NPA_AF_INP_CTL (0x00D0) #define NPA_AF_ACTIVE_CYCLES_PC (0x00F0) #define NPA_AF_AVG_DELAY (0x0100) @@ -239,6 +240,7 @@ #define NIX_AF_RX_CPTX_INST_ADDR (0x0310) #define NIX_AF_RX_CPTX_INST_QSEL(a) (0x0320ull | (uint64_t)(a) << 3) #define NIX_AF_RX_CPTX_CREDIT(a) (0x0360ull | (uint64_t)(a) << 3) +#define NIX_AF_NDC_RX_SYNC (0x03E0) #define NIX_AF_NDC_TX_SYNC (0x03F0) #define NIX_AF_AQ_CFG (0x0400) #define NIX_AF_AQ_BASE (0x0410) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index ff05ea20409a..5492dea547a1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -3245,6 +3245,29 @@ static int otx2_sriov_configure(struct pci_dev *pdev, int numvfs) return otx2_sriov_enable(pdev, numvfs); } +static void otx2_ndc_sync(struct otx2_nic *pf) +{ + struct mbox *mbox = &pf->mbox; + struct ndc_sync_op *req; + + mutex_lock(&mbox->lock); + + req = otx2_mbox_alloc_msg_ndc_sync_op(mbox); + if (!req) { + mutex_unlock(&mbox->lock); + return; + } + + req->nix_lf_tx_sync = 1; + req->nix_lf_rx_sync = 1; + req->npa_lf_sync = 1; + + if (!otx2_sync_mbox_msg(mbox)) + dev_err(pf->dev, "NDC sync operation failed\n"); + + mutex_unlock(&mbox->lock); +} + static void otx2_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); @@ -3293,6 +3316,7 @@ static void otx2_remove(struct pci_dev *pdev) otx2_mcam_flow_del(pf); otx2_shutdown_tc(pf); otx2_shutdown_qos(pf); + otx2_ndc_sync(pf); otx2_detach_resources(&pf->mbox); if (pf->hw.lmt_info) free_percpu(pf->hw.lmt_info); diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 9246ea2118ff..714d2e804694 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1608,7 +1608,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) if (!tp->dash_enabled) { rtl_set_d3_pll_down(tp, !wolopts); - tp->dev->wol_enabled = wolopts ? 1 : 0; + tp->dev->ethtool->wol_enabled = wolopts ? 1 : 0; } } @@ -5478,7 +5478,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl_set_d3_pll_down(tp, true); } else { rtl_set_d3_pll_down(tp, false); - dev->wol_enabled = 1; + dev->ethtool->wol_enabled = 1; } jumbo_max = rtl_jumbo_max(tp); diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 8fa6c0e9195b..7d69302ffa0a 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1396,7 +1396,7 @@ static void efx_ef10_table_reset_mc_allocations(struct efx_nic *efx) efx_mcdi_filter_table_reset_mc_allocations(efx); nic_data->must_restore_piobufs = true; efx_ef10_forget_old_piobufs(efx); - efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + efx->rss_context.priv.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; /* Driver-created vswitches and vports must be re-created */ nic_data->must_probe_vswitching = true; diff --git a/drivers/net/ethernet/sfc/ef100_ethtool.c b/drivers/net/ethernet/sfc/ef100_ethtool.c index cf55202b3a7b..896ffca4aee2 100644 --- a/drivers/net/ethernet/sfc/ef100_ethtool.c +++ b/drivers/net/ethernet/sfc/ef100_ethtool.c @@ -59,8 +59,12 @@ const struct ethtool_ops ef100_ethtool_ops = { .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, .get_rxfh_key_size = efx_ethtool_get_rxfh_key_size, + .rxfh_priv_size = sizeof(struct efx_rss_context_priv), .get_rxfh = efx_ethtool_get_rxfh, .set_rxfh = efx_ethtool_set_rxfh, + .create_rxfh_context = efx_ethtool_create_rxfh_context, + .modify_rxfh_context = efx_ethtool_modify_rxfh_context, + .remove_rxfh_context = efx_ethtool_remove_rxfh_context, .get_module_info = efx_ethtool_get_module_info, .get_module_eeprom = efx_ethtool_get_module_eeprom, diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index e9d9de8e648a..6f1a01ded7d4 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -299,7 +299,7 @@ static int efx_probe_nic(struct efx_nic *efx) if (efx->n_channels > 1) netdev_rss_key_fill(efx->rss_context.rx_hash_key, sizeof(efx->rss_context.rx_hash_key)); - efx_set_default_rx_indir_table(efx, &efx->rss_context); + efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table); /* Initialise the interrupt moderation settings */ efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000); diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 48d3623735ba..7a6cab883d66 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -158,7 +158,7 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx, } /* RSS contexts */ -static inline bool efx_rss_active(struct efx_rss_context *ctx) +static inline bool efx_rss_active(struct efx_rss_context_priv *ctx) { return ctx->context_id != EFX_MCDI_RSS_CONTEXT_INVALID; } diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index 4ebd5ae23eca..13cf647051af 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -714,7 +714,7 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) mutex_lock(&efx->mac_lock); down_write(&efx->filter_sem); - mutex_lock(&efx->rss_lock); + mutex_lock(&efx->net_dev->ethtool->rss_lock); efx->type->fini(efx); } @@ -777,7 +777,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) if (efx->type->rx_restore_rss_contexts) efx->type->rx_restore_rss_contexts(efx); - mutex_unlock(&efx->rss_lock); + mutex_unlock(&efx->net_dev->ethtool->rss_lock); efx->type->filter_table_restore(efx); up_write(&efx->filter_sem); @@ -793,7 +793,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) fail: efx->port_initialized = false; - mutex_unlock(&efx->rss_lock); + mutex_unlock(&efx->net_dev->ethtool->rss_lock); up_write(&efx->filter_sem); mutex_unlock(&efx->mac_lock); @@ -1000,9 +1000,7 @@ int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev) efx->type->rx_hash_offset - efx->type->rx_prefix_size; efx->rx_packet_ts_offset = efx->type->rx_ts_offset - efx->type->rx_prefix_size; - INIT_LIST_HEAD(&efx->rss_context.list); - efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; - mutex_init(&efx->rss_lock); + efx->rss_context.priv.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; efx->vport_id = EVB_PORT_ID_ASSIGNED; spin_lock_init(&efx->stats_lock); efx->vi_stride = EFX_DEFAULT_VI_STRIDE; diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 37c69c8d90b1..0f5c68b8bab7 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -268,8 +268,12 @@ const struct ethtool_ops efx_ethtool_ops = { .set_rxnfc = efx_ethtool_set_rxnfc, .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, .get_rxfh_key_size = efx_ethtool_get_rxfh_key_size, + .rxfh_priv_size = sizeof(struct efx_rss_context_priv), .get_rxfh = efx_ethtool_get_rxfh, .set_rxfh = efx_ethtool_set_rxfh, + .create_rxfh_context = efx_ethtool_create_rxfh_context, + .modify_rxfh_context = efx_ethtool_modify_rxfh_context, + .remove_rxfh_context = efx_ethtool_remove_rxfh_context, .get_ts_info = efx_ethtool_get_ts_info, .get_module_info = efx_ethtool_get_module_info, .get_module_eeprom = efx_ethtool_get_module_eeprom, diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c index 7d5e5db4eac5..6ded44b86052 100644 --- a/drivers/net/ethernet/sfc/ethtool_common.c +++ b/drivers/net/ethernet/sfc/ethtool_common.c @@ -820,10 +820,10 @@ int efx_ethtool_get_rxnfc(struct net_device *net_dev, return 0; case ETHTOOL_GRXFH: { - struct efx_rss_context *ctx = &efx->rss_context; + struct efx_rss_context_priv *ctx = &efx->rss_context.priv; __u64 data; - mutex_lock(&efx->rss_lock); + mutex_lock(&net_dev->ethtool->rss_lock); if (info->flow_type & FLOW_RSS && info->rss_context) { ctx = efx_find_rss_context_entry(efx, info->rss_context); if (!ctx) { @@ -864,7 +864,7 @@ int efx_ethtool_get_rxnfc(struct net_device *net_dev, out_setdata_unlock: info->data = data; out_unlock: - mutex_unlock(&efx->rss_lock); + mutex_unlock(&net_dev->ethtool->rss_lock); return rc; } @@ -1163,46 +1163,14 @@ u32 efx_ethtool_get_rxfh_key_size(struct net_device *net_dev) return efx->type->rx_hash_key_size; } -static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, - struct ethtool_rxfh_param *rxfh) -{ - struct efx_nic *efx = efx_netdev_priv(net_dev); - struct efx_rss_context *ctx; - int rc = 0; - - if (!efx->type->rx_pull_rss_context_config) - return -EOPNOTSUPP; - - mutex_lock(&efx->rss_lock); - ctx = efx_find_rss_context_entry(efx, rxfh->rss_context); - if (!ctx) { - rc = -ENOENT; - goto out_unlock; - } - rc = efx->type->rx_pull_rss_context_config(efx, ctx); - if (rc) - goto out_unlock; - - rxfh->hfunc = ETH_RSS_HASH_TOP; - if (rxfh->indir) - memcpy(rxfh->indir, ctx->rx_indir_table, - sizeof(ctx->rx_indir_table)); - if (rxfh->key) - memcpy(rxfh->key, ctx->rx_hash_key, - efx->type->rx_hash_key_size); -out_unlock: - mutex_unlock(&efx->rss_lock); - return rc; -} - int efx_ethtool_get_rxfh(struct net_device *net_dev, struct ethtool_rxfh_param *rxfh) { struct efx_nic *efx = efx_netdev_priv(net_dev); int rc; - if (rxfh->rss_context) - return efx_ethtool_get_rxfh_context(net_dev, rxfh); + if (rxfh->rss_context) /* core should never call us for these */ + return -EINVAL; rc = efx->type->rx_pull_rss_config(efx); if (rc) @@ -1218,68 +1186,85 @@ int efx_ethtool_get_rxfh(struct net_device *net_dev, return 0; } -static int efx_ethtool_set_rxfh_context(struct net_device *net_dev, - struct ethtool_rxfh_param *rxfh, - struct netlink_ext_ack *extack) +int efx_ethtool_modify_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) { struct efx_nic *efx = efx_netdev_priv(net_dev); - u32 *rss_context = &rxfh->rss_context; - struct efx_rss_context *ctx; - u32 *indir = rxfh->indir; - bool allocated = false; - u8 *key = rxfh->key; - int rc; + struct efx_rss_context_priv *priv; + const u32 *indir = rxfh->indir; + const u8 *key = rxfh->key; - if (!efx->type->rx_push_rss_context_config) + if (!efx->type->rx_push_rss_context_config) { + NL_SET_ERR_MSG_MOD(extack, + "NIC type does not support custom contexts"); return -EOPNOTSUPP; - - mutex_lock(&efx->rss_lock); - - if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { - if (rxfh->rss_delete) { - /* alloc + delete == Nothing to do */ - rc = -EINVAL; - goto out_unlock; - } - ctx = efx_alloc_rss_context_entry(efx); - if (!ctx) { - rc = -ENOMEM; - goto out_unlock; - } - ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; - /* Initialise indir table and key to defaults */ - efx_set_default_rx_indir_table(efx, ctx); - netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key)); - allocated = true; - } else { - ctx = efx_find_rss_context_entry(efx, *rss_context); - if (!ctx) { - rc = -ENOENT; - goto out_unlock; - } } - - if (rxfh->rss_delete) { - /* delete this context */ - rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL); - if (!rc) - efx_free_rss_context_entry(ctx); - goto out_unlock; + /* Hash function is Toeplitz, cannot be changed */ + if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && + rxfh->hfunc != ETH_RSS_HASH_TOP) { + NL_SET_ERR_MSG_MOD(extack, "Only Toeplitz hash is supported"); + return -EOPNOTSUPP; } + priv = ethtool_rxfh_context_priv(ctx); + if (!key) - key = ctx->rx_hash_key; + key = ethtool_rxfh_context_key(ctx); if (!indir) - indir = ctx->rx_indir_table; + indir = ethtool_rxfh_context_indir(ctx); - rc = efx->type->rx_push_rss_context_config(efx, ctx, indir, key); - if (rc && allocated) - efx_free_rss_context_entry(ctx); - else - *rss_context = ctx->user_id; -out_unlock: - mutex_unlock(&efx->rss_lock); - return rc; + return efx->type->rx_push_rss_context_config(efx, priv, indir, key, + false); +} + +int efx_ethtool_create_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct efx_nic *efx = efx_netdev_priv(net_dev); + struct efx_rss_context_priv *priv; + + priv = ethtool_rxfh_context_priv(ctx); + + priv->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + priv->rx_hash_udp_4tuple = false; + /* Generate default indir table and/or key if not specified. + * We use ctx as a place to store these; this is fine because + * we're doing a create, so if we fail then the ctx will just + * be deleted. + */ + if (!rxfh->indir) + efx_set_default_rx_indir_table(efx, ethtool_rxfh_context_indir(ctx)); + if (!rxfh->key) + netdev_rss_key_fill(ethtool_rxfh_context_key(ctx), + ctx->key_size); + if (rxfh->hfunc == ETH_RSS_HASH_NO_CHANGE) + ctx->hfunc = ETH_RSS_HASH_TOP; + if (rxfh->input_xfrm == RXH_XFRM_NO_CHANGE) + ctx->input_xfrm = 0; + return efx_ethtool_modify_rxfh_context(net_dev, ctx, rxfh, extack); +} + +int efx_ethtool_remove_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack) +{ + struct efx_nic *efx = efx_netdev_priv(net_dev); + struct efx_rss_context_priv *priv; + + if (!efx->type->rx_push_rss_context_config) { + NL_SET_ERR_MSG_MOD(extack, + "NIC type does not support custom contexts"); + return -EOPNOTSUPP; + } + + priv = ethtool_rxfh_context_priv(ctx); + return efx->type->rx_push_rss_context_config(efx, priv, NULL, NULL, + true); } int efx_ethtool_set_rxfh(struct net_device *net_dev, @@ -1295,8 +1280,9 @@ int efx_ethtool_set_rxfh(struct net_device *net_dev, rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (rxfh->rss_context) - return efx_ethtool_set_rxfh_context(net_dev, rxfh, extack); + /* Custom contexts should use new API */ + if (WARN_ON_ONCE(rxfh->rss_context)) + return -EIO; if (!indir && !key) return 0; diff --git a/drivers/net/ethernet/sfc/ethtool_common.h b/drivers/net/ethernet/sfc/ethtool_common.h index a680e5980213..fc52e891637d 100644 --- a/drivers/net/ethernet/sfc/ethtool_common.h +++ b/drivers/net/ethernet/sfc/ethtool_common.h @@ -49,6 +49,18 @@ int efx_ethtool_get_rxfh(struct net_device *net_dev, int efx_ethtool_set_rxfh(struct net_device *net_dev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack); +int efx_ethtool_create_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); +int efx_ethtool_modify_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); +int efx_ethtool_remove_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack); int efx_ethtool_reset(struct net_device *net_dev, u32 *flags); int efx_ethtool_get_module_eeprom(struct net_device *net_dev, struct ethtool_eeprom *ee, diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c index 4ff6586116ee..6ef96292909a 100644 --- a/drivers/net/ethernet/sfc/mcdi_filters.c +++ b/drivers/net/ethernet/sfc/mcdi_filters.c @@ -194,7 +194,7 @@ efx_mcdi_filter_push_prep_set_match_fields(struct efx_nic *efx, static void efx_mcdi_filter_push_prep(struct efx_nic *efx, const struct efx_filter_spec *spec, efx_dword_t *inbuf, u64 handle, - struct efx_rss_context *ctx, + struct efx_rss_context_priv *ctx, bool replacing) { u32 flags = spec->flags; @@ -245,7 +245,7 @@ static void efx_mcdi_filter_push_prep(struct efx_nic *efx, static int efx_mcdi_filter_push(struct efx_nic *efx, const struct efx_filter_spec *spec, u64 *handle, - struct efx_rss_context *ctx, bool replacing) + struct efx_rss_context_priv *ctx, bool replacing) { MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN); @@ -345,9 +345,9 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx, bool replace_equal) { DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); + struct efx_rss_context_priv *ctx = NULL; struct efx_mcdi_filter_table *table; struct efx_filter_spec *saved_spec; - struct efx_rss_context *ctx = NULL; unsigned int match_pri, hash; unsigned int priv_flags; bool rss_locked = false; @@ -380,12 +380,12 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx, bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { - mutex_lock(&efx->rss_lock); + mutex_lock(&efx->net_dev->ethtool->rss_lock); rss_locked = true; if (spec->rss_context) ctx = efx_find_rss_context_entry(efx, spec->rss_context); else - ctx = &efx->rss_context; + ctx = &efx->rss_context.priv; if (!ctx) { rc = -ENOENT; goto out_unlock; @@ -548,7 +548,7 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx, out_unlock: if (rss_locked) - mutex_unlock(&efx->rss_lock); + mutex_unlock(&efx->net_dev->ethtool->rss_lock); up_write(&table->lock); return rc; } @@ -611,13 +611,13 @@ static int efx_mcdi_filter_remove_internal(struct efx_nic *efx, new_spec.priority = EFX_FILTER_PRI_AUTO; new_spec.flags = (EFX_FILTER_FLAG_RX | - (efx_rss_active(&efx->rss_context) ? + (efx_rss_active(&efx->rss_context.priv) ? EFX_FILTER_FLAG_RX_RSS : 0)); new_spec.dmaq_id = 0; new_spec.rss_context = 0; rc = efx_mcdi_filter_push(efx, &new_spec, &table->entry[filter_idx].handle, - &efx->rss_context, + &efx->rss_context.priv, true); if (rc == 0) @@ -764,7 +764,7 @@ static int efx_mcdi_filter_insert_addr_list(struct efx_nic *efx, ids = vlan->uc; } - filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; + filter_flags = efx_rss_active(&efx->rss_context.priv) ? EFX_FILTER_FLAG_RX_RSS : 0; /* Insert/renew filters */ for (i = 0; i < addr_count; i++) { @@ -833,7 +833,7 @@ static int efx_mcdi_filter_insert_def(struct efx_nic *efx, int rc; u16 *id; - filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; + filter_flags = efx_rss_active(&efx->rss_context.priv) ? EFX_FILTER_FLAG_RX_RSS : 0; efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); @@ -1375,8 +1375,8 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx) struct efx_mcdi_filter_table *table = efx->filter_state; unsigned int invalid_filters = 0, failed = 0; struct efx_mcdi_filter_vlan *vlan; + struct efx_rss_context_priv *ctx; struct efx_filter_spec *spec; - struct efx_rss_context *ctx; unsigned int filter_idx; u32 mcdi_flags; int match_pri; @@ -1388,7 +1388,7 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx) return; down_write(&table->lock); - mutex_lock(&efx->rss_lock); + mutex_lock(&efx->net_dev->ethtool->rss_lock); for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) { spec = efx_mcdi_filter_entry_spec(table, filter_idx); @@ -1407,7 +1407,7 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx) if (spec->rss_context) ctx = efx_find_rss_context_entry(efx, spec->rss_context); else - ctx = &efx->rss_context; + ctx = &efx->rss_context.priv; if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { if (!ctx) { netif_warn(efx, drv, efx->net_dev, @@ -1444,7 +1444,7 @@ not_restored: } } - mutex_unlock(&efx->rss_lock); + mutex_unlock(&efx->net_dev->ethtool->rss_lock); up_write(&table->lock); /* @@ -1861,7 +1861,8 @@ out_unlock: RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\ RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN) -int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, u32 *flags) +static int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, + u32 *flags) { /* * Firmware had a bug (sfc bug 61952) where it would not actually @@ -1909,8 +1910,8 @@ int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, u32 *flags) * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we * just need to set the UDP ports flags (for both IP versions). */ -void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, - struct efx_rss_context *ctx) +static void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, + struct efx_rss_context_priv *ctx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN); u32 flags; @@ -1931,7 +1932,7 @@ void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, } static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive, - struct efx_rss_context *ctx, + struct efx_rss_context_priv *ctx, unsigned *context_size) { MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN); @@ -2032,25 +2033,26 @@ void efx_mcdi_rx_free_indir_table(struct efx_nic *efx) { int rc; - if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) { - rc = efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id); + if (efx->rss_context.priv.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) { + rc = efx_mcdi_filter_free_rss_context(efx, efx->rss_context.priv.context_id); WARN_ON(rc != 0); } - efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + efx->rss_context.priv.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; } static int efx_mcdi_filter_rx_push_shared_rss_config(struct efx_nic *efx, unsigned *context_size) { struct efx_mcdi_filter_table *table = efx->filter_state; - int rc = efx_mcdi_filter_alloc_rss_context(efx, false, &efx->rss_context, - context_size); + int rc = efx_mcdi_filter_alloc_rss_context(efx, false, + &efx->rss_context.priv, + context_size); if (rc != 0) return rc; table->rx_rss_context_exclusive = false; - efx_set_default_rx_indir_table(efx, &efx->rss_context); + efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table); return 0; } @@ -2058,26 +2060,27 @@ static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx, const u32 *rx_indir_table, const u8 *key) { + u32 old_rx_rss_context = efx->rss_context.priv.context_id; struct efx_mcdi_filter_table *table = efx->filter_state; - u32 old_rx_rss_context = efx->rss_context.context_id; int rc; - if (efx->rss_context.context_id == EFX_MCDI_RSS_CONTEXT_INVALID || + if (efx->rss_context.priv.context_id == EFX_MCDI_RSS_CONTEXT_INVALID || !table->rx_rss_context_exclusive) { - rc = efx_mcdi_filter_alloc_rss_context(efx, true, &efx->rss_context, - NULL); + rc = efx_mcdi_filter_alloc_rss_context(efx, true, + &efx->rss_context.priv, + NULL); if (rc == -EOPNOTSUPP) return rc; else if (rc != 0) goto fail1; } - rc = efx_mcdi_filter_populate_rss_table(efx, efx->rss_context.context_id, - rx_indir_table, key); + rc = efx_mcdi_filter_populate_rss_table(efx, efx->rss_context.priv.context_id, + rx_indir_table, key); if (rc != 0) goto fail2; - if (efx->rss_context.context_id != old_rx_rss_context && + if (efx->rss_context.priv.context_id != old_rx_rss_context && old_rx_rss_context != EFX_MCDI_RSS_CONTEXT_INVALID) WARN_ON(efx_mcdi_filter_free_rss_context(efx, old_rx_rss_context) != 0); table->rx_rss_context_exclusive = true; @@ -2091,9 +2094,9 @@ static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx, return 0; fail2: - if (old_rx_rss_context != efx->rss_context.context_id) { - WARN_ON(efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id) != 0); - efx->rss_context.context_id = old_rx_rss_context; + if (old_rx_rss_context != efx->rss_context.priv.context_id) { + WARN_ON(efx_mcdi_filter_free_rss_context(efx, efx->rss_context.priv.context_id) != 0); + efx->rss_context.priv.context_id = old_rx_rss_context; } fail1: netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); @@ -2101,33 +2104,28 @@ fail1: } int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx, - struct efx_rss_context *ctx, + struct efx_rss_context_priv *ctx, const u32 *rx_indir_table, - const u8 *key) + const u8 *key, bool delete) { int rc; - WARN_ON(!mutex_is_locked(&efx->rss_lock)); + WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock)); if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { + if (delete) + /* already wasn't in HW, nothing to do */ + return 0; rc = efx_mcdi_filter_alloc_rss_context(efx, true, ctx, NULL); if (rc) return rc; } - if (!rx_indir_table) /* Delete this context */ + if (delete) /* Delete this context */ return efx_mcdi_filter_free_rss_context(efx, ctx->context_id); - rc = efx_mcdi_filter_populate_rss_table(efx, ctx->context_id, - rx_indir_table, key); - if (rc) - return rc; - - memcpy(ctx->rx_indir_table, rx_indir_table, - sizeof(efx->rss_context.rx_indir_table)); - memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size); - - return 0; + return efx_mcdi_filter_populate_rss_table(efx, ctx->context_id, + rx_indir_table, key); } int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, @@ -2139,16 +2137,16 @@ int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, size_t outlen; int rc, i; - WARN_ON(!mutex_is_locked(&efx->rss_lock)); + WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock)); BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN != MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN); - if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) + if (ctx->priv.context_id == EFX_MCDI_RSS_CONTEXT_INVALID) return -ENOENT; MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID, - ctx->context_id); + ctx->priv.context_id); BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN); rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf), @@ -2164,7 +2162,7 @@ int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i]; MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID, - ctx->context_id); + ctx->priv.context_id); BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) != MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN); rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf), @@ -2186,35 +2184,42 @@ int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx) { int rc; - mutex_lock(&efx->rss_lock); + mutex_lock(&efx->net_dev->ethtool->rss_lock); rc = efx_mcdi_rx_pull_rss_context_config(efx, &efx->rss_context); - mutex_unlock(&efx->rss_lock); + mutex_unlock(&efx->net_dev->ethtool->rss_lock); return rc; } void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx) { struct efx_mcdi_filter_table *table = efx->filter_state; - struct efx_rss_context *ctx; + struct ethtool_rxfh_context *ctx; + unsigned long context; int rc; - WARN_ON(!mutex_is_locked(&efx->rss_lock)); + WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock)); if (!table->must_restore_rss_contexts) return; - list_for_each_entry(ctx, &efx->rss_context.list, list) { + xa_for_each(&efx->net_dev->ethtool->rss_ctx, context, ctx) { + struct efx_rss_context_priv *priv; + u32 *indir; + u8 *key; + + priv = ethtool_rxfh_context_priv(ctx); /* previous NIC RSS context is gone */ - ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + priv->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; /* so try to allocate a new one */ - rc = efx_mcdi_rx_push_rss_context_config(efx, ctx, - ctx->rx_indir_table, - ctx->rx_hash_key); + indir = ethtool_rxfh_context_indir(ctx); + key = ethtool_rxfh_context_key(ctx); + rc = efx_mcdi_rx_push_rss_context_config(efx, priv, indir, key, + false); if (rc) netif_warn(efx, probe, efx->net_dev, - "failed to restore RSS context %u, rc=%d" + "failed to restore RSS context %lu, rc=%d" "; RSS filters may fail to be applied\n", - ctx->user_id, rc); + context, rc); } table->must_restore_rss_contexts = false; } @@ -2276,7 +2281,7 @@ int efx_mcdi_vf_rx_push_rss_config(struct efx_nic *efx, bool user, { if (user) return -EOPNOTSUPP; - if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) + if (efx->rss_context.priv.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) return 0; return efx_mcdi_filter_rx_push_shared_rss_config(efx, NULL); } @@ -2295,7 +2300,7 @@ int efx_mcdi_push_default_indir_table(struct efx_nic *efx, efx_mcdi_rx_free_indir_table(efx); if (rss_spread > 1) { - efx_set_default_rx_indir_table(efx, &efx->rss_context); + efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table); rc = efx->type->rx_push_rss_config(efx, false, efx->rss_context.rx_indir_table, NULL); } diff --git a/drivers/net/ethernet/sfc/mcdi_filters.h b/drivers/net/ethernet/sfc/mcdi_filters.h index c0d6558b9fd2..11b9f87ed9e1 100644 --- a/drivers/net/ethernet/sfc/mcdi_filters.h +++ b/drivers/net/ethernet/sfc/mcdi_filters.h @@ -145,9 +145,9 @@ void efx_mcdi_filter_del_vlan(struct efx_nic *efx, u16 vid); void efx_mcdi_rx_free_indir_table(struct efx_nic *efx); int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx, - struct efx_rss_context *ctx, + struct efx_rss_context_priv *ctx, const u32 *rx_indir_table, - const u8 *key); + const u8 *key, bool delete); int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user, const u32 *rx_indir_table, const u8 *key); @@ -161,10 +161,6 @@ int efx_mcdi_push_default_indir_table(struct efx_nic *efx, int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx); int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, struct efx_rss_context *ctx); -int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, - u32 *flags); -void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, - struct efx_rss_context *ctx); void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx); static inline void efx_mcdi_update_rx_scatter(struct efx_nic *efx) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index f2dd7feb0e0c..b85c51cbe7f9 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -737,21 +737,24 @@ struct vfdi_status; /* The reserved RSS context value */ #define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff /** - * struct efx_rss_context - A user-defined RSS context for filtering - * @list: node of linked list on which this struct is stored + * struct efx_rss_context_priv - driver private data for an RSS context * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or * %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC. - * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID. - * @user_id: the rss_context ID exposed to userspace over ethtool. * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled + */ +struct efx_rss_context_priv { + u32 context_id; + bool rx_hash_udp_4tuple; +}; + +/** + * struct efx_rss_context - an RSS context + * @priv: hardware-specific state * @rx_hash_key: Toeplitz hash key for this RSS context * @indir_table: Indirection table for this RSS context */ struct efx_rss_context { - struct list_head list; - u32 context_id; - u32 user_id; - bool rx_hash_udp_4tuple; + struct efx_rss_context_priv priv; u8 rx_hash_key[40]; u32 rx_indir_table[128]; }; @@ -883,9 +886,7 @@ struct efx_mae; * @rx_packet_ts_offset: Offset of timestamp from start of packet data * (valid only if channel->sync_timestamps_enabled; always negative) * @rx_scatter: Scatter mode enabled for receives - * @rss_context: Main RSS context. Its @list member is the head of the list of - * RSS contexts created by user requests - * @rss_lock: Protects custom RSS context software state in @rss_context.list + * @rss_context: Main RSS context. * @vport_id: The function's vport ID, only relevant for PFs * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired @@ -1052,7 +1053,6 @@ struct efx_nic { int rx_packet_ts_offset; bool rx_scatter; struct efx_rss_context rss_context; - struct mutex rss_lock; u32 vport_id; unsigned int_error_count; @@ -1416,9 +1416,9 @@ struct efx_nic_type { const u32 *rx_indir_table, const u8 *key); int (*rx_pull_rss_config)(struct efx_nic *efx); int (*rx_push_rss_context_config)(struct efx_nic *efx, - struct efx_rss_context *ctx, + struct efx_rss_context_priv *ctx, const u32 *rx_indir_table, - const u8 *key); + const u8 *key, bool delete); int (*rx_pull_rss_context_config)(struct efx_nic *efx, struct efx_rss_context *ctx); void (*rx_restore_rss_contexts)(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index dcd901eccfc8..0b7dc75c40f9 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -557,69 +557,25 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, napi_gro_frags(napi); } -/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because - * (a) this is an infrequent control-plane operation and (b) n is small (max 64) - */ -struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) +struct efx_rss_context_priv *efx_find_rss_context_entry(struct efx_nic *efx, + u32 id) { - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx, *new; - u32 id = 1; /* Don't use zero, that refers to the master RSS context */ - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); + struct ethtool_rxfh_context *ctx; - /* Search for first gap in the numbering */ - list_for_each_entry(ctx, head, list) { - if (ctx->user_id != id) - break; - id++; - /* Check for wrap. If this happens, we have nearly 2^32 - * allocated RSS contexts, which seems unlikely. - */ - if (WARN_ON_ONCE(!id)) - return NULL; - } + WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock)); - /* Create the new entry */ - new = kmalloc(sizeof(*new), GFP_KERNEL); - if (!new) + ctx = xa_load(&efx->net_dev->ethtool->rss_ctx, id); + if (!ctx) return NULL; - new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; - new->rx_hash_udp_4tuple = false; - - /* Insert the new entry into the gap */ - new->user_id = id; - list_add_tail(&new->list, &ctx->list); - return new; -} - -struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) -{ - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx; - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - list_for_each_entry(ctx, head, list) - if (ctx->user_id == id) - return ctx; - return NULL; -} - -void efx_free_rss_context_entry(struct efx_rss_context *ctx) -{ - list_del(&ctx->list); - kfree(ctx); + return ethtool_rxfh_context_priv(ctx); } -void efx_set_default_rx_indir_table(struct efx_nic *efx, - struct efx_rss_context *ctx) +void efx_set_default_rx_indir_table(struct efx_nic *efx, u32 *indir) { size_t i; - for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) - ctx->rx_indir_table[i] = - ethtool_rxfh_indir_default(i, efx->rss_spread); + for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); i++) + indir[i] = ethtool_rxfh_indir_default(i, efx->rss_spread); } /** diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h index fbd2769307f9..75fa84192362 100644 --- a/drivers/net/ethernet/sfc/rx_common.h +++ b/drivers/net/ethernet/sfc/rx_common.h @@ -84,11 +84,9 @@ void efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, unsigned int n_frags, u8 *eh, __wsum csum); -struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); -struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); -void efx_free_rss_context_entry(struct efx_rss_context *ctx); -void efx_set_default_rx_indir_table(struct efx_nic *efx, - struct efx_rss_context *ctx); +struct efx_rss_context_priv *efx_find_rss_context_entry(struct efx_nic *efx, + u32 id); +void efx_set_default_rx_indir_table(struct efx_nic *efx, u32 *indir); bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); bool efx_filter_spec_equal(const struct efx_filter_spec *left, diff --git a/drivers/net/ethernet/tehuti/tn40.c b/drivers/net/ethernet/tehuti/tn40.c index 11db9fde11fe..565b72537efa 100644 --- a/drivers/net/ethernet/tehuti/tn40.c +++ b/drivers/net/ethernet/tehuti/tn40.c @@ -1571,6 +1571,19 @@ static const struct net_device_ops tn40_netdev_ops = { .ndo_vlan_rx_kill_vid = tn40_vlan_rx_kill_vid, }; +static int tn40_ethtool_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) +{ + struct tn40_priv *priv = netdev_priv(ndev); + + return phylink_ethtool_ksettings_get(priv->phylink, cmd); +} + +static const struct ethtool_ops tn40_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_link_ksettings = tn40_ethtool_get_link_ksettings, +}; + static int tn40_priv_init(struct tn40_priv *priv) { int ret; @@ -1599,6 +1612,7 @@ static struct net_device *tn40_netdev_alloc(struct pci_dev *pdev) if (!ndev) return NULL; ndev->netdev_ops = &tn40_netdev_ops; + ndev->ethtool_ops = &tn40_ethtool_ops; ndev->tx_queue_len = TN40_NDEV_TXQ_LEN; ndev->mem_start = pci_resource_start(pdev, 0); ndev->mem_end = pci_resource_end(pdev, 0); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index 46a5a3e95202..e868f7ef4920 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -37,9 +37,9 @@ static int ngbe_set_wol(struct net_device *netdev, wx->wol = 0; if (wol->wolopts & WAKE_MAGIC) wx->wol = WX_PSR_WKUP_CTL_MAG; - netdev->wol_enabled = !!(wx->wol); + netdev->ethtool->wol_enabled = !!(wx->wol); wr32(wx, WX_PSR_WKUP_CTL, wx->wol); - device_set_wakeup_enable(&pdev->dev, netdev->wol_enabled); + device_set_wakeup_enable(&pdev->dev, netdev->ethtool->wol_enabled); return 0; } diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index e894e01d030d..a8119de60deb 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -650,7 +650,7 @@ static int ngbe_probe(struct pci_dev *pdev, if (wx->wol_hw_supported) wx->wol = NGBE_PSR_WKUP_CTL_MAG; - netdev->wol_enabled = !!(wx->wol); + netdev->ethtool->wol_enabled = !!(wx->wol); wr32(wx, NGBE_PSR_WKUP_CTL, wx->wol); device_set_wakeup_enable(&pdev->dev, wx->wol); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c4236564c1cd..785182fa5fe0 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1309,7 +1309,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) if (netdev) { struct device *parent = netdev->dev.parent; - if (netdev->wol_enabled) + if (netdev->ethtool->wol_enabled) pm_system_wakeup(); else if (device_may_wakeup(&netdev->dev)) pm_wakeup_dev_event(&netdev->dev, 0, true); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 6c6ec9475709..473cbc1d497b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -296,7 +296,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) goto out; - if (netdev->wol_enabled) + if (netdev->ethtool->wol_enabled) return false; /* As long as not all affected network drivers support the @@ -1984,7 +1984,8 @@ int phy_suspend(struct phy_device *phydev) return 0; phy_ethtool_get_wol(phydev, &wol); - phydev->wol_enabled = wol.wolopts || (netdev && netdev->wol_enabled); + phydev->wol_enabled = wol.wolopts || + (netdev && netdev->ethtool->wol_enabled); /* If the device has WOL enabled, we cannot suspend the PHY */ if (phydev->wol_enabled && !(phydrv->flags & PHY_ALWAYS_CALL_SUSPEND)) return -EBUSY; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6c24c48dcf0f..51c526d227fa 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2282,7 +2282,7 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) { ASSERT_RTNL(); - if (mac_wol && (!pl->netdev || pl->netdev->wol_enabled)) { + if (mac_wol && (!pl->netdev || pl->netdev->ethtool->wol_enabled)) { /* Wake-on-Lan enabled, MAC handling */ mutex_lock(&pl->state_mutex); diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 2174893c974f..bed839237fb5 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -32,6 +32,15 @@ #define RTL8211F_PHYCR2 0x19 #define RTL8211F_INSR 0x1d +#define RTL8211F_LEDCR 0x10 +#define RTL8211F_LEDCR_MODE BIT(15) +#define RTL8211F_LEDCR_ACT_TXRX BIT(4) +#define RTL8211F_LEDCR_LINK_1000 BIT(3) +#define RTL8211F_LEDCR_LINK_100 BIT(1) +#define RTL8211F_LEDCR_LINK_10 BIT(0) +#define RTL8211F_LEDCR_MASK GENMASK(4, 0) +#define RTL8211F_LEDCR_SHIFT 5 + #define RTL8211F_TX_DELAY BIT(8) #define RTL8211F_RX_DELAY BIT(3) @@ -87,6 +96,8 @@ #define RTL_8221B_VN_CG 0x001cc84a #define RTL_8251B 0x001cc862 +#define RTL8211F_LED_COUNT 3 + MODULE_DESCRIPTION("Realtek PHY driver"); MODULE_AUTHOR("Johnson Leung"); MODULE_LICENSE("GPL"); @@ -476,6 +487,98 @@ static int rtl821x_resume(struct phy_device *phydev) return 0; } +static int rtl8211f_led_hw_is_supported(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + const unsigned long mask = BIT(TRIGGER_NETDEV_LINK_10) | + BIT(TRIGGER_NETDEV_LINK_100) | + BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_RX) | + BIT(TRIGGER_NETDEV_TX); + + /* The RTL8211F PHY supports these LED settings on up to three LEDs: + * - Link: Configurable subset of 10/100/1000 link rates + * - Active: Blink on activity, RX or TX is not differentiated + * The Active option has two modes, A and B: + * - A: Link and Active indication at configurable, but matching, + * subset of 10/100/1000 link rates + * - B: Link indication at configurable subset of 10/100/1000 link + * rates and Active indication always at all three 10+100+1000 + * link rates. + * This code currently uses mode B only. + */ + + if (index >= RTL8211F_LED_COUNT) + return -EINVAL; + + /* Filter out any other unsupported triggers. */ + if (rules & ~mask) + return -EOPNOTSUPP; + + /* RX and TX are not differentiated, either both are set or not set. */ + if (!(rules & BIT(TRIGGER_NETDEV_RX)) ^ !(rules & BIT(TRIGGER_NETDEV_TX))) + return -EOPNOTSUPP; + + return 0; +} + +static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index, + unsigned long *rules) +{ + int val; + + val = phy_read_paged(phydev, 0xd04, RTL8211F_LEDCR); + if (val < 0) + return val; + + val >>= RTL8211F_LEDCR_SHIFT * index; + val &= RTL8211F_LEDCR_MASK; + + if (val & RTL8211F_LEDCR_LINK_10) + set_bit(TRIGGER_NETDEV_LINK_10, rules); + + if (val & RTL8211F_LEDCR_LINK_100) + set_bit(TRIGGER_NETDEV_LINK_100, rules); + + if (val & RTL8211F_LEDCR_LINK_1000) + set_bit(TRIGGER_NETDEV_LINK_1000, rules); + + if (val & RTL8211F_LEDCR_ACT_TXRX) { + set_bit(TRIGGER_NETDEV_RX, rules); + set_bit(TRIGGER_NETDEV_TX, rules); + } + + return 0; +} + +static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + const u16 mask = RTL8211F_LEDCR_MASK << (RTL8211F_LEDCR_SHIFT * index); + u16 reg = RTL8211F_LEDCR_MODE; /* Mode B */ + + if (index >= RTL8211F_LED_COUNT) + return -EINVAL; + + if (test_bit(TRIGGER_NETDEV_LINK_10, &rules)) + reg |= RTL8211F_LEDCR_LINK_10; + + if (test_bit(TRIGGER_NETDEV_LINK_100, &rules)) + reg |= RTL8211F_LEDCR_LINK_100; + + if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) + reg |= RTL8211F_LEDCR_LINK_1000; + + if (test_bit(TRIGGER_NETDEV_RX, &rules) || + test_bit(TRIGGER_NETDEV_TX, &rules)) { + reg |= RTL8211F_LEDCR_ACT_TXRX; + } + + reg <<= RTL8211F_LEDCR_SHIFT * index; + + return phy_modify_paged(phydev, 0xd04, RTL8211F_LEDCR, mask, reg); +} + static int rtl8211e_config_init(struct phy_device *phydev) { int ret = 0, oldpage; @@ -1192,6 +1295,9 @@ static struct phy_driver realtek_drvs[] = { .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, .flags = PHY_ALWAYS_CALL_SUSPEND, + .led_hw_is_supported = rtl8211f_led_hw_is_supported, + .led_hw_control_get = rtl8211f_led_hw_control_get, + .led_hw_control_set = rtl8211f_led_hw_control_set, }, { PHY_ID_MATCH_EXACT(RTL_8211FVD_PHYID), .name = "RTL8211F-VD Gigabit Ethernet", diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c7f6f2bc9cac..f74bb0cf8ed1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -159,6 +159,57 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) return index % n_rx_rings; } +/** + * struct ethtool_rxfh_context - a custom RSS context configuration + * @indir_size: Number of u32 entries in indirection table + * @key_size: Size of hash key, in bytes + * @priv_size: Size of driver private data, in bytes + * @hfunc: RSS hash function identifier. One of the %ETH_RSS_HASH_* + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. + * @indir_configured: indir has been specified (at create time or subsequently) + * @key_configured: hkey has been specified (at create time or subsequently) + */ +struct ethtool_rxfh_context { + u32 indir_size; + u32 key_size; + u16 priv_size; + u8 hfunc; + u8 input_xfrm; + u8 indir_configured:1; + u8 key_configured:1; + /* private: driver private data, indirection table, and hash key are + * stored sequentially in @data area. Use below helpers to access. + */ + u8 data[] __aligned(sizeof(void *)); +}; + +static inline void *ethtool_rxfh_context_priv(struct ethtool_rxfh_context *ctx) +{ + return ctx->data; +} + +static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx) +{ + return (u32 *)(ctx->data + ALIGN(ctx->priv_size, sizeof(u32))); +} + +static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) +{ + return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); +} + +static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, + u16 priv_size) +{ + size_t indir_bytes = array_size(indir_size, sizeof(u32)); + size_t flex_len; + + flex_len = size_add(size_add(indir_bytes, key_size), + ALIGN(priv_size, sizeof(u32))); + return struct_size_t(struct ethtool_rxfh_context, data, flex_len); +} + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -670,6 +721,12 @@ struct ethtool_rxfh_param { * contexts. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_priv_size: size of the driver private data area the core should + * allocate for an RSS context (in &struct ethtool_rxfh_context). + * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this + * is zero then the core may choose any (nonzero) ID, otherwise the core + * will only use IDs strictly less than this value, as the @rss_context + * argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -766,6 +823,32 @@ struct ethtool_rxfh_param { * will remain unchanged. * Returns a negative error code or zero. An error code must be returned * if at least one unsupported change was requested. + * @create_rxfh_context: Create a new RSS context with the specified RX flow + * hash indirection table, hash key, and hash function. + * The &struct ethtool_rxfh_context for this context is passed in @ctx; + * note that the indir table, hkey and hfunc are not yet populated as + * of this call. The driver does not need to update these; the core + * will do so if this op succeeds. + * However, if @rxfh.indir is set to %NULL, the driver must update the + * indir table in @ctx with the (default or inherited) table actually in + * use; similarly, if @rxfh.key is %NULL, @rxfh.hfunc is + * %ETH_RSS_HASH_NO_CHANGE, or @rxfh.input_xfrm is %RXH_XFRM_NO_CHANGE, + * the driver should update the corresponding information in @ctx. + * If the driver provides this method, it must also provide + * @modify_rxfh_context and @remove_rxfh_context. + * Returns a negative error code or zero. + * @modify_rxfh_context: Reconfigure the specified RSS context. Allows setting + * the contents of the RX flow hash indirection table, hash key, and/or + * hash function associated with the given context. + * Parameters which are set to %NULL or zero will remain unchanged. + * The &struct ethtool_rxfh_context for this context is passed in @ctx; + * note that it will still contain the *old* settings. The driver does + * not need to update these; the core will do so if this op succeeds. + * Returns a negative error code or zero. An error code must be returned + * if at least one unsupported change was requested. + * @remove_rxfh_context: Remove the specified RSS context. + * The &struct ethtool_rxfh_context for this context is passed in @ctx. + * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. @@ -855,6 +938,8 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u16 rxfh_priv_size; + u32 rxfh_max_context_id; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); @@ -917,6 +1002,18 @@ struct ethtool_ops { int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *); int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *, struct netlink_ext_ack *extack); + int (*create_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); + int (*modify_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); + int (*remove_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); @@ -1004,6 +1101,19 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd, u32 *dev_speed, u8 *dev_duplex); +/** + * struct ethtool_netdev_state - per-netdevice state for ethtool features + * @rss_ctx: XArray of custom RSS contexts + * @rss_lock: Protects entries in @rss_ctx. May be taken from + * within RTNL. + * @wol_enabled: Wake-on-LAN is enabled + */ +struct ethtool_netdev_state { + struct xarray rss_ctx; + struct mutex rss_lock; + unsigned wol_enabled:1; +}; + struct phy_device; struct phy_tdr_config; struct phy_plca_cfg; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1e3401093c13..3c719f0d5f5a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -80,6 +80,7 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; +struct ethtool_netdev_state; typedef u32 xdp_features_t; @@ -1986,8 +1987,6 @@ enum netdev_reg_state { * switch driver and used to set the phys state of the * switch port. * - * @wol_enabled: Wake-on-LAN is enabled - * * @threaded: napi threaded mode is enabled * * @module_fw_flash_in_progress: Module firmware flashing is in progress. @@ -2001,6 +2000,7 @@ enum netdev_reg_state { * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state + * @ethtool: ethtool related state * @xdp_state: stores info on attached XDP BPF programs * * @nested_level: Used as a parameter of spin_lock_nested() of @@ -2375,7 +2375,7 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool threaded; - unsigned wol_enabled:1; + unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; @@ -2386,6 +2386,8 @@ struct net_device { const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + struct ethtool_netdev_state *ethtool; + /* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 188d41da1a40..1bfdd16890fa 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1176,7 +1176,7 @@ static inline bool nft_chain_is_bound(struct nft_chain *chain) int nft_chain_add(struct nft_table *table, struct nft_chain *chain); void nft_chain_del(struct nft_chain *chain); -void nf_tables_chain_destroy(struct nft_ctx *ctx); +void nf_tables_chain_destroy(struct nft_chain *chain); struct nft_stats { u64 bytes; @@ -1613,41 +1613,67 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) } /** - * struct nft_trans - nf_tables object update in transaction + * struct nft_trans - nf_tables object update in transaction * - * @list: used internally - * @binding_list: list of objects with possible bindings - * @msg_type: message type - * @put_net: ctx->net needs to be put - * @ctx: transaction context - * @data: internal information related to the transaction + * @list: used internally + * @net: struct net + * @table: struct nft_table the object resides in + * @msg_type: message type + * @seq: netlink sequence number + * @flags: modifiers to new request + * @report: notify via unicast netlink message + * @put_net: net needs to be put + * + * This is the information common to all objects in the transaction, + * this must always be the first member of derived sub-types. */ struct nft_trans { struct list_head list; - struct list_head binding_list; + struct net *net; + struct nft_table *table; int msg_type; - bool put_net; - struct nft_ctx ctx; - char data[]; + u32 seq; + u16 flags; + u8 report:1; + u8 put_net:1; +}; + +/** + * struct nft_trans_binding - nf_tables object with binding support in transaction + * @nft_trans: base structure, MUST be first member + * @binding_list: list of objects with possible bindings + * + * This is the base type used by objects that can be bound to a chain. + */ +struct nft_trans_binding { + struct nft_trans nft_trans; + struct list_head binding_list; }; struct nft_trans_rule { + struct nft_trans nft_trans; struct nft_rule *rule; + struct nft_chain *chain; struct nft_flow_rule *flow; u32 rule_id; bool bound; }; -#define nft_trans_rule(trans) \ - (((struct nft_trans_rule *)trans->data)->rule) -#define nft_trans_flow_rule(trans) \ - (((struct nft_trans_rule *)trans->data)->flow) -#define nft_trans_rule_id(trans) \ - (((struct nft_trans_rule *)trans->data)->rule_id) -#define nft_trans_rule_bound(trans) \ - (((struct nft_trans_rule *)trans->data)->bound) +#define nft_trans_container_rule(trans) \ + container_of(trans, struct nft_trans_rule, nft_trans) +#define nft_trans_rule(trans) \ + nft_trans_container_rule(trans)->rule +#define nft_trans_flow_rule(trans) \ + nft_trans_container_rule(trans)->flow +#define nft_trans_rule_id(trans) \ + nft_trans_container_rule(trans)->rule_id +#define nft_trans_rule_bound(trans) \ + nft_trans_container_rule(trans)->bound +#define nft_trans_rule_chain(trans) \ + nft_trans_container_rule(trans)->chain struct nft_trans_set { + struct nft_trans_binding nft_trans_binding; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1657,100 +1683,117 @@ struct nft_trans_set { u32 size; }; -#define nft_trans_set(trans) \ - (((struct nft_trans_set *)trans->data)->set) -#define nft_trans_set_id(trans) \ - (((struct nft_trans_set *)trans->data)->set_id) -#define nft_trans_set_bound(trans) \ - (((struct nft_trans_set *)trans->data)->bound) -#define nft_trans_set_update(trans) \ - (((struct nft_trans_set *)trans->data)->update) -#define nft_trans_set_timeout(trans) \ - (((struct nft_trans_set *)trans->data)->timeout) -#define nft_trans_set_gc_int(trans) \ - (((struct nft_trans_set *)trans->data)->gc_int) -#define nft_trans_set_size(trans) \ - (((struct nft_trans_set *)trans->data)->size) +#define nft_trans_container_set(t) \ + container_of(t, struct nft_trans_set, nft_trans_binding.nft_trans) +#define nft_trans_set(trans) \ + nft_trans_container_set(trans)->set +#define nft_trans_set_id(trans) \ + nft_trans_container_set(trans)->set_id +#define nft_trans_set_bound(trans) \ + nft_trans_container_set(trans)->bound +#define nft_trans_set_update(trans) \ + nft_trans_container_set(trans)->update +#define nft_trans_set_timeout(trans) \ + nft_trans_container_set(trans)->timeout +#define nft_trans_set_gc_int(trans) \ + nft_trans_container_set(trans)->gc_int +#define nft_trans_set_size(trans) \ + nft_trans_container_set(trans)->size struct nft_trans_chain { + struct nft_trans_binding nft_trans_binding; struct nft_chain *chain; - bool update; char *name; struct nft_stats __percpu *stats; u8 policy; + bool update; bool bound; u32 chain_id; struct nft_base_chain *basechain; struct list_head hook_list; }; -#define nft_trans_chain(trans) \ - (((struct nft_trans_chain *)trans->data)->chain) -#define nft_trans_chain_update(trans) \ - (((struct nft_trans_chain *)trans->data)->update) -#define nft_trans_chain_name(trans) \ - (((struct nft_trans_chain *)trans->data)->name) -#define nft_trans_chain_stats(trans) \ - (((struct nft_trans_chain *)trans->data)->stats) -#define nft_trans_chain_policy(trans) \ - (((struct nft_trans_chain *)trans->data)->policy) -#define nft_trans_chain_bound(trans) \ - (((struct nft_trans_chain *)trans->data)->bound) -#define nft_trans_chain_id(trans) \ - (((struct nft_trans_chain *)trans->data)->chain_id) -#define nft_trans_basechain(trans) \ - (((struct nft_trans_chain *)trans->data)->basechain) -#define nft_trans_chain_hooks(trans) \ - (((struct nft_trans_chain *)trans->data)->hook_list) +#define nft_trans_container_chain(t) \ + container_of(t, struct nft_trans_chain, nft_trans_binding.nft_trans) +#define nft_trans_chain(trans) \ + nft_trans_container_chain(trans)->chain +#define nft_trans_chain_update(trans) \ + nft_trans_container_chain(trans)->update +#define nft_trans_chain_name(trans) \ + nft_trans_container_chain(trans)->name +#define nft_trans_chain_stats(trans) \ + nft_trans_container_chain(trans)->stats +#define nft_trans_chain_policy(trans) \ + nft_trans_container_chain(trans)->policy +#define nft_trans_chain_bound(trans) \ + nft_trans_container_chain(trans)->bound +#define nft_trans_chain_id(trans) \ + nft_trans_container_chain(trans)->chain_id +#define nft_trans_basechain(trans) \ + nft_trans_container_chain(trans)->basechain +#define nft_trans_chain_hooks(trans) \ + nft_trans_container_chain(trans)->hook_list struct nft_trans_table { + struct nft_trans nft_trans; bool update; }; -#define nft_trans_table_update(trans) \ - (((struct nft_trans_table *)trans->data)->update) +#define nft_trans_container_table(trans) \ + container_of(trans, struct nft_trans_table, nft_trans) +#define nft_trans_table_update(trans) \ + nft_trans_container_table(trans)->update struct nft_trans_elem { + struct nft_trans nft_trans; struct nft_set *set; struct nft_elem_priv *elem_priv; bool bound; }; -#define nft_trans_elem_set(trans) \ - (((struct nft_trans_elem *)trans->data)->set) -#define nft_trans_elem_priv(trans) \ - (((struct nft_trans_elem *)trans->data)->elem_priv) -#define nft_trans_elem_set_bound(trans) \ - (((struct nft_trans_elem *)trans->data)->bound) +#define nft_trans_container_elem(t) \ + container_of(t, struct nft_trans_elem, nft_trans) +#define nft_trans_elem_set(trans) \ + nft_trans_container_elem(trans)->set +#define nft_trans_elem_priv(trans) \ + nft_trans_container_elem(trans)->elem_priv +#define nft_trans_elem_set_bound(trans) \ + nft_trans_container_elem(trans)->bound struct nft_trans_obj { + struct nft_trans nft_trans; struct nft_object *obj; struct nft_object *newobj; bool update; }; -#define nft_trans_obj(trans) \ - (((struct nft_trans_obj *)trans->data)->obj) -#define nft_trans_obj_newobj(trans) \ - (((struct nft_trans_obj *)trans->data)->newobj) -#define nft_trans_obj_update(trans) \ - (((struct nft_trans_obj *)trans->data)->update) +#define nft_trans_container_obj(t) \ + container_of(t, struct nft_trans_obj, nft_trans) +#define nft_trans_obj(trans) \ + nft_trans_container_obj(trans)->obj +#define nft_trans_obj_newobj(trans) \ + nft_trans_container_obj(trans)->newobj +#define nft_trans_obj_update(trans) \ + nft_trans_container_obj(trans)->update struct nft_trans_flowtable { + struct nft_trans nft_trans; struct nft_flowtable *flowtable; - bool update; struct list_head hook_list; u32 flags; + bool update; }; -#define nft_trans_flowtable(trans) \ - (((struct nft_trans_flowtable *)trans->data)->flowtable) -#define nft_trans_flowtable_update(trans) \ - (((struct nft_trans_flowtable *)trans->data)->update) -#define nft_trans_flowtable_hooks(trans) \ - (((struct nft_trans_flowtable *)trans->data)->hook_list) -#define nft_trans_flowtable_flags(trans) \ - (((struct nft_trans_flowtable *)trans->data)->flags) +#define nft_trans_container_flowtable(t) \ + container_of(t, struct nft_trans_flowtable, nft_trans) +#define nft_trans_flowtable(trans) \ + nft_trans_container_flowtable(trans)->flowtable +#define nft_trans_flowtable_update(trans) \ + nft_trans_container_flowtable(trans)->update +#define nft_trans_flowtable_hooks(trans) \ + nft_trans_container_flowtable(trans)->hook_list +#define nft_trans_flowtable_flags(trans) \ + nft_trans_container_flowtable(trans)->flags #define NFT_TRANS_GC_BATCHCOUNT 256 @@ -1764,6 +1807,33 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline void nft_ctx_update(struct nft_ctx *ctx, + const struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWRULE: + case NFT_MSG_DELRULE: + case NFT_MSG_DESTROYRULE: + ctx->chain = nft_trans_rule_chain(trans); + break; + case NFT_MSG_NEWCHAIN: + case NFT_MSG_DELCHAIN: + case NFT_MSG_DESTROYCHAIN: + ctx->chain = nft_trans_chain(trans); + break; + default: + ctx->chain = NULL; + break; + } + + ctx->net = trans->net; + ctx->table = trans->table; + ctx->family = trans->table->family; + ctx->report = trans->report; + ctx->flags = trans->flags; + ctx->seq = trans->seq; +} + struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, unsigned int gc_seq, gfp_t gfp); void nft_trans_gc_destroy(struct nft_trans_gc *trans); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index aa4094ca2444..639894ed1b97 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1376,7 +1376,7 @@ enum nft_secmark_attributes { #define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1) /* Max security context length */ -#define NFT_SECMARK_CTX_MAXLEN 256 +#define NFT_SECMARK_CTX_MAXLEN 4096 /** * enum nft_reject_types - nf_tables reject expression reject types diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index 7cb4a172feed..927c735a5b0e 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* tcp_metrics.h - TCP Metrics Interface */ -#ifndef _LINUX_TCP_METRICS_H -#define _LINUX_TCP_METRICS_H +#ifndef _UAPI_LINUX_TCP_METRICS_H +#define _UAPI_LINUX_TCP_METRICS_H #include <linux/types.h> @@ -27,6 +27,22 @@ enum tcp_metric_index { #define TCP_METRIC_MAX (__TCP_METRIC_MAX - 1) +/* Re-define enum tcp_metric_index, again, using the values carried + * as netlink attribute types. + */ +enum { + TCP_METRICS_A_METRICS_RTT = 1, + TCP_METRICS_A_METRICS_RTTVAR, + TCP_METRICS_A_METRICS_SSTHRESH, + TCP_METRICS_A_METRICS_CWND, + TCP_METRICS_A_METRICS_REODERING, + TCP_METRICS_A_METRICS_RTT_US, + TCP_METRICS_A_METRICS_RTTVAR_US, + + __TCP_METRICS_A_METRICS_MAX +}; +#define TCP_METRICS_A_METRICS_MAX (__TCP_METRICS_A_METRICS_MAX - 1) + enum { TCP_METRICS_ATTR_UNSPEC, TCP_METRICS_ATTR_ADDR_IPV4, /* u32 */ @@ -58,4 +74,4 @@ enum { #define TCP_METRICS_CMD_MAX (__TCP_METRICS_CMD_MAX - 1) -#endif /* _LINUX_TCP_METRICS_H */ +#endif /* _UAPI_LINUX_TCP_METRICS_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 0a23d7da7fbc..385c4091aa77 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10336,6 +10336,10 @@ int register_netdevice(struct net_device *dev) if (ret) return ret; + /* rss ctx ID 0 is reserved for the default context, start from 1 */ + xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1); + mutex_init(&dev->ethtool->rss_lock); + spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -11116,6 +11120,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_all; + dev->ethtool = kzalloc(sizeof(*dev->ethtool), GFP_KERNEL_ACCOUNT); + if (!dev->ethtool) + goto free_all; strcpy(dev->name, name); dev->name_assign_type = name_assign_type; @@ -11166,6 +11173,7 @@ void free_netdev(struct net_device *dev) return; } + kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); @@ -11231,6 +11239,34 @@ void synchronize_net(void) } EXPORT_SYMBOL(synchronize_net); +static void netdev_rss_contexts_free(struct net_device *dev) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + + mutex_lock(&dev->ethtool->rss_lock); + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { + struct ethtool_rxfh_param rxfh; + + rxfh.indir = ethtool_rxfh_context_indir(ctx); + rxfh.key = ethtool_rxfh_context_key(ctx); + rxfh.hfunc = ctx->hfunc; + rxfh.input_xfrm = ctx->input_xfrm; + rxfh.rss_context = context; + rxfh.rss_delete = true; + + xa_erase(&dev->ethtool->rss_ctx, context); + if (dev->ethtool_ops->create_rxfh_context) + dev->ethtool_ops->remove_rxfh_context(dev, ctx, + context, NULL); + else + dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); + kfree(ctx); + } + xa_destroy(&dev->ethtool->rss_ctx); + mutex_unlock(&dev->ethtool->rss_lock); +} + /** * unregister_netdevice_queue - remove device from the kernel * @dev: device @@ -11334,11 +11370,15 @@ void unregister_netdevice_many_notify(struct list_head *head, netdev_name_node_alt_flush(dev); netdev_name_node_free(dev->name_node); + netdev_rss_contexts_free(dev); + call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); + mutex_destroy(&dev->ethtool->rss_lock); + if (skb) rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 1cca372c0d80..d8795ed07ba3 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1202,6 +1202,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_rxfh_param rxfh_dev = {}; u32 user_indir_size, user_key_size; + struct ethtool_rxfh_context *ctx; struct ethtool_rxfh rxfh; u32 indir_bytes; u8 *rss_config; @@ -1249,11 +1250,26 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (user_key_size) rxfh_dev.key = rss_config + indir_bytes; - rxfh_dev.rss_context = rxfh.rss_context; - - ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev); - if (ret) - goto out; + if (rxfh.rss_context) { + ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); + if (!ctx) { + ret = -ENOENT; + goto out; + } + if (rxfh_dev.indir) + memcpy(rxfh_dev.indir, ethtool_rxfh_context_indir(ctx), + indir_bytes); + if (rxfh_dev.key) + memcpy(rxfh_dev.key, ethtool_rxfh_context_key(ctx), + user_key_size); + rxfh_dev.hfunc = ctx->hfunc; + rxfh_dev.input_xfrm = ctx->input_xfrm; + ret = 0; + } else { + ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev); + if (ret) + goto out; + } if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc), &rxfh_dev.hfunc, sizeof(rxfh.hfunc))) { @@ -1281,10 +1297,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; u32 dev_indir_size = 0, dev_key_size = 0, i; struct ethtool_rxfh_param rxfh_dev = {}; + struct ethtool_rxfh_context *ctx = NULL; struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; + bool locked = false; /* dev->ethtool->rss_lock taken */ u32 indir_bytes = 0; + bool create = false; u8 *rss_config; int ret; @@ -1312,6 +1331,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && !ops->cap_rss_sym_xor_supported) return -EOPNOTSUPP; + create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; /* If either indir, hash key or function is valid, proceed further. * Must request at least one change: indir size, hash key, function @@ -1377,13 +1397,77 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } + if (rxfh.rss_context) { + mutex_lock(&dev->ethtool->rss_lock); + locked = true; + } + if (create) { + if (rxfh_dev.rss_delete) { + ret = -EINVAL; + goto out; + } + ctx = kzalloc(ethtool_rxfh_context_size(dev_indir_size, + dev_key_size, + ops->rxfh_priv_size), + GFP_KERNEL_ACCOUNT); + if (!ctx) { + ret = -ENOMEM; + goto out; + } + ctx->indir_size = dev_indir_size; + ctx->key_size = dev_key_size; + ctx->priv_size = ops->rxfh_priv_size; + /* Initialise to an empty context */ + ctx->hfunc = ETH_RSS_HASH_NO_CHANGE; + ctx->input_xfrm = RXH_XFRM_NO_CHANGE; + if (ops->create_rxfh_context) { + u32 limit = ops->rxfh_max_context_id ?: U32_MAX; + u32 ctx_id; + + /* driver uses new API, core allocates ID */ + ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, + XA_LIMIT(1, limit), GFP_KERNEL_ACCOUNT); + if (ret < 0) { + kfree(ctx); + goto out; + } + WARN_ON(!ctx_id); /* can't happen */ + rxfh.rss_context = ctx_id; + } + } else if (rxfh.rss_context) { + ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); + if (!ctx) { + ret = -ENOENT; + goto out; + } + } rxfh_dev.hfunc = rxfh.hfunc; rxfh_dev.rss_context = rxfh.rss_context; rxfh_dev.input_xfrm = rxfh.input_xfrm; - ret = ops->set_rxfh(dev, &rxfh_dev, extack); - if (ret) + if (rxfh.rss_context && ops->create_rxfh_context) { + if (create) + ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, + extack); + else if (rxfh_dev.rss_delete) + ret = ops->remove_rxfh_context(dev, ctx, + rxfh.rss_context, + extack); + else + ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, + extack); + } else { + ret = ops->set_rxfh(dev, &rxfh_dev, extack); + } + if (ret) { + if (create) { + /* failed to create, free our new tracking entry */ + if (ops->create_rxfh_context) + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); + kfree(ctx); + } goto out; + } if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), &rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context))) @@ -1396,8 +1480,44 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) dev->priv_flags |= IFF_RXFH_CONFIGURED; } + /* Update rss_ctx tracking */ + if (create && !ops->create_rxfh_context) { + /* driver uses old API, it chose context ID */ + if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { + /* context ID reused, our tracking is screwed */ + kfree(ctx); + goto out; + } + /* Allocate the exact ID the driver gave us */ + if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh.rss_context, + ctx, GFP_KERNEL))) { + kfree(ctx); + goto out; + } + } + if (rxfh_dev.rss_delete) { + WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); + kfree(ctx); + } else if (ctx) { + if (rxfh_dev.indir) { + for (i = 0; i < dev_indir_size; i++) + ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i]; + ctx->indir_configured = 1; + } + if (rxfh_dev.key) { + memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, + dev_key_size); + ctx->key_configured = 1; + } + if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE) + ctx->hfunc = rxfh_dev.hfunc; + if (rxfh_dev.input_xfrm != RXH_XFRM_NO_CHANGE) + ctx->input_xfrm = rxfh_dev.input_xfrm; + } out: + if (locked) + mutex_unlock(&dev->ethtool->rss_lock); kfree(rss_config); return ret; } @@ -1509,7 +1629,7 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) if (ret) return ret; - dev->wol_enabled = !!wol.wolopts; + dev->ethtool->wol_enabled = !!wol.wolopts; ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL); return 0; diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c index 0ed56c9ac1bc..a39d8000d808 100644 --- a/net/ethtool/wol.c +++ b/net/ethtool/wol.c @@ -137,7 +137,7 @@ ethnl_set_wol(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_wol(dev, &wol); if (ret) return ret; - dev->wol_enabled = !!wol.wolopts; + dev->ethtool->wol_enabled = !!wol.wolopts; return 1; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d08bf16d476d..ed97df6af14d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -938,8 +938,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, kfree_skb(skb); return -EINVAL; } - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) { + if (is_udplite || dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 59448a2dbf2c..aa2e0a28ca61 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -357,6 +357,14 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, else uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0; + /* On the TX path, CHECKSUM_NONE and CHECKSUM_UNNECESSARY have the same + * meaning. However, check for bad offloads in the GSO stack expects the + * latter, if the checksum was calculated in software. To vouch for the + * segment skbs we actually need to set it on the gso_skb. + */ + if (gso_skb->ip_summed == CHECKSUM_NONE) + gso_skb->ip_summed = CHECKSUM_UNNECESSARY; + /* update refcount for the packet */ if (copy_dtor) { int delta = sum_truesize - gso_skb->truesize; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b56f0b9f4307..b5456394cc67 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1257,8 +1257,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) { + if (is_udplite || dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1e689c714127..83e452916403 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -126,7 +126,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (sctph->source != cp->vport || payload_csum || skb->ip_summed == CHECKSUM_PARTIAL) { sctph->source = cp->vport; - if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + if (!skb_is_gso(skb)) sctp_nat_csum(skb, sctph, sctphoff); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -175,7 +175,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, (skb->ip_summed == CHECKSUM_PARTIAL && !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; - if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + if (!skb_is_gso(skb)) sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 8715617b02fe..34ba14e59e95 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -321,7 +321,6 @@ insert_tree(struct net *net, struct nf_conncount_rb *rbconn; struct nf_conncount_tuple *conn; unsigned int count = 0, gc_count = 0; - u8 keylen = data->keylen; bool do_gc = true; spin_lock_bh(&nf_conncount_locks[hash]); @@ -333,7 +332,7 @@ restart: rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node); parent = *rbnode; - diff = key_diff(key, rbconn->key, keylen); + diff = key_diff(key, rbconn->key, data->keylen); if (diff < 0) { rbnode = &((*rbnode)->rb_left); } else if (diff > 0) { @@ -378,7 +377,7 @@ restart: conn->tuple = *tuple; conn->zone = *zone; - memcpy(rbconn->key, key, sizeof(u32) * keylen); + memcpy(rbconn->key, key, sizeof(u32) * data->keylen); nf_conncount_list_init(&rbconn->list); list_add(&conn->node, &rbconn->list.head); @@ -403,7 +402,6 @@ count_tree(struct net *net, struct rb_node *parent; struct nf_conncount_rb *rbconn; unsigned int hash; - u8 keylen = data->keylen; hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; root = &data->root[hash]; @@ -414,7 +412,7 @@ count_tree(struct net *net, rbconn = rb_entry(parent, struct nf_conncount_rb, node); - diff = key_diff(key, rbconn->key, keylen); + diff = key_diff(key, rbconn->key, data->keylen); if (diff < 0) { parent = rcu_dereference_raw(parent->rb_left); } else if (diff > 0) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e8dcf41d360d..70d0bad029fd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -153,14 +153,18 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, { struct nft_trans *trans; - trans = kzalloc(sizeof(struct nft_trans) + size, gfp); + trans = kzalloc(size, gfp); if (trans == NULL) return NULL; INIT_LIST_HEAD(&trans->list); - INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; - trans->ctx = *ctx; + + trans->net = ctx->net; + trans->table = ctx->table; + trans->seq = ctx->seq; + trans->flags = ctx->flags; + trans->report = ctx->report; return trans; } @@ -171,10 +175,26 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } +static struct nft_trans_binding *nft_trans_get_binding(struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + case NFT_MSG_NEWSET: + return container_of(trans, struct nft_trans_binding, nft_trans); + } + + return NULL; +} + static void nft_trans_list_del(struct nft_trans *trans) { + struct nft_trans_binding *trans_binding; + list_del(&trans->list); - list_del(&trans->binding_list); + + trans_binding = nft_trans_get_binding(trans); + if (trans_binding) + list_del(&trans_binding->binding_list); } static void nft_trans_destroy(struct nft_trans *trans) @@ -236,7 +256,7 @@ static void __nft_chain_trans_bind(const struct nft_ctx *ctx, nft_trans_chain_bound(trans) = bind; break; case NFT_MSG_NEWRULE: - if (trans->ctx.chain == chain) + if (nft_trans_rule_chain(trans) == chain) nft_trans_rule_bound(trans) = bind; break; } @@ -372,21 +392,26 @@ static void nf_tables_unregister_hook(struct net *net, static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); + struct nft_trans_binding *binding; + + list_add_tail(&trans->list, &nft_net->commit_list); + + binding = nft_trans_get_binding(trans); + if (!binding) + return; switch (trans->msg_type) { case NFT_MSG_NEWSET: if (!nft_trans_set_update(trans) && nft_set_is_anonymous(nft_trans_set(trans))) - list_add_tail(&trans->binding_list, &nft_net->binding_list); + list_add_tail(&binding->binding_list, &nft_net->binding_list); break; case NFT_MSG_NEWCHAIN: if (!nft_trans_chain_update(trans) && nft_chain_binding(nft_trans_chain(trans))) - list_add_tail(&trans->binding_list, &nft_net->binding_list); + list_add_tail(&binding->binding_list, &nft_net->binding_list); break; } - - list_add_tail(&trans->list, &nft_net->commit_list); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -416,11 +441,28 @@ static int nft_deltable(struct nft_ctx *ctx) return err; } -static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +static struct nft_trans * +nft_trans_alloc_chain(const struct nft_ctx *ctx, int msg_type) { + struct nft_trans_chain *trans_chain; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); + if (!trans) + return NULL; + + trans_chain = nft_trans_container_chain(trans); + INIT_LIST_HEAD(&trans_chain->nft_trans_binding.binding_list); + trans_chain->chain = ctx->chain; + + return trans; +} + +static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc_chain(ctx, msg_type); if (trans == NULL) return ERR_PTR(-ENOMEM); @@ -432,7 +474,6 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); } } - nft_trans_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; @@ -505,6 +546,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; + nft_trans_rule_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; @@ -560,12 +602,16 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, struct nft_set *set, const struct nft_set_desc *desc) { + struct nft_trans_set *trans_set; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); if (trans == NULL) return -ENOMEM; + trans_set = nft_trans_container_set(trans); + INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list); + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); @@ -1217,11 +1263,11 @@ static bool nft_table_pending_update(const struct nft_ctx *ctx) return true; list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.table == ctx->table && + if (trans->table == ctx->table && ((trans->msg_type == NFT_MSG_NEWCHAIN && nft_trans_chain_update(trans)) || (trans->msg_type == NFT_MSG_DELCHAIN && - nft_is_base_chain(trans->ctx.chain)))) + nft_is_base_chain(nft_trans_chain(trans))))) return true; } @@ -1615,15 +1661,15 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, return nft_flush_table(&ctx); } -static void nf_tables_table_destroy(struct nft_ctx *ctx) +static void nf_tables_table_destroy(struct nft_table *table) { - if (WARN_ON(ctx->table->use > 0)) + if (WARN_ON(table->use > 0)) return; - rhltable_destroy(&ctx->table->chains_ht); - kfree(ctx->table->name); - kfree(ctx->table->udata); - kfree(ctx->table); + rhltable_destroy(&table->chains_ht); + kfree(table->name); + kfree(table->udata); + kfree(table); } void nft_register_chain_type(const struct nft_chain_type *ctype) @@ -2049,18 +2095,19 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) return newstats; } -static void nft_chain_stats_replace(struct nft_trans *trans) +static void nft_chain_stats_replace(struct nft_trans_chain *trans) { - struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain); + const struct nft_trans *t = &trans->nft_trans_binding.nft_trans; + struct nft_base_chain *chain = nft_base_chain(trans->chain); - if (!nft_trans_chain_stats(trans)) + if (!trans->stats) return; - nft_trans_chain_stats(trans) = - rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans), - lockdep_commit_lock_is_held(trans->ctx.net)); + trans->stats = + rcu_replace_pointer(chain->stats, trans->stats, + lockdep_commit_lock_is_held(t->net)); - if (!nft_trans_chain_stats(trans)) + if (!trans->stats) static_branch_inc(&nft_counters_enabled); } @@ -2078,9 +2125,9 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) kvfree(chain->blob_next); } -void nf_tables_chain_destroy(struct nft_ctx *ctx) +void nf_tables_chain_destroy(struct nft_chain *chain) { - struct nft_chain *chain = ctx->chain; + const struct nft_table *table = chain->table; struct nft_hook *hook, *next; if (WARN_ON(chain->use > 0)) @@ -2092,7 +2139,7 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx) if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); - if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) { + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { list_for_each_entry_safe(hook, next, &basechain->hook_list, list) { list_del_rcu(&hook->list); @@ -2581,7 +2628,7 @@ err_chain_add: err_trans: nft_use_dec_restore(&table->use); err_destroy_chain: - nf_tables_chain_destroy(ctx); + nf_tables_chain_destroy(chain); return err; } @@ -2698,8 +2745,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, } err = -ENOMEM; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN, - sizeof(struct nft_trans_chain)); + trans = nft_trans_alloc_chain(ctx, NFT_MSG_NEWCHAIN); if (trans == NULL) goto err_trans; @@ -2725,7 +2771,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, err = -EEXIST; list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && - tmp->ctx.table == table && + tmp->table == table && nft_trans_chain_update(tmp) && nft_trans_chain_name(tmp) && strcmp(name, nft_trans_chain_name(tmp)) == 0) { @@ -2774,13 +2820,11 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net, struct nft_trans *trans; list_for_each_entry(trans, &nft_net->commit_list, list) { - struct nft_chain *chain = trans->ctx.chain; - if (trans->msg_type == NFT_MSG_NEWCHAIN && - chain->table == table && + nft_trans_chain(trans)->table == table && id == nft_trans_chain_id(trans) && - nft_active_genmask(chain, genmask)) - return chain; + nft_active_genmask(nft_trans_chain(trans), genmask)) + return nft_trans_chain(trans); } return ERR_PTR(-ENOENT); } @@ -2915,8 +2959,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx, list_move(&hook->list, &chain_del_list); } - trans = nft_trans_alloc(ctx, NFT_MSG_DELCHAIN, - sizeof(struct nft_trans_chain)); + trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN); if (!trans) { err = -ENOMEM; goto err_chain_del_hook; @@ -4188,7 +4231,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWRULE && - trans->ctx.chain == chain && + nft_trans_rule_chain(trans) == chain && id == nft_trans_rule_id(trans)) return nft_trans_rule(trans); } @@ -9417,51 +9460,53 @@ static int nf_tables_validate(struct net *net) * * We defer the drop policy until the transaction has been finalized. */ -static void nft_chain_commit_drop_policy(struct nft_trans *trans) +static void nft_chain_commit_drop_policy(struct nft_trans_chain *trans) { struct nft_base_chain *basechain; - if (nft_trans_chain_policy(trans) != NF_DROP) + if (trans->policy != NF_DROP) return; - if (!nft_is_base_chain(trans->ctx.chain)) + if (!nft_is_base_chain(trans->chain)) return; - basechain = nft_base_chain(trans->ctx.chain); + basechain = nft_base_chain(trans->chain); basechain->policy = NF_DROP; } -static void nft_chain_commit_update(struct nft_trans *trans) +static void nft_chain_commit_update(struct nft_trans_chain *trans) { + struct nft_table *table = trans->nft_trans_binding.nft_trans.table; struct nft_base_chain *basechain; - if (nft_trans_chain_name(trans)) { - rhltable_remove(&trans->ctx.table->chains_ht, - &trans->ctx.chain->rhlhead, + if (trans->name) { + rhltable_remove(&table->chains_ht, + &trans->chain->rhlhead, nft_chain_ht_params); - swap(trans->ctx.chain->name, nft_trans_chain_name(trans)); - rhltable_insert_key(&trans->ctx.table->chains_ht, - trans->ctx.chain->name, - &trans->ctx.chain->rhlhead, + swap(trans->chain->name, trans->name); + rhltable_insert_key(&table->chains_ht, + trans->chain->name, + &trans->chain->rhlhead, nft_chain_ht_params); } - if (!nft_is_base_chain(trans->ctx.chain)) + if (!nft_is_base_chain(trans->chain)) return; nft_chain_stats_replace(trans); - basechain = nft_base_chain(trans->ctx.chain); + basechain = nft_base_chain(trans->chain); - switch (nft_trans_chain_policy(trans)) { + switch (trans->policy) { case NF_DROP: case NF_ACCEPT: - basechain->policy = nft_trans_chain_policy(trans); + basechain->policy = trans->policy; break; } } -static void nft_obj_commit_update(struct nft_trans *trans) +static void nft_obj_commit_update(const struct nft_ctx *ctx, + struct nft_trans *trans) { struct nft_object *newobj; struct nft_object *obj; @@ -9473,15 +9518,21 @@ static void nft_obj_commit_update(struct nft_trans *trans) return; obj->ops->update(obj, newobj); - nft_obj_destroy(&trans->ctx, newobj); + nft_obj_destroy(ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) { + struct nft_ctx ctx = { + .net = trans->net, + }; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nf_tables_table_destroy(&trans->ctx); + nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: free_percpu(nft_trans_chain_stats(trans)); @@ -9492,25 +9543,25 @@ static void nft_commit_release(struct nft_trans *trans) if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else - nf_tables_chain_destroy(&trans->ctx); + nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: - nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - nf_tables_set_elem_destroy(&trans->ctx, + nf_tables_set_elem_destroy(&ctx, nft_trans_elem_set(trans), nft_trans_elem_priv(trans)); break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: - nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: @@ -9522,7 +9573,7 @@ static void nft_commit_release(struct nft_trans *trans) } if (trans->put_net) - put_net(trans->ctx.net); + put_net(trans->net); kfree(trans); } @@ -9641,10 +9692,10 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net) struct nft_trans *trans, *next; list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - struct nft_chain *chain = trans->ctx.chain; - if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { + struct nft_chain *chain = nft_trans_rule_chain(trans); + kvfree(chain->blob_next); chain->blob_next = NULL; } @@ -10002,7 +10053,7 @@ static void nf_tables_commit_release(struct net *net) trans = list_last_entry(&nft_net->commit_list, struct nft_trans, list); - get_net(trans->ctx.net); + get_net(trans->net); WARN_ON_ONCE(trans->put_net); trans->put_net = true; @@ -10146,12 +10197,15 @@ static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); + const struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct nft_trans_binding *trans_binding; struct nft_trans *trans, *next; unsigned int base_seq, gc_seq; LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + struct nft_ctx ctx; LIST_HEAD(adl); int err; @@ -10160,7 +10214,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } - list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + nft_ctx_init(&ctx, net, skb, nlh, NFPROTO_UNSPEC, NULL, NULL, NULL); + + list_for_each_entry(trans_binding, &nft_net->binding_list, binding_list) { + trans = &trans_binding->nft_trans; switch (trans->msg_type) { case NFT_MSG_NEWSET: if (!nft_trans_set_update(trans) && @@ -10178,6 +10235,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return -EINVAL; } break; + default: + WARN_ONCE(1, "Unhandled bind type %d", trans->msg_type); + break; } } @@ -10193,9 +10253,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) /* 1. Allocate space for next generation rules_gen_X[] */ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { + struct nft_table *table = trans->table; int ret; - ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); + ret = nf_tables_commit_audit_alloc(&adl, table); if (ret) { nf_tables_commit_chain_prepare_cancel(net); nf_tables_commit_audit_free(&adl); @@ -10203,7 +10264,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { - chain = trans->ctx.chain; + chain = nft_trans_rule_chain(trans); ret = nf_tables_commit_chain_prepare(net, chain); if (ret < 0) { @@ -10236,70 +10297,71 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) net->nft.gencursor = nft_gencursor_next(net); list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - nf_tables_commit_audit_collect(&adl, trans->ctx.table, - trans->msg_type); + struct nft_table *table = trans->table; + + nft_ctx_update(&ctx, trans); + + nf_tables_commit_audit_collect(&adl, table, trans->msg_type); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + if (!(table->flags & __NFT_TABLE_F_UPDATE)) { nft_trans_destroy(trans); break; } - if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT) - nf_tables_table_disable(net, trans->ctx.table); + if (table->flags & NFT_TABLE_F_DORMANT) + nf_tables_table_disable(net, table); - trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; + table->flags &= ~__NFT_TABLE_F_UPDATE; } else { - nft_clear(net, trans->ctx.table); + nft_clear(net, table); } - nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); + nf_tables_table_notify(&ctx, NFT_MSG_NEWTABLE); nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - list_del_rcu(&trans->ctx.table->list); - nf_tables_table_notify(&trans->ctx, trans->msg_type); + list_del_rcu(&table->list); + nf_tables_table_notify(&ctx, trans->msg_type); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - nft_chain_commit_update(trans); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, + nft_chain_commit_update(nft_trans_container_chain(trans)); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, &nft_trans_chain_hooks(trans)); list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); /* trans destroyed after rcu grace period */ } else { - nft_chain_commit_drop_policy(trans); - nft_clear(net, trans->ctx.chain); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, NULL); + nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); + nft_clear(net, nft_trans_chain(trans)); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL); nft_trans_destroy(trans); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, &nft_trans_chain_hooks(trans)); - if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); } } else { - nft_chain_del(trans->ctx.chain); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + nft_chain_del(nft_trans_chain(trans)); + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, NULL); - nf_tables_unregister_hook(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain); + nf_tables_unregister_hook(ctx.net, ctx.table, + nft_trans_chain(trans)); } break; case NFT_MSG_NEWRULE: - nft_clear(trans->ctx.net, nft_trans_rule(trans)); - nf_tables_rule_notify(&trans->ctx, - nft_trans_rule(trans), + nft_clear(net, nft_trans_rule(trans)); + nf_tables_rule_notify(&ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); @@ -10307,14 +10369,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: list_del_rcu(&nft_trans_rule(trans)->list); - nf_tables_rule_notify(&trans->ctx, - nft_trans_rule(trans), + nf_tables_rule_notify(&ctx, nft_trans_rule(trans), trans->msg_type); - nft_rule_expr_deactivate(&trans->ctx, - nft_trans_rule(trans), + nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: @@ -10333,9 +10393,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) - nft_use_dec(&trans->ctx.table->use); + nft_use_dec(&table->use); } - nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + nf_tables_set_notify(&ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); break; @@ -10343,14 +10403,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DESTROYSET: nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); - nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + nf_tables_set_notify(&ctx, nft_trans_set(trans), trans->msg_type, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); nft_setelem_activate(net, te->set, te->elem_priv); - nf_tables_setelem_notify(&trans->ctx, te->set, + nf_tables_setelem_notify(&ctx, te->set, te->elem_priv, NFT_MSG_NEWSETELEM); if (te->set->ops->commit && @@ -10362,9 +10422,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); - nf_tables_setelem_notify(&trans->ctx, te->set, + nf_tables_setelem_notify(&ctx, te->set, te->elem_priv, trans->msg_type); nft_setelem_remove(net, te->set, te->elem_priv); @@ -10380,13 +10440,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - nft_obj_commit_update(trans); - nf_tables_obj_notify(&trans->ctx, + nft_obj_commit_update(&ctx, trans); + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); } else { nft_clear(net, nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); nft_trans_destroy(trans); @@ -10395,14 +10455,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: nft_obj_del(nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), trans->msg_type); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { nft_trans_flowtable(trans)->data.flags = nft_trans_flowtable_flags(trans); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), NFT_MSG_NEWFLOWTABLE); @@ -10410,7 +10470,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable(trans)->hook_list); } else { nft_clear(net, nft_trans_flowtable(trans)); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, NFT_MSG_NEWFLOWTABLE); @@ -10420,7 +10480,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), trans->msg_type); @@ -10428,7 +10488,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable_hooks(trans)); } else { list_del_rcu(&nft_trans_flowtable(trans)->list); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, trans->msg_type); @@ -10470,28 +10530,32 @@ static void nf_tables_module_autoload(struct net *net) static void nf_tables_abort_release(struct nft_trans *trans) { + struct nft_ctx ctx = { }; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_NEWTABLE: - nf_tables_table_destroy(&trans->ctx); + nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else - nf_tables_chain_destroy(&trans->ctx); + nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_NEWRULE: - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem_priv(trans), true); break; case NFT_MSG_NEWOBJ: - nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) @@ -10523,6 +10587,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + struct nft_ctx ctx = { + .net = net, + }; int err = 0; if (action == NFNL_ABORT_VALIDATE && @@ -10531,37 +10598,41 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { + struct nft_table *table = trans->table; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + if (!(table->flags & __NFT_TABLE_F_UPDATE)) { nft_trans_destroy(trans); break; } - if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) { - nf_tables_table_disable(net, trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { - trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; + if (table->flags & __NFT_TABLE_F_WAS_DORMANT) { + nf_tables_table_disable(net, table); + table->flags |= NFT_TABLE_F_DORMANT; + } else if (table->flags & __NFT_TABLE_F_WAS_AWAKEN) { + table->flags &= ~NFT_TABLE_F_DORMANT; } - if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) { - trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER; - trans->ctx.table->nlpid = 0; + if (table->flags & __NFT_TABLE_F_WAS_ORPHAN) { + table->flags &= ~NFT_TABLE_F_OWNER; + table->nlpid = 0; } - trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; + table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { - list_del_rcu(&trans->ctx.table->list); + list_del_rcu(&table->list); } break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nft_clear(trans->ctx.net, trans->ctx.table); + nft_clear(trans->net, table); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); @@ -10574,11 +10645,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.table->use); - nft_chain_del(trans->ctx.chain); - nf_tables_unregister_hook(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain); + nft_use_dec_restore(&table->use); + nft_chain_del(nft_trans_chain(trans)); + nf_tables_unregister_hook(trans->net, table, + nft_trans_chain(trans)); } break; case NFT_MSG_DELCHAIN: @@ -10587,8 +10657,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); } else { - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, trans->ctx.chain); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_chain(trans)); } nft_trans_destroy(trans); break; @@ -10597,20 +10667,20 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.chain->use); + nft_use_dec_restore(&nft_trans_rule_chain(trans)->use); list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, + nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: - nft_use_inc_restore(&trans->ctx.chain->use); - nft_clear(trans->ctx.net, nft_trans_rule(trans)); - nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_use_inc_restore(&nft_trans_rule_chain(trans)->use); + nft_clear(trans->net, nft_trans_rule(trans)); + nft_rule_expr_activate(&ctx, nft_trans_rule(trans)); + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); @@ -10620,7 +10690,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; @@ -10630,10 +10700,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, nft_trans_set(trans)); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_set(trans)); if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) - nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_map_activate(&ctx, nft_trans_set(trans)); nft_trans_destroy(trans); break; @@ -10642,7 +10712,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); nft_setelem_remove(net, te->set, te->elem_priv); if (!nft_setelem_is_catchall(te->set, te->elem_priv)) atomic_dec(&te->set->nelems); @@ -10655,7 +10725,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); if (!nft_setelem_active_next(net, te->set, te->elem_priv)) { nft_setelem_data_activate(net, te->set, te->elem_priv); @@ -10673,17 +10743,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); nft_obj_del(nft_trans_obj(trans)); } break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, nft_trans_obj(trans)); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWFLOWTABLE: @@ -10691,7 +10761,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable_hooks(trans)); } else { - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable(trans)->hook_list); @@ -10703,8 +10773,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_splice(&nft_trans_flowtable_hooks(trans), &nft_trans_flowtable(trans)->hook_list); } else { - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_flowtable(trans)); } nft_trans_destroy(trans); break; @@ -11365,7 +11435,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) } nft_chain_del(ctx->chain); nft_use_dec(&ctx->table->use); - nf_tables_chain_destroy(ctx); + nf_tables_chain_destroy(ctx->chain); return 0; } @@ -11440,12 +11510,11 @@ static void __nft_release_table(struct net *net, struct nft_table *table) nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { - ctx.chain = chain; nft_chain_del(chain); nft_use_dec(&table->use); - nf_tables_chain_destroy(&ctx); + nf_tables_chain_destroy(chain); } - nf_tables_table_destroy(&ctx); + nf_tables_table_destroy(table); } static void __nft_release_tables(struct net *net) @@ -11588,6 +11657,14 @@ static int __init nf_tables_module_init(void) { int err; + BUILD_BUG_ON(offsetof(struct nft_trans_table, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans_binding.nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_rule, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans_binding.nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_elem, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_obj, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_flowtable, nft_trans) != 0); + err = register_pernet_subsys(&nf_tables_net_ops); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 12ab78fa5d84..64675f1c7f29 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -513,38 +513,38 @@ static void nft_flow_rule_offload_abort(struct net *net, int err = 0; list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) + if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; - err = nft_flow_offload_chain(trans->ctx.chain, NULL, + err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_UNBIND); break; case NFT_MSG_DELCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_chain(trans->ctx.chain, NULL, + err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_BIND); break; case NFT_MSG_NEWRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); @@ -564,46 +564,46 @@ int nft_flow_rule_offload_commit(struct net *net) u8 policy; list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) + if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; policy = nft_trans_chain_policy(trans); - err = nft_flow_offload_chain(trans->ctx.chain, &policy, + err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_BIND); break; case NFT_MSG_DELCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; policy = nft_trans_chain_policy(trans); - err = nft_flow_offload_chain(trans->ctx.chain, &policy, + err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_UNBIND); break; case NFT_MSG_NEWRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - if (trans->ctx.flags & NLM_F_REPLACE || - !(trans->ctx.flags & NLM_F_APPEND)) { + if (trans->flags & NLM_F_REPLACE || + !(trans->flags & NLM_F_APPEND)) { err = -EOPNOTSUPP; break; } - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); break; case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index f466af4f8531..eab4f476b47f 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -366,8 +366,7 @@ static int cttimeout_default_set(struct sk_buff *skb, __u8 l4num; int ret; - if (!cda[CTA_TIMEOUT_L3PROTO] || - !cda[CTA_TIMEOUT_L4PROTO] || + if (!cda[CTA_TIMEOUT_L4PROTO] || !cda[CTA_TIMEOUT_DATA]) return -EINVAL; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 6475c7abc1fe..ac2422c215e5 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -221,7 +221,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, list_del(&rule->list); nf_tables_rule_destroy(&chain_ctx, rule); } - nf_tables_chain_destroy(&chain_ctx); + nf_tables_chain_destroy(chain); break; default: break; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index ef93e0d3bee0..588a5e6ad899 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -59,9 +59,9 @@ MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* fil /* retained for backwards compatibility */ static unsigned int ip_pkt_list_tot __read_mostly; module_param(ip_pkt_list_tot, uint, 0400); -MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)"); +MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 65535)"); -#define XT_RECENT_MAX_NSTAMPS 256 +#define XT_RECENT_MAX_NSTAMPS 65536 struct recent_entry { struct list_head list; @@ -69,7 +69,7 @@ struct recent_entry { union nf_inet_addr addr; u_int16_t family; u_int8_t ttl; - u_int8_t index; + u_int16_t index; u_int16_t nstamps; unsigned long stamps[]; }; @@ -80,7 +80,7 @@ struct recent_table { union nf_inet_addr mask; unsigned int refcnt; unsigned int entries; - u8 nstamps_max_mask; + u_int16_t nstamps_max_mask; struct list_head lru_list; struct list_head iphash[]; }; diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index dbdca32a1c61..0712b5e82eb7 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -26,3 +26,4 @@ CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) +CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 68c7d40214eb..3a804e41f7cb 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -8,7 +8,7 @@ from lib.py import NetDrvEpEnv from lib.py import NetdevFamily from lib.py import KsftSkipEx from lib.py import rand_port -from lib.py import ethtool, ip, GenerateTraffic, CmdExitFailure +from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure def _rss_key_str(key): @@ -127,64 +127,56 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): # Try to allocate more queues when necessary qcnt = len(_get_rx_cnts(cfg)) - if qcnt >= 2 + 2 * ctx_cnt: - qcnt = None - else: + if qcnt < 2 + 2 * ctx_cnt: try: ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") except: raise KsftSkipEx("Not enough queues for the test") - ntuple = [] - ctx_id = [] ports = [] - try: - # Use queues 0 and 1 for normal traffic - ethtool(f"-X {cfg.ifname} equal 2") - for i in range(ctx_cnt): - want_cfg = f"start {2 + i * 2} equal 2" - create_cfg = want_cfg if create_with_cfg else "" - - try: - ctx_id.append(ethtool_create(cfg, "-X", f"context new {create_cfg}")) - except CmdExitFailure: - # try to carry on and skip at the end - if i == 0: - raise - ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") - ctx_cnt = i - break - - if not create_with_cfg: - ethtool(f"-X {cfg.ifname} context {ctx_id[i]} {want_cfg}") - - # Sanity check the context we just created - data = get_rss(cfg, ctx_id[i]) - ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data)) - ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) - - ports.append(rand_port()) - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id[i]}" - ntuple.append(ethtool_create(cfg, "-N", flow)) + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") - for i in range(ctx_cnt): - cnts = _get_rx_cnts(cfg) - GenerateTraffic(cfg, port=ports[i]).wait_pkts_and_stop(20000) - cnts = _get_rx_cnts(cfg, prev=cnts) + for i in range(ctx_cnt): + want_cfg = f"start {2 + i * 2} equal 2" + create_cfg = want_cfg if create_with_cfg else "" - ksft_lt(sum(cnts[ :2]), 10000, "traffic on main context:" + str(cnts)) - ksft_ge(sum(cnts[2+i*2:4+i*2]), 20000, f"traffic on context {i}: " + str(cnts)) - ksft_eq(sum(cnts[2:2+i*2] + cnts[4+i*2:]), 0, "traffic on other contexts: " + str(cnts)) - finally: - for nid in ntuple: - ethtool(f"-N {cfg.ifname} delete {nid}") - for cid in ctx_id: - ethtool(f"-X {cfg.ifname} context {cid} delete") - ethtool(f"-X {cfg.ifname} default") - if qcnt: - ethtool(f"-L {cfg.ifname} combined {qcnt}") + try: + ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + except CmdExitFailure: + # try to carry on and skip at the end + if i == 0: + raise + ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") + ctx_cnt = i + break + + if not create_with_cfg: + ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + + # Sanity check the context we just created + data = get_rss(cfg, ctx_id) + ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data)) + ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) + + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + for i in range(ctx_cnt): + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=ports[i]).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + ksft_lt(sum(cnts[ :2]), 10000, "traffic on main context:" + str(cnts)) + ksft_ge(sum(cnts[2+i*2:4+i*2]), 20000, f"traffic on context {i}: " + str(cnts)) + ksft_eq(sum(cnts[2:2+i*2] + cnts[4+i*2:]), 0, "traffic on other contexts: " + str(cnts)) if requested_ctx_cnt != ctx_cnt: raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") @@ -216,24 +208,23 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): # Try to allocate more queues when necessary qcnt = len(_get_rx_cnts(cfg)) - if qcnt >= 2 + 2 * ctx_cnt: - qcnt = None - else: + if qcnt < 2 + 2 * ctx_cnt: try: ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") except: raise KsftSkipEx("Not enough queues for the test") ntuple = [] - ctx_id = [] + ctx = [] ports = [] def remove_ctx(idx): - ethtool(f"-N {cfg.ifname} delete {ntuple[idx]}") + ntuple[idx].exec() ntuple[idx] = None - ethtool(f"-X {cfg.ifname} context {ctx_id[idx]} delete") - ctx_id[idx] = None + ctx[idx].exec() + ctx[idx] = None def check_traffic(): for i in range(ctx_cnt): @@ -241,7 +232,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): GenerateTraffic(cfg, port=ports[i]).wait_pkts_and_stop(20000) cnts = _get_rx_cnts(cfg, prev=cnts) - if ctx_id[i] is None: + if ctx[i]: ksft_lt(sum(cnts[ :2]), 10000, "traffic on main context:" + str(cnts)) ksft_ge(sum(cnts[2+i*2:4+i*2]), 20000, f"traffic on context {i}: " + str(cnts)) ksft_eq(sum(cnts[2:2+i*2] + cnts[4+i*2:]), 0, "traffic on other contexts: " + str(cnts)) @@ -249,41 +240,32 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): ksft_ge(sum(cnts[ :2]), 20000, "traffic on main context:" + str(cnts)) ksft_eq(sum(cnts[2: ]), 0, "traffic on other contexts: " + str(cnts)) - try: - # Use queues 0 and 1 for normal traffic - ethtool(f"-X {cfg.ifname} equal 2") - - for i in range(ctx_cnt): - ctx_id.append(ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2")) + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") - ports.append(rand_port()) - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id[i]}" - ntuple.append(ethtool_create(cfg, "-N", flow)) + for i in range(ctx_cnt): + ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2") + ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")) - check_traffic() + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")) - # Remove middle context - remove_ctx(ctx_cnt // 2) - check_traffic() + check_traffic() - # Remove first context - remove_ctx(0) - check_traffic() + # Remove middle context + remove_ctx(ctx_cnt // 2) + check_traffic() - # Remove last context - remove_ctx(-1) - check_traffic() + # Remove first context + remove_ctx(0) + check_traffic() - finally: - for nid in ntuple: - if nid is not None: - ethtool(f"-N {cfg.ifname} delete {nid}") - for cid in ctx_id: - if cid is not None: - ethtool(f"-X {cfg.ifname} context {cid} delete") - ethtool(f"-X {cfg.ifname} default") - if qcnt: - ethtool(f"-L {cfg.ifname} combined {qcnt}") + # Remove last context + remove_ctx(-1) + check_traffic() if requested_ctx_cnt != ctx_cnt: raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") @@ -298,69 +280,58 @@ def test_rss_context_overlap(cfg, other_ctx=0): require_ntuple(cfg) queue_cnt = len(_get_rx_cnts(cfg)) - if queue_cnt >= 4: - queue_cnt = None - else: + if queue_cnt < 4: try: ksft_pr(f"Increasing queue count {queue_cnt} -> 4") ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") except: raise KsftSkipEx("Not enough queues for the test") - ctx_id = None - ntuple = None if other_ctx == 0: ethtool(f"-X {cfg.ifname} equal 4") + defer(ethtool, f"-X {cfg.ifname} default") else: other_ctx = ethtool_create(cfg, "-X", "context new") ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4") + defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete") - try: - ctx_id = ethtool_create(cfg, "-X", "context new") - ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2") + ctx_id = ethtool_create(cfg, "-X", "context new") + ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") - port = rand_port() - if other_ctx: - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}" - ntuple = ethtool_create(cfg, "-N", flow) + port = rand_port() + if other_ctx: + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") - # Test the main context - cnts = _get_rx_cnts(cfg) - GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) - cnts = _get_rx_cnts(cfg, prev=cnts) + # Test the main context + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) - ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts)) - ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts)) - ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts)) - if other_ctx == 0: - ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) - - # Now create a rule for context 1 and make sure traffic goes to a subset - if other_ctx: - ethtool(f"-N {cfg.ifname} delete {ntuple}") - ntuple = None - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" - ntuple = ethtool_create(cfg, "-N", flow) + ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts)) + ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts)) + ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) - cnts = _get_rx_cnts(cfg) - GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) - cnts = _get_rx_cnts(cfg, prev=cnts) + # Now create a rule for context 1 and make sure traffic goes to a subset + if other_ctx: + ntuple.exec() + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") - ksft_lt(sum(cnts[ :2]), 7000, "traffic on main context: " + str(cnts)) - ksft_ge(sum(cnts[2:4]), 20000, "traffic on extra context: " + str(cnts)) - if other_ctx == 0: - ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) - finally: - if ntuple is not None: - ethtool(f"-N {cfg.ifname} delete {ntuple}") - if ctx_id: - ethtool(f"-X {cfg.ifname} context {ctx_id} delete") - if other_ctx == 0: - ethtool(f"-X {cfg.ifname} default") - else: - ethtool(f"-X {cfg.ifname} context {other_ctx} delete") - if queue_cnt: - ethtool(f"-L {cfg.ifname} combined {queue_cnt}") + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + ksft_lt(sum(cnts[ :2]), 7000, "traffic on main context: " + str(cnts)) + ksft_ge(sum(cnts[2:4]), 20000, "traffic on extra context: " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) def test_rss_context_overlap2(cfg): diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index b6ce3f33d41e..3aaa2748a58e 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -6,6 +6,7 @@ import sys import time import traceback from .consts import KSFT_MAIN_NAME +from .utils import global_defer_queue KSFT_RESULT = None KSFT_RESULT_ALL = True @@ -108,6 +109,24 @@ def ktap_result(ok, cnt=1, case="", comment=""): print(res) +def ksft_flush_defer(): + global KSFT_RESULT + + i = 0 + qlen_start = len(global_defer_queue) + while global_defer_queue: + i += 1 + entry = global_defer_queue.pop() + try: + entry.exec_only() + except: + ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!") + tb = traceback.format_exc() + for line in tb.strip().split('\n'): + ksft_pr("Defer Exception|", line) + KSFT_RESULT = False + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or [] @@ -130,29 +149,31 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): for case in cases: KSFT_RESULT = True cnt += 1 + comment = "" + cnt_key = "" + try: case(*args) except KsftSkipEx as e: - ktap_result(True, cnt, case, comment="SKIP " + str(e)) - totals['skip'] += 1 - continue + comment = "SKIP " + str(e) + cnt_key = 'skip' except KsftXfailEx as e: - ktap_result(True, cnt, case, comment="XFAIL " + str(e)) - totals['xfail'] += 1 - continue + comment = "XFAIL " + str(e) + cnt_key = 'xfail' except Exception as e: tb = traceback.format_exc() for line in tb.strip().split('\n'): ksft_pr("Exception|", line) - ktap_result(False, cnt, case) - totals['fail'] += 1 - continue - - ktap_result(KSFT_RESULT, cnt, case) - if KSFT_RESULT: - totals['pass'] += 1 - else: - totals['fail'] += 1 + KSFT_RESULT = False + cnt_key = 'fail' + + ksft_flush_defer() + + if not cnt_key: + cnt_key = 'pass' if KSFT_RESULT else 'fail' + + ktap_result(KSFT_RESULT, cnt, case, comment=comment) + totals[cnt_key] += 1 print( f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 405aa510aaf2..72590c3f90f1 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -66,6 +66,40 @@ class bkg(cmd): return self.process(terminate=self.terminate, fail=self.check_fail) +global_defer_queue = [] + + +class defer: + def __init__(self, func, *args, **kwargs): + global global_defer_queue + + if not callable(func): + raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") + + self.func = func + self.args = args + self.kwargs = kwargs + + self._queue = global_defer_queue + self._queue.append(self) + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + return self.exec() + + def exec_only(self): + self.func(*self.args, **self.kwargs) + + def cancel(self): + self._queue.remove(self) + + def exec(self): + self.cancel() + self.exec_only() + + def tool(name, args, json=None, ns=None, host=None): cmd_str = name + ' ' if json: diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 8538f08c64c2..c61d23a8c88d 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -375,6 +375,42 @@ EOF wait 2>/dev/null } +test_queue_removal() +{ + read tainted_then < /proc/sys/kernel/tainted + + ip netns exec "$ns1" nft -f - <<EOF +flush ruleset +table ip filter { + chain output { + type filter hook output priority 0; policy accept; + ip protocol icmp queue num 0 + } +} +EOF + ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0 + + ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null + kill $nfqpid + + ip netns exec "$ns1" nft flush ruleset + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + if [ "$tainted_now" -eq 0 ];then + echo "PASS: queue program exiting while packets queued" + else + echo "TAINT: queue program exiting while packets queued" + ret=1 + fi +} + ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null @@ -413,5 +449,6 @@ test_tcp_localhost test_tcp_localhost_connectclose test_tcp_localhost_requeue test_icmp_vrf +test_queue_removal exit $ret diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 85b3baa3f7f3..3e74cfa1a2bf 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -53,6 +53,7 @@ static bool cfg_do_ipv6; static bool cfg_do_connected; static bool cfg_do_connectionless; static bool cfg_do_msgmore; +static bool cfg_do_recv = true; static bool cfg_do_setsockopt; static int cfg_specific_test_id = -1; @@ -414,6 +415,9 @@ static void run_one(struct testcase *test, int fdt, int fdr, if (!sent) return; + if (!cfg_do_recv) + return; + if (test->gso_len) mss = test->gso_len; else @@ -464,8 +468,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen) if (fdr == -1) error(1, errno, "socket r"); - if (bind(fdr, addr, alen)) - error(1, errno, "bind"); + if (cfg_do_recv) { + if (bind(fdr, addr, alen)) + error(1, errno, "bind"); + } /* Have tests fail quickly instead of hang */ if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) @@ -524,7 +530,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "46cCmst:")) != -1) { + while ((c = getopt(argc, argv, "46cCmRst:")) != -1) { switch (c) { case '4': cfg_do_ipv4 = true; @@ -541,6 +547,9 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_do_msgmore = true; break; + case 'R': + cfg_do_recv = false; + break; case 's': cfg_do_setsockopt = true; break; diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh index 6c63178086b0..85d1fa3c1ff7 100755 --- a/tools/testing/selftests/net/udpgso.sh +++ b/tools/testing/selftests/net/udpgso.sh @@ -27,6 +27,31 @@ test_route_mtu() { ip route add local fd00::1/128 table local dev lo mtu 1500 } +setup_dummy_sink() { + ip link add name sink mtu 1500 type dummy + ip addr add dev sink 10.0.0.0/24 + ip addr add dev sink fd00::2/64 nodad + ip link set dev sink up +} + +test_hw_gso_hw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic on >/dev/null + ethtool -K sink tx-udp-segmentation on >/dev/null +} + +test_sw_gso_hw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic on >/dev/null + ethtool -K sink tx-udp-segmentation off >/dev/null +} + +test_sw_gso_sw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic off >/dev/null + ethtool -K sink tx-udp-segmentation off >/dev/null +} + if [ "$#" -gt 0 ]; then "$1" shift 2 # pop "test_*" arg and "--" delimiter @@ -56,3 +81,21 @@ echo "ipv4 msg_more" echo "ipv6 msg_more" ./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m + +echo "ipv4 hw-gso hw-csum" +./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -4 -C -R + +echo "ipv6 hw-gso hw-csum" +./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -6 -C -R + +echo "ipv4 sw-gso hw-csum" +./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -4 -C -R + +echo "ipv6 sw-gso hw-csum" +./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -6 -C -R + +echo "ipv4 sw-gso sw-csum" +./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -4 -C -R + +echo "ipv6 sw-gso sw-csum" +./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -6 -C -R |