diff options
70 files changed, 968 insertions, 416 deletions
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index 1432fda3b603..47bc2057e629 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -40,6 +40,9 @@ properties: clock-names: const: stmmaceth + phy-supply: + description: PHY regulator + syscon: $ref: /schemas/types.yaml#/definitions/phandle description: diff --git a/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml b/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml index d2906b4a0f59..e35da8b01dc2 100644 --- a/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml +++ b/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml @@ -16,9 +16,6 @@ description: | 8k has a second unit which provides an interface with the xMDIO bus. This driver handles these interfaces. -allOf: - - $ref: "mdio.yaml#" - properties: compatible: enum: @@ -39,13 +36,38 @@ required: - compatible - reg +allOf: + - $ref: mdio.yaml# + + - if: + required: + - interrupts + + then: + properties: + reg: + items: + - items: + - $ref: /schemas/types.yaml#/definitions/cell + - const: 0x84 + + else: + properties: + reg: + items: + - items: + - $ref: /schemas/types.yaml#/definitions/cell + - enum: + - 0x4 + - 0x10 + unevaluatedProperties: false examples: - | mdio@d0072004 { compatible = "marvell,orion-mdio"; - reg = <0xd0072004 0x4>; + reg = <0xd0072004 0x84>; #address-cells = <1>; #size-cells = <0>; interrupts = <30>; diff --git a/MAINTAINERS b/MAINTAINERS index 7f0b7181e60a..758878c0eddf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22243,7 +22243,9 @@ F: drivers/scsi/vmw_pvscsi.c F: drivers/scsi/vmw_pvscsi.h VMWARE VIRTUAL PTP CLOCK DRIVER -M: Vivek Thampi <vithampi@vmware.com> +M: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu> +M: Deep Shah <sdeep@vmware.com> +R: Alexey Makhalov <amakhalov@vmware.com> R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> L: netdev@vger.kernel.org S: Supported diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 455b555275f1..c99ffe6c683a 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1549,6 +1549,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) slave_err(bond->dev, port->slave->dev, "Port %d did not find a suitable aggregator\n", port->actor_port_number); + return; } } /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index c5c3b4e92f28..2f224b166bbb 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -37,77 +37,104 @@ qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) } static int -qca8k_set_lo(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 lo) +qca8k_mii_write_lo(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) { - u16 *cached_lo = &priv->mdio_cache.lo; - struct mii_bus *bus = priv->bus; int ret; + u16 lo; - if (lo == *cached_lo) - return 0; - + lo = val & 0xffff; ret = bus->write(bus, phy_id, regnum, lo); if (ret < 0) dev_err_ratelimited(&bus->dev, "failed to write qca8k 32bit lo register\n"); - *cached_lo = lo; - return 0; + return ret; } static int -qca8k_set_hi(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 hi) +qca8k_mii_write_hi(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) { - u16 *cached_hi = &priv->mdio_cache.hi; - struct mii_bus *bus = priv->bus; int ret; + u16 hi; - if (hi == *cached_hi) - return 0; - + hi = (u16)(val >> 16); ret = bus->write(bus, phy_id, regnum, hi); if (ret < 0) dev_err_ratelimited(&bus->dev, "failed to write qca8k 32bit hi register\n"); - *cached_hi = hi; - return 0; + return ret; } static int -qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) +qca8k_mii_read_lo(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) { int ret; ret = bus->read(bus, phy_id, regnum); - if (ret >= 0) { - *val = ret; - ret = bus->read(bus, phy_id, regnum + 1); - *val |= ret << 16; - } + if (ret < 0) + goto err; - if (ret < 0) { - dev_err_ratelimited(&bus->dev, - "failed to read qca8k 32bit register\n"); - *val = 0; - return ret; - } + *val = ret & 0xffff; + return 0; + +err: + dev_err_ratelimited(&bus->dev, + "failed to read qca8k 32bit lo register\n"); + *val = 0; + + return ret; +} +static int +qca8k_mii_read_hi(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) +{ + int ret; + + ret = bus->read(bus, phy_id, regnum); + if (ret < 0) + goto err; + + *val = ret << 16; return 0; + +err: + dev_err_ratelimited(&bus->dev, + "failed to read qca8k 32bit hi register\n"); + *val = 0; + + return ret; } -static void -qca8k_mii_write32(struct qca8k_priv *priv, int phy_id, u32 regnum, u32 val) +static int +qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) { - u16 lo, hi; + u32 hi, lo; int ret; - lo = val & 0xffff; - hi = (u16)(val >> 16); + *val = 0; - ret = qca8k_set_lo(priv, phy_id, regnum, lo); - if (ret >= 0) - ret = qca8k_set_hi(priv, phy_id, regnum + 1, hi); + ret = qca8k_mii_read_lo(bus, phy_id, regnum, &lo); + if (ret < 0) + goto err; + + ret = qca8k_mii_read_hi(bus, phy_id, regnum + 1, &hi); + if (ret < 0) + goto err; + + *val = lo | hi; + +err: + return ret; +} + +static void +qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) +{ + if (qca8k_mii_write_lo(bus, phy_id, regnum, val) < 0) + return; + + qca8k_mii_write_hi(bus, phy_id, regnum + 1, val); } static int @@ -146,7 +173,16 @@ static void qca8k_rw_reg_ack_handler(struct dsa_switch *ds, struct sk_buff *skb) command = get_unaligned_le32(&mgmt_ethhdr->command); cmd = FIELD_GET(QCA_HDR_MGMT_CMD, command); + len = FIELD_GET(QCA_HDR_MGMT_LENGTH, command); + /* Special case for len of 15 as this is the max value for len and needs to + * be increased before converting it from word to dword. + */ + if (len == 15) + len++; + + /* We can ignore odd value, we always round up them in the alloc function. */ + len *= sizeof(u16); /* Make sure the seq match the requested packet */ if (get_unaligned_le32(&mgmt_ethhdr->seq) == mgmt_eth_data->seq) @@ -193,17 +229,33 @@ static struct sk_buff *qca8k_alloc_mdio_header(enum mdio_cmd cmd, u32 reg, u32 * if (!skb) return NULL; - /* Max value for len reg is 15 (0xf) but the switch actually return 16 byte - * Actually for some reason the steps are: - * 0: nothing - * 1-4: first 4 byte - * 5-6: first 12 byte - * 7-15: all 16 byte + /* Hdr mgmt length value is in step of word size. + * As an example to process 4 byte of data the correct length to set is 2. + * To process 8 byte 4, 12 byte 6, 16 byte 8... + * + * Odd values will always return the next size on the ack packet. + * (length of 3 (6 byte) will always return 8 bytes of data) + * + * This means that a value of 15 (0xf) actually means reading/writing 32 bytes + * of data. + * + * To correctly calculate the length we devide the requested len by word and + * round up. + * On the ack function we can skip the odd check as we already handle the + * case here. */ - if (len == 16) - real_len = 15; - else - real_len = len; + real_len = DIV_ROUND_UP(len, sizeof(u16)); + + /* We check if the result len is odd and we round up another time to + * the next size. (length of 3 will be increased to 4 as switch will always + * return 8 bytes) + */ + if (real_len % sizeof(u16) != 0) + real_len++; + + /* Max reg value is 0xf(15) but switch will always return the next size (32 byte) */ + if (real_len == 16) + real_len--; skb_reset_mac_header(skb); skb_set_network_header(skb, skb->len); @@ -417,7 +469,7 @@ qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val) if (ret < 0) goto exit; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); exit: mutex_unlock(&bus->mdio_lock); @@ -450,7 +502,7 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_ val &= ~mask; val |= write_val; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); exit: mutex_unlock(&bus->mdio_lock); @@ -688,9 +740,9 @@ qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask) qca8k_split_addr(reg, &r1, &r2, &page); - ret = read_poll_timeout(qca8k_mii_read32, ret1, !(val & mask), 0, + ret = read_poll_timeout(qca8k_mii_read_hi, ret1, !(val & mask), 0, QCA8K_BUSY_WAIT_TIMEOUT * USEC_PER_MSEC, false, - bus, 0x10 | r2, r1, &val); + bus, 0x10 | r2, r1 + 1, &val); /* Check if qca8k_read has failed for a different reason * before returnting -ETIMEDOUT @@ -725,14 +777,14 @@ qca8k_mdio_write(struct qca8k_priv *priv, int phy, int regnum, u16 data) if (ret) goto exit; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL, QCA8K_MDIO_MASTER_BUSY); exit: /* even if the busy_wait timeouts try to clear the MASTER_EN */ - qca8k_mii_write32(priv, 0x10 | r2, r1, 0); + qca8k_mii_write_hi(bus, 0x10 | r2, r1 + 1, 0); mutex_unlock(&bus->mdio_lock); @@ -762,18 +814,18 @@ qca8k_mdio_read(struct qca8k_priv *priv, int phy, int regnum) if (ret) goto exit; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write_hi(bus, 0x10 | r2, r1 + 1, val); ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL, QCA8K_MDIO_MASTER_BUSY); if (ret) goto exit; - ret = qca8k_mii_read32(bus, 0x10 | r2, r1, &val); + ret = qca8k_mii_read_lo(bus, 0x10 | r2, r1, &val); exit: /* even if the busy_wait timeouts try to clear the MASTER_EN */ - qca8k_mii_write32(priv, 0x10 | r2, r1, 0); + qca8k_mii_write_hi(bus, 0x10 | r2, r1 + 1, 0); mutex_unlock(&bus->mdio_lock); @@ -1943,8 +1995,6 @@ qca8k_sw_probe(struct mdio_device *mdiodev) } priv->mdio_cache.page = 0xffff; - priv->mdio_cache.lo = 0xffff; - priv->mdio_cache.hi = 0xffff; /* Check the detected switch id */ ret = qca8k_read_switch_id(priv); diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h index 0b7a5cb12321..03514f7a20be 100644 --- a/drivers/net/dsa/qca/qca8k.h +++ b/drivers/net/dsa/qca/qca8k.h @@ -375,11 +375,6 @@ struct qca8k_mdio_cache { * mdio writes */ u16 page; -/* lo and hi can also be cached and from Documentation we can skip one - * extra mdio write if lo or hi is didn't change. - */ - u16 lo; - u16 hi; }; struct qca8k_pcs { diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 8c8b4c88c7de..451c3a1b6255 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -2400,29 +2400,18 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, return -EOPNOTSUPP; } - switch (func) { - case ENA_ADMIN_TOEPLITZ: - if (key) { - if (key_len != sizeof(hash_key->key)) { - netdev_err(ena_dev->net_device, - "key len (%u) doesn't equal the supported size (%zu)\n", - key_len, sizeof(hash_key->key)); - return -EINVAL; - } - memcpy(hash_key->key, key, key_len); - rss->hash_init_val = init_val; - hash_key->key_parts = key_len / sizeof(hash_key->key[0]); + if ((func == ENA_ADMIN_TOEPLITZ) && key) { + if (key_len != sizeof(hash_key->key)) { + netdev_err(ena_dev->net_device, + "key len (%u) doesn't equal the supported size (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; } - break; - case ENA_ADMIN_CRC32: - rss->hash_init_val = init_val; - break; - default: - netdev_err(ena_dev->net_device, "Invalid hash function (%d)\n", - func); - return -EINVAL; + memcpy(hash_key->key, key, key_len); + hash_key->key_parts = key_len / sizeof(hash_key->key[0]); } + rss->hash_init_val = init_val; old_func = rss->hash_func; rss->hash_func = func; rc = ena_com_set_hash_function(ena_dev); diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 48ae6d810f8f..8da79eedc057 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -887,11 +887,7 @@ static int ena_set_tunable(struct net_device *netdev, switch (tuna->id) { case ETHTOOL_RX_COPYBREAK: len = *(u32 *)data; - if (len > adapter->netdev->mtu) { - ret = -EINVAL; - break; - } - adapter->rx_copybreak = len; + ret = ena_set_rx_copybreak(adapter, len); break; default: ret = -EINVAL; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a95529a69cbb..e8ad5ea31aff 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -374,9 +374,9 @@ static int ena_xdp_xmit(struct net_device *dev, int n, static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) { + u32 verdict = ENA_XDP_PASS; struct bpf_prog *xdp_prog; struct ena_ring *xdp_ring; - u32 verdict = XDP_PASS; struct xdp_frame *xdpf; u64 *xdp_stat; @@ -393,7 +393,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) if (unlikely(!xdpf)) { trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = XDP_ABORTED; + verdict = ENA_XDP_DROP; break; } @@ -409,29 +409,35 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) spin_unlock(&xdp_ring->xdp_tx_lock); xdp_stat = &rx_ring->rx_stats.xdp_tx; + verdict = ENA_XDP_TX; break; case XDP_REDIRECT: if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) { xdp_stat = &rx_ring->rx_stats.xdp_redirect; + verdict = ENA_XDP_REDIRECT; break; } trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = XDP_ABORTED; + verdict = ENA_XDP_DROP; break; case XDP_ABORTED: trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = ENA_XDP_DROP; break; case XDP_DROP: xdp_stat = &rx_ring->rx_stats.xdp_drop; + verdict = ENA_XDP_DROP; break; case XDP_PASS: xdp_stat = &rx_ring->rx_stats.xdp_pass; + verdict = ENA_XDP_PASS; break; default: bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_invalid; + verdict = ENA_XDP_DROP; } ena_increase_stat(xdp_stat, 1, &rx_ring->syncp); @@ -512,16 +518,18 @@ static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, struct bpf_prog *prog, int first, int count) { + struct bpf_prog *old_bpf_prog; struct ena_ring *rx_ring; int i = 0; for (i = first; i < count; i++) { rx_ring = &adapter->rx_ring[i]; - xchg(&rx_ring->xdp_bpf_prog, prog); - if (prog) { + old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog); + + if (!old_bpf_prog && prog) { ena_xdp_register_rxq_info(rx_ring); rx_ring->rx_headroom = XDP_PACKET_HEADROOM; - } else { + } else if (old_bpf_prog && !prog) { ena_xdp_unregister_rxq_info(rx_ring); rx_ring->rx_headroom = NET_SKB_PAD; } @@ -672,6 +680,7 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, ring->ena_dev = adapter->ena_dev; ring->per_napi_packets = 0; ring->cpu = 0; + ring->numa_node = 0; ring->no_interrupt_event_cnt = 0; u64_stats_init(&ring->syncp); } @@ -775,6 +784,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; tx_ring->cpu = ena_irq->cpu; + tx_ring->numa_node = node; return 0; err_push_buf_intermediate_buf: @@ -907,6 +917,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; rx_ring->cpu = ena_irq->cpu; + rx_ring->numa_node = node; return 0; } @@ -1619,12 +1630,12 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) * we expect, then we simply drop it */ if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) - return XDP_DROP; + return ENA_XDP_DROP; ret = ena_xdp_execute(rx_ring, xdp); /* The xdp program might expand the headers */ - if (ret == XDP_PASS) { + if (ret == ENA_XDP_PASS) { rx_info->page_offset = xdp->data - xdp->data_hard_start; rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; } @@ -1663,7 +1674,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq); do { - xdp_verdict = XDP_PASS; + xdp_verdict = ENA_XDP_PASS; skb = NULL; ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; ena_rx_ctx.max_bufs = rx_ring->sgl_size; @@ -1691,7 +1702,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); /* allocate skb and fill it */ - if (xdp_verdict == XDP_PASS) + if (xdp_verdict == ENA_XDP_PASS) skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, @@ -1709,14 +1720,15 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, /* Packets was passed for transmission, unmap it * from RX side. */ - if (xdp_verdict == XDP_TX || xdp_verdict == XDP_REDIRECT) { + if (xdp_verdict & ENA_XDP_FORWARDED) { ena_unmap_rx_buff(rx_ring, &rx_ring->rx_buffer_info[req_id]); rx_ring->rx_buffer_info[req_id].page = NULL; } } - if (xdp_verdict != XDP_PASS) { + if (xdp_verdict != ENA_XDP_PASS) { xdp_flags |= xdp_verdict; + total_len += ena_rx_ctx.ena_bufs[0].len; res_budget--; continue; } @@ -1760,7 +1772,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ena_refill_rx_bufs(rx_ring, refill_required); } - if (xdp_flags & XDP_REDIRECT) + if (xdp_flags & ENA_XDP_REDIRECT) xdp_do_flush_map(); return work_done; @@ -1814,8 +1826,9 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) static void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { + u32 rx_interval = tx_ring->smoothed_interval; struct ena_eth_io_intr_reg intr_reg; - u32 rx_interval = 0; + /* Rx ring can be NULL when for XDP tx queues which don't have an * accompanying rx_ring pair. */ @@ -1853,20 +1866,27 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring, if (likely(tx_ring->cpu == cpu)) goto out; + tx_ring->cpu = cpu; + if (rx_ring) + rx_ring->cpu = cpu; + numa_node = cpu_to_node(cpu); + + if (likely(tx_ring->numa_node == numa_node)) + goto out; + put_cpu(); if (numa_node != NUMA_NO_NODE) { ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); - if (rx_ring) + tx_ring->numa_node = numa_node; + if (rx_ring) { + rx_ring->numa_node = numa_node; ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + } } - tx_ring->cpu = cpu; - if (rx_ring) - rx_ring->cpu = cpu; - return; out: put_cpu(); @@ -1987,11 +2007,10 @@ static int ena_io_poll(struct napi_struct *napi, int budget) if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) ena_adjust_adaptive_rx_intr_moderation(ena_napi); + ena_update_ring_numa_node(tx_ring, rx_ring); ena_unmask_interrupt(tx_ring, rx_ring); } - ena_update_ring_numa_node(tx_ring, rx_ring); - ret = rx_work_done; } else { ret = budget; @@ -2376,7 +2395,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) ctx.mem_queue_type = ena_dev->tx_mem_queue_type; ctx.msix_vector = msix_vector; ctx.queue_size = tx_ring->ring_size; - ctx.numa_node = cpu_to_node(tx_ring->cpu); + ctx.numa_node = tx_ring->numa_node; rc = ena_com_create_io_queue(ena_dev, &ctx); if (rc) { @@ -2444,7 +2463,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ctx.msix_vector = msix_vector; ctx.queue_size = rx_ring->ring_size; - ctx.numa_node = cpu_to_node(rx_ring->cpu); + ctx.numa_node = rx_ring->numa_node; rc = ena_com_create_io_queue(ena_dev, &ctx); if (rc) { @@ -2805,6 +2824,24 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, return dev_was_up ? ena_up(adapter) : 0; } +int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak) +{ + struct ena_ring *rx_ring; + int i; + + if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE)) + return -EINVAL; + + adapter->rx_copybreak = rx_copybreak; + + for (i = 0; i < adapter->num_io_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + rx_ring->rx_copybreak = rx_copybreak; + } + + return 0; +} + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) { struct ena_com_dev *ena_dev = adapter->ena_dev; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 1bdce99bf688..2cb141079474 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -262,9 +262,11 @@ struct ena_ring { bool disable_meta_caching; u16 no_interrupt_event_cnt; - /* cpu for TPH */ + /* cpu and NUMA for TPH */ int cpu; - /* number of tx/rx_buffer_info's entries */ + int numa_node; + + /* number of tx/rx_buffer_info's entries */ int ring_size; enum ena_admin_placement_policy_type tx_mem_queue_type; @@ -392,6 +394,8 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); +int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak); + int ena_get_sset_count(struct net_device *netdev, int sset); static inline void ena_reset_device(struct ena_adapter *adapter, @@ -409,6 +413,15 @@ enum ena_xdp_errors_t { ENA_XDP_NO_ENOUGH_QUEUES, }; +enum ENA_XDP_ACTIONS { + ENA_XDP_PASS = 0, + ENA_XDP_TX = BIT(0), + ENA_XDP_REDIRECT = BIT(1), + ENA_XDP_DROP = BIT(2) +}; + +#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT) + static inline bool ena_xdp_present(struct ena_adapter *adapter) { return !!adapter->xdp_bpf_prog; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 7b666106feee..614c0278419b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1064,6 +1064,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + tasklet_kill(&pdata->tasklet_dev); + tasklet_kill(&pdata->tasklet_ecc); + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c index 22d4fc547a0a..a9ccc4258ee5 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata) xgbe_i2c_disable(pdata); xgbe_i2c_clear_all_interrupts(pdata); - if (pdata->dev_irq != pdata->i2c_irq) + if (pdata->dev_irq != pdata->i2c_irq) { devm_free_irq(pdata->dev, pdata->i2c_irq, pdata); + tasklet_kill(&pdata->tasklet_i2c); + } } static int xgbe_i2c_start(struct xgbe_prv_data *pdata) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 4e97b4869522..0c5c1b155683 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1390,8 +1390,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) /* Disable auto-negotiation */ xgbe_an_disable_all(pdata); - if (pdata->dev_irq != pdata->an_irq) + if (pdata->dev_irq != pdata->an_irq) { devm_free_irq(pdata->dev, pdata->an_irq, pdata); + tasklet_kill(&pdata->tasklet_an); + } pdata->phy_if.phy_impl.stop(pdata); diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index d91fdb0c2649..2cf96892e565 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2784,17 +2784,11 @@ static int bcm_enet_shared_probe(struct platform_device *pdev) return 0; } -static int bcm_enet_shared_remove(struct platform_device *pdev) -{ - return 0; -} - /* this "shared" driver is needed because both macs share a single * address space */ struct platform_driver bcm63xx_enet_shared_driver = { .probe = bcm_enet_shared_probe, - .remove = bcm_enet_shared_remove, .driver = { .name = "bcm63xx_enet_shared", .owner = THIS_MODULE, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c index 91f02c505028..b307bef4dc29 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c @@ -127,11 +127,6 @@ static int enetc_ierb_probe(struct platform_device *pdev) return 0; } -static int enetc_ierb_remove(struct platform_device *pdev) -{ - return 0; -} - static const struct of_device_id enetc_ierb_match[] = { { .compatible = "fsl,ls1028a-enetc-ierb", }, {}, @@ -144,7 +139,6 @@ static struct platform_driver enetc_ierb_driver = { .of_match_table = enetc_ierb_match, }, .probe = enetc_ierb_probe, - .remove = enetc_ierb_remove, }; module_platform_driver(enetc_ierb_driver); diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index d00bae15a901..d528ca681b6f 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -1430,7 +1430,7 @@ int dtsec_initialization(struct mac_device *mac_dev, dtsec->dtsec_drv_param->tx_pad_crc = true; phy_node = of_parse_phandle(mac_node, "tbi-handle", 0); - if (!phy_node || of_device_is_available(phy_node)) { + if (!phy_node || !of_device_is_available(phy_node)) { of_node_put(phy_node); err = -EINVAL; dev_err_probe(mac_dev->dev, err, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 6c2742f59c77..07ad5f35219e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3910,9 +3910,17 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) return ret; } - if (!reset || !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) + if (!reset || + !test_bit(HCLGE_VPORT_STATE_INITED, &vport->state)) continue; + if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state) && + hdev->reset_type == HNAE3_FUNC_RESET) { + set_bit(HCLGE_VPORT_NEED_NOTIFY_RESET, + &vport->need_notify); + continue; + } + /* Inform VF to process the reset. * hclge_inform_reset_assert_to_vf may fail if VF * driver is not loaded. @@ -4609,18 +4617,25 @@ static void hclge_reset_service_task(struct hclge_dev *hdev) static void hclge_update_vport_alive(struct hclge_dev *hdev) { +#define HCLGE_ALIVE_SECONDS_NORMAL 8 + + unsigned long alive_time = HCLGE_ALIVE_SECONDS_NORMAL * HZ; int i; /* start from vport 1 for PF is always alive */ for (i = 1; i < hdev->num_alloc_vport; i++) { struct hclge_vport *vport = &hdev->vport[i]; - if (time_after(jiffies, vport->last_active_jiffies + 8 * HZ)) + if (!test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) || + !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) + continue; + if (time_after(jiffies, vport->last_active_jiffies + + alive_time)) { clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); - - /* If vf is not alive, set to default value */ - if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) - vport->mps = HCLGE_MAC_DEFAULT_FRAME; + dev_warn(&hdev->pdev->dev, + "VF %u heartbeat timeout\n", + i - HCLGE_VF_VPORT_START_NUM); + } } } @@ -8064,9 +8079,11 @@ int hclge_vport_start(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; + set_bit(HCLGE_VPORT_STATE_INITED, &vport->state); set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); vport->last_active_jiffies = jiffies; + vport->need_notify = 0; if (test_bit(vport->vport_id, hdev->vport_config_block)) { if (vport->vport_id) { @@ -8084,7 +8101,9 @@ int hclge_vport_start(struct hclge_vport *vport) void hclge_vport_stop(struct hclge_vport *vport) { + clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state); clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + vport->need_notify = 0; } static int hclge_client_start(struct hnae3_handle *handle) @@ -9208,7 +9227,8 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, return 0; } - dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s\n", + dev_info(&hdev->pdev->dev, + "MAC of VF %d has been set to %s, will be active after VF reset\n", vf, format_mac_addr); return 0; } @@ -10465,12 +10485,16 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, * for DEVICE_VERSION_V3, vf doesn't need to know about the port based * VLAN state. */ - if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 && - test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) - (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0], - vport->vport_id, - state, &vlan_info); - + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) { + if (test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) + (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0], + vport->vport_id, + state, + &vlan_info); + else + set_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, + &vport->need_notify); + } return 0; } @@ -11941,7 +11965,7 @@ static void hclge_reset_vport_state(struct hclge_dev *hdev) int i; for (i = 0; i < hdev->num_alloc_vport; i++) { - hclge_vport_stop(vport); + clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); vport++; } } @@ -12955,6 +12979,11 @@ static void hclge_clear_vport_vf_info(struct hclge_vport *vport, int vfid) struct hclge_vlan_info vlan_info; int ret; + clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state); + clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + vport->need_notify = 0; + vport->mps = 0; + /* after disable sriov, clean VF rate configured by PF */ ret = hclge_tm_qs_shaper_cfg(vport, 0); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 495b639b0dc2..13f23d606e77 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -995,9 +995,15 @@ enum HCLGE_VPORT_STATE { HCLGE_VPORT_STATE_MAC_TBL_CHANGE, HCLGE_VPORT_STATE_PROMISC_CHANGE, HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, + HCLGE_VPORT_STATE_INITED, HCLGE_VPORT_STATE_MAX }; +enum HCLGE_VPORT_NEED_NOTIFY { + HCLGE_VPORT_NEED_NOTIFY_RESET, + HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, +}; + struct hclge_vlan_info { u16 vlan_proto; /* so far support 802.1Q only */ u16 qos; @@ -1044,6 +1050,7 @@ struct hclge_vport { struct hnae3_handle roce; unsigned long state; + unsigned long need_notify; unsigned long last_active_jiffies; u32 mps; /* Max packet size */ struct hclge_vf_info vf_info; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index a7b06c63143c..04ff9bf12185 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -124,17 +124,26 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, return status; } +static int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type) +{ + __le16 msg_data; + u8 dest_vfid; + + dest_vfid = (u8)vport->vport_id; + msg_data = cpu_to_le16(reset_type); + + /* send this requested info to VF */ + return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data), + HCLGE_MBX_ASSERTING_RESET, dest_vfid); +} + int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; - __le16 msg_data; u16 reset_type; - u8 dest_vfid; BUILD_BUG_ON(HNAE3_MAX_RESET > U16_MAX); - dest_vfid = (u8)vport->vport_id; - if (hdev->reset_type == HNAE3_FUNC_RESET) reset_type = HNAE3_VF_PF_FUNC_RESET; else if (hdev->reset_type == HNAE3_FLR_RESET) @@ -142,11 +151,7 @@ int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) else reset_type = HNAE3_VF_FUNC_RESET; - msg_data = cpu_to_le16(reset_type); - - /* send this requested info to VF */ - return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data), - HCLGE_MBX_ASSERTING_RESET, dest_vfid); + return hclge_inform_vf_reset(vport, reset_type); } static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head) @@ -652,9 +657,56 @@ static int hclge_reset_vf(struct hclge_vport *vport) return hclge_func_reset_cmd(hdev, vport->vport_id); } +static void hclge_notify_vf_config(struct hclge_vport *vport) +{ + struct hclge_dev *hdev = vport->back; + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + struct hclge_port_base_vlan_config *vlan_cfg; + int ret; + + hclge_push_vf_link_status(vport); + if (test_bit(HCLGE_VPORT_NEED_NOTIFY_RESET, &vport->need_notify)) { + ret = hclge_inform_vf_reset(vport, HNAE3_VF_PF_FUNC_RESET); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to inform VF %u reset!", + vport->vport_id - HCLGE_VF_VPORT_START_NUM); + return; + } + vport->need_notify = 0; + return; + } + + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 && + test_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify)) { + vlan_cfg = &vport->port_base_vlan_cfg; + ret = hclge_push_vf_port_base_vlan_info(&hdev->vport[0], + vport->vport_id, + vlan_cfg->state, + &vlan_cfg->vlan_info); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to inform VF %u port base vlan!", + vport->vport_id - HCLGE_VF_VPORT_START_NUM); + return; + } + clear_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify); + } +} + static void hclge_vf_keep_alive(struct hclge_vport *vport) { + struct hclge_dev *hdev = vport->back; + vport->last_active_jiffies = jiffies; + + if (test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) && + !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) { + set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + dev_info(&hdev->pdev->dev, "VF %u is alive!", + vport->vport_id - HCLGE_VF_VPORT_START_NUM); + hclge_notify_vf_config(vport); + } } static int hclge_set_vf_mtu(struct hclge_vport *vport, @@ -954,6 +1006,7 @@ static int hclge_mbx_vf_uninit_handler(struct hclge_mbx_ops_param *param) hclge_rm_vport_all_mac_table(param->vport, true, HCLGE_MAC_ADDR_MC); hclge_rm_vport_all_vlan_table(param->vport, true); + param->vport->mps = 0; return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 9e10e7471b88..88f8772a61cd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1012,6 +1012,7 @@ static void otx2_pool_refill_task(struct work_struct *work) rbpool = cq->rbpool; free_ptrs = cq->pool_ptrs; + get_cpu(); while (cq->pool_ptrs) { if (otx2_alloc_rbuf(pfvf, rbpool, &bufptr)) { /* Schedule a WQ if we fails to free atleast half of the @@ -1031,6 +1032,7 @@ static void otx2_pool_refill_task(struct work_struct *work) pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); cq->pool_ptrs--; } + put_cpu(); cq->refill_task_sched = false; } @@ -1368,6 +1370,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) if (err) goto fail; + get_cpu(); /* Allocate pointers and free them to aura/pool */ for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); @@ -1376,18 +1379,24 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) sq = &qset->sq[qidx]; sq->sqb_count = 0; sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL); - if (!sq->sqb_ptrs) - return -ENOMEM; + if (!sq->sqb_ptrs) { + err = -ENOMEM; + goto err_mem; + } for (ptr = 0; ptr < num_sqbs; ptr++) { - if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) - return -ENOMEM; + err = otx2_alloc_rbuf(pfvf, pool, &bufptr); + if (err) + goto err_mem; pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; } } - return 0; +err_mem: + put_cpu(); + return err ? -ENOMEM : 0; + fail: otx2_mbox_reset(&pfvf->mbox.mbox, 0); otx2_aura_pool_free(pfvf); @@ -1426,18 +1435,21 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) if (err) goto fail; + get_cpu(); /* Allocate pointers and free them to aura/pool */ for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { pool = &pfvf->qset.pool[pool_id]; for (ptr = 0; ptr < num_ptrs; ptr++) { - if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) - return -ENOMEM; + err = otx2_alloc_rbuf(pfvf, pool, &bufptr); + if (err) + goto err_mem; pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr + OTX2_HEAD_ROOM); } } - - return 0; +err_mem: + put_cpu(); + return err ? -ENOMEM : 0; fail: otx2_mbox_reset(&pfvf->mbox.mbox, 0); otx2_aura_pool_free(pfvf); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index ddb197970c22..5bd83c0275f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -468,7 +468,7 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, bool new_state = val.vbool; if (new_state && !MLX5_CAP_GEN(dev, roce) && - !MLX5_CAP_GEN(dev, roce_rw_supported)) { + !(MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))) { NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); return -EOPNOTSUPP; } @@ -563,7 +563,7 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack) { - return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL; + return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL; } static const struct devlink_param mlx5_devlink_params[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 5f6f95ad6888..1ae15b8536a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -459,7 +459,11 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, goto unlock; for (i = 0; i < priv->channels.num; i++) { - struct mlx5e_rq *rq = &priv->channels.c[i]->rq; + struct mlx5e_channel *c = priv->channels.c[i]; + struct mlx5e_rq *rq; + + rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ? + &c->xskrq : &c->rq; err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a69849e0deed..313df8232db7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2103,14 +2103,9 @@ out_err: static void mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv) { - bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB; struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs; - char dirname[16] = {}; - if (sscanf(dirname, "ct_%s", is_fdb ? "fdb" : "nic") < 0) - return; - - ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev)); + ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev)); debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root, &ct_dbgfs->stats.offloaded); debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index ff73d25bc6eb..2aaf8ab857b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -222,7 +222,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, int err; list_for_each_entry(flow, flow_list, tmp_list) { - if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) + if (!mlx5e_is_offloaded_flow(flow)) continue; attr = mlx5e_tc_get_encap_attr(flow); @@ -231,6 +231,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + /* Clear pkt_reformat before checking slow path flag. Because + * in next iteration, the same flow is already set slow path + * flag, but still need to clear the pkt_reformat. + */ + if (flow_flag_test(flow, SLOW)) + continue; + /* update from encap rule to slow path rule */ spec = &flow->attr->parse_attr->spec; rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index f5b26f5a7de4..054d80c4e65c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -273,6 +273,11 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); MLX5_SET(fte_match_set_misc3, misc_3_c, geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_exist)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_tlv_option_0_exist); + MLX5_SET_TO_ONES(fte_match_set_misc, misc_v, geneve_tlv_option_0_exist); + } spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8d36e2de53a9..cff5f2e29e1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1305,7 +1305,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; - sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; sq->xsk_pool = xsk_pool; sq->stats = sq->xsk_pool ? diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 60a73990017c..6b4c9ffad95b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -67,6 +67,7 @@ static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport) int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + bool vst_mode_steering = esw_vst_mode_is_steering(esw); struct mlx5_flow_destination drop_ctr_dst = {}; struct mlx5_flow_destination *dst = NULL; struct mlx5_fc *drop_counter = NULL; @@ -77,6 +78,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, */ int table_size = 2; int dest_num = 0; + int actions_flag; int err = 0; if (vport->egress.legacy.drop_counter) { @@ -119,8 +121,11 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, vport->vport, vport->info.vlan, vport->info.qos); /* Allowed vlan rule */ + actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + if (vst_mode_steering) + actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan, - MLX5_FLOW_CONTEXT_ACTION_ALLOW); + actions_flag); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index b1a5199260f6..093ed86a0acd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -139,11 +139,14 @@ static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport) int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + bool vst_mode_steering = esw_vst_mode_is_steering(esw); struct mlx5_flow_destination drop_ctr_dst = {}; struct mlx5_flow_destination *dst = NULL; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_spec *spec = NULL; struct mlx5_fc *counter = NULL; + bool vst_check_cvlan = false; + bool vst_push_cvlan = false; /* The ingress acl table contains 4 groups * (2 active rules at the same time - * 1 allow rule from one of the first 3 groups. @@ -203,7 +206,26 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, goto out; } - if (vport->info.vlan || vport->info.qos) + if ((vport->info.vlan || vport->info.qos)) { + if (vst_mode_steering) + vst_push_cvlan = true; + else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always)) + vst_check_cvlan = true; + } + + if (vst_check_cvlan || vport->info.spoofchk) + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + /* Create ingress allow rule */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + if (vst_push_cvlan) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + flow_act.vlan[0].prio = vport->info.qos; + flow_act.vlan[0].vid = vport->info.vlan; + flow_act.vlan[0].ethtype = ETH_P_8021Q; + } + + if (vst_check_cvlan) MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); @@ -218,9 +240,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, ether_addr_copy(smac_v, vport->info.mac); } - /* Create ingress allow rule */ - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, NULL, 0); if (IS_ERR(vport->ingress.allow_rule)) { @@ -232,6 +251,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, goto out; } + if (!vst_check_cvlan && !vport->info.spoofchk) + goto out; + memset(&flow_act, 0, sizeof(flow_act)); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach drop flow counter */ @@ -257,7 +279,8 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, return 0; out: - esw_acl_ingress_lgcy_cleanup(esw, vport); + if (err) + esw_acl_ingress_lgcy_cleanup(esw, vport); kvfree(spec); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 527e4bffda8d..0dfd5742c6fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -161,10 +161,17 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, esw_vport_context.vport_cvlan_strip, 1); if (set_flags & SET_VLAN_INSERT) { - /* insert only if no vlan in packet */ - MLX5_SET(modify_esw_vport_context_in, in, - esw_vport_context.vport_cvlan_insert, 1); - + if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) { + /* insert either if vlan exist in packet or not */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, + MLX5_VPORT_CVLAN_INSERT_ALWAYS); + } else { + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, + MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN); + } MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.cvlan_pcp, qos); MLX5_SET(modify_esw_vport_context_in, in, @@ -809,6 +816,7 @@ out_free: static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + bool vst_mode_steering = esw_vst_mode_is_steering(esw); u16 vport_num = vport->vport; int flags; int err; @@ -839,8 +847,9 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) flags = (vport->info.vlan || vport->info.qos) ? SET_VLAN_STRIP | SET_VLAN_INSERT : 0; - modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, - vport->info.qos, flags); + if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, + vport->info.qos, flags); return 0; @@ -1848,6 +1857,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + bool vst_mode_steering = esw_vst_mode_is_steering(esw); int err = 0; if (IS_ERR(evport)) @@ -1855,9 +1865,11 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, if (vlan > 4095 || qos > 7) return -EINVAL; - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); - if (err) - return err; + if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) { + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); + if (err) + return err; + } evport->info.vlan = vlan; evport->info.qos = qos; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 5a85a5d32be7..92644fbb5081 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -527,6 +527,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); +static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw) +{ + return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) && + MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan)); +} + static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 86ed87d704f7..96417c5feed7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -674,6 +674,12 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) dev = container_of(priv, struct mlx5_core_dev, priv); devlink = priv_to_devlink(dev); + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) { + mlx5_core_err(dev, "health works are not permitted at this stage\n"); + return; + } + mutex_unlock(&dev->intf_state_mutex); enter_error_state(dev, false); if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { devl_lock(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 7c5c500fd215..2c73c8445e63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -71,6 +71,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, params->packet_merge.type = MLX5E_PACKET_MERGE_NONE; params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; params->tunneled_offload_en = false; + + /* CQE compression is not supported for IPoIB */ + params->rx_cqe_compress_def = false; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); } /* Called directly after IPoIB netdevice was created to initialize SW structs */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 32c3e0a649a7..ad32b80e8501 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -228,6 +228,7 @@ static void mlx5_ldev_free(struct kref *ref) if (ldev->nb.notifier_call) unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); + cancel_delayed_work_sync(&ldev->bond_work); destroy_workqueue(ldev->wq); mlx5_lag_mpesw_cleanup(ldev); mutex_destroy(&ldev->lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7f5db13e3550..df134f6d32dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -613,7 +613,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); - if (MLX5_CAP_GEN(dev, roce_rw_supported)) + if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce)) MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_on(dev)); @@ -1050,6 +1050,8 @@ err_rl_cleanup: err_tables_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); + mlx5_cleanup_clock(dev); + mlx5_cleanup_reserved_gids(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_fw_reset_cleanup(dev); err_events_cleanup: diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index d25f4f09faa0..3c5d4fe99373 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -834,7 +834,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) if (err) goto cleanup_config; - if (!of_get_mac_address(np, sparx5->base_mac)) { + if (of_get_mac_address(np, sparx5->base_mac)) { dev_info(sparx5->dev, "MAC addr was not set, use random MAC\n"); eth_random_addr(sparx5->base_mac); sparx5->base_mac[5] = 0; diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index a49f66efacb8..d458a35839cc 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -132,10 +132,10 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt, u32 endpoint_id, bool enable) { struct ipa *ipa = interrupt->ipa; + u32 mask = BIT(endpoint_id % 32); u32 unit = endpoint_id / 32; const struct ipa_reg *reg; u32 offset; - u32 mask; u32 val; WARN_ON(!test_bit(endpoint_id, ipa->available)); @@ -148,7 +148,6 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt, offset = ipa_reg_n_offset(reg, unit); val = ioread32(ipa->reg_virt + offset); - mask = BIT(endpoint_id); if (enable) val |= mask; else diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index 8dcb49ed1f3d..7fd9fe6a602b 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -105,6 +105,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev) if (!priv->phy_dev->drv) { dev_info(dev, "Attached phy not ready\n"); + put_device(&priv->phy_dev->mdio.dev); return -EPROBE_DEFER; } diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index f79333fe1783..7b3739b29c8f 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf, off = le32_to_cpu(u.get_c->offset); len = le32_to_cpu(u.get_c->len); - if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE)) + if (unlikely((off > CONTROL_BUFFER_SIZE - 8) || + (len > CONTROL_BUFFER_SIZE - 8 - off))) goto response_error; if (*reply_len != -1 && len != *reply_len) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 92224b36787a..b1b179effe2a 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2917,16 +2917,23 @@ static int vxlan_init(struct net_device *dev) vxlan_vnigroup_init(vxlan); dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; + if (!dev->tstats) { + err = -ENOMEM; + goto err_vnigroup_uninit; + } err = gro_cells_init(&vxlan->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); - return err; - } + if (err) + goto err_free_percpu; return 0; + +err_free_percpu: + free_percpu(dev->tstats); +err_vnigroup_uninit: + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) + vxlan_vnigroup_uninit(vxlan); + return err; } static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index b1b5720d89a5..ee657452f122 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -45,8 +45,8 @@ struct sk_buff; QCA_HDR_MGMT_COMMAND_LEN + \ QCA_HDR_MGMT_DATA1_LEN) -#define QCA_HDR_MGMT_DATA2_LEN 12 /* Other 12 byte for the mdio data */ -#define QCA_HDR_MGMT_PADDING_LEN 34 /* Padding to reach the min Ethernet packet */ +#define QCA_HDR_MGMT_DATA2_LEN 28 /* Other 28 byte for the mdio data */ +#define QCA_HDR_MGMT_PADDING_LEN 18 /* Padding to reach the min Ethernet packet */ #define QCA_HDR_MGMT_PKT_LEN (QCA_HDR_MGMT_HEADER_LEN + \ QCA_HDR_LEN + \ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 5fe5d198b57a..29d4b201c7b2 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1091,6 +1091,11 @@ enum { }; enum { + MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN = 0x1, + MLX5_VPORT_CVLAN_INSERT_ALWAYS = 0x3, +}; + +enum { MLX5_L3_PROT_TYPE_IPV4 = 0, MLX5_L3_PROT_TYPE_IPV6 = 1, }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f3d1c62c98dd..a9ee7bc59c90 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -913,7 +913,8 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x2]; + u8 reserved_at_5[0x1]; + u8 vport_cvlan_insert_always[0x1]; u8 esw_shared_ingress_acl[0x1]; u8 esw_uplink_ingress_acl[0x1]; u8 root_ft_on_other_esw[0x1]; diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ab934ad951a8..e8c350a3ade1 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -197,7 +197,7 @@ struct ip_set_region { }; /* Max range where every element is added/deleted in one step */ -#define IPSET_MAX_RANGE (1<<20) +#define IPSET_MAX_RANGE (1<<14) /* The max revision number supported by any set type + 1 */ #define IPSET_REVISION_MAX 9 diff --git a/include/linux/phy.h b/include/linux/phy.h index 71eeb4e3b1fd..6378c997ded5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -826,10 +826,7 @@ struct phy_driver { * whether to advertise lower-speed modes for that interface. It is * assumed that if a rate matching mode is supported on an interface, * then that interface's rate can be adapted to all slower link speeds - * supported by the phy. If iface is %PHY_INTERFACE_MODE_NA, and the phy - * supports any kind of rate matching for any interface, then it must - * return that rate matching mode (preferring %RATE_MATCH_PAUSE to - * %RATE_MATCH_CRS). If the interface is not supported, this should + * supported by the phy. If the interface is not supported, this should * return %RATE_MATCH_NONE. */ int (*get_rate_matching)(struct phy_device *phydev, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 69174093078f..99bd823e97f6 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -108,6 +108,10 @@ struct inet_bind2_bucket { struct hlist_node node; /* List of sockets hashed to this bucket */ struct hlist_head owners; + /* bhash has twsk in owners, but bhash2 has twsk in + * deathrow not to add a member in struct sock_common. + */ + struct hlist_head deathrow; }; static inline struct net *ib_net(const struct inet_bind_bucket *ib) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 5b47545f22d3..4a8e578405cb 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -73,9 +73,14 @@ struct inet_timewait_sock { u32 tw_priority; struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; + struct inet_bind2_bucket *tw_tb2; + struct hlist_node tw_bind2_node; }; #define tw_tclass tw_tos +#define twsk_for_each_bound_bhash2(__tw, list) \ + hlist_for_each_entry(__tw, list, tw_bind2_node) + static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) { return (struct inet_timewait_sock *)sk; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e69ce23566ea..9430128aae99 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -312,17 +312,29 @@ struct nft_set_iter { /** * struct nft_set_desc - description of set elements * + * @ktype: key type * @klen: key length + * @dtype: data type * @dlen: data length + * @objtype: object type + * @flags: flags * @size: number of set elements + * @policy: set policy + * @gc_int: garbage collector interval * @field_len: length of each field in concatenation, bytes * @field_count: number of concatenated fields in element * @expr: set must support for expressions */ struct nft_set_desc { + u32 ktype; unsigned int klen; + u32 dtype; unsigned int dlen; + u32 objtype; unsigned int size; + u32 policy; + u32 gc_int; + u64 timeout; u8 field_len[NFT_REG32_COUNT]; u8 field_count; bool expr; @@ -585,7 +597,9 @@ void *nft_set_catchall_gc(const struct nft_set *set); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { - return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ; + u32 gc_int = READ_ONCE(set->gc_int); + + return gc_int ? msecs_to_jiffies(gc_int) : HZ; } /** @@ -1558,6 +1572,9 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_set *set; u32 set_id; + u32 gc_int; + u64 timeout; + bool update; bool bound; }; @@ -1567,6 +1584,12 @@ struct nft_trans_set { (((struct nft_trans_set *)trans->data)->set_id) #define nft_trans_set_bound(trans) \ (((struct nft_trans_set *)trans->data)->bound) +#define nft_trans_set_update(trans) \ + (((struct nft_trans_set *)trans->data)->update) +#define nft_trans_set_timeout(trans) \ + (((struct nft_trans_set *)trans->data)->timeout) +#define nft_trans_set_gc_int(trans) \ + (((struct nft_trans_set *)trans->data)->gc_int) struct nft_trans_chain { bool update; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b366ab9148f2..848ffc3e0239 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -173,22 +173,40 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, return false; } +static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, + kuid_t sk_uid, bool relax, + bool reuseport_cb_ok, bool reuseport_ok) +{ + if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) + return false; + + return inet_bind_conflict(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok); +} + static bool inet_bhash2_conflict(const struct sock *sk, const struct inet_bind2_bucket *tb2, kuid_t sk_uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { + struct inet_timewait_sock *tw2; struct sock *sk2; sk_for_each_bound_bhash2(sk2, &tb2->owners) { - if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) - continue; + if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok)) + return true; + } - if (inet_bind_conflict(sk, sk2, sk_uid, relax, - reuseport_cb_ok, reuseport_ok)) + twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) { + sk2 = (struct sock *)tw2; + + if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok)) return true; } + return false; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d039b4e732a3..24a38b56fab9 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -116,6 +116,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb, #endif tb->rcv_saddr = sk->sk_rcv_saddr; INIT_HLIST_HEAD(&tb->owners); + INIT_HLIST_HEAD(&tb->deathrow); hlist_add_head(&tb->node, &head->chain); } @@ -137,7 +138,7 @@ struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep, /* Caller must hold hashbucket lock for this tb with local BH disabled */ void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb) { - if (hlist_empty(&tb->owners)) { + if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) { __hlist_del(&tb->node); kmem_cache_free(cachep, tb); } @@ -1103,15 +1104,16 @@ ok: /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, tb2, port); - spin_unlock(&head2->lock); - if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); inet_ehash_nolisten(sk, (struct sock *)tw, NULL); } if (tw) inet_twsk_bind_unhash(tw, hinfo); + + spin_unlock(&head2->lock); spin_unlock(&head->lock); + if (tw) inet_twsk_deschedule_put(tw); local_bh_enable(); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 66fc940f9521..1d77d992e6e7 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -29,6 +29,7 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { + struct inet_bind2_bucket *tb2 = tw->tw_tb2; struct inet_bind_bucket *tb = tw->tw_tb; if (!tb) @@ -37,6 +38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + + __hlist_del(&tw->tw_bind2_node); + tw->tw_tb2 = NULL; + inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); + __sock_put((struct sock *)tw); } @@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - struct inet_bind_hashbucket *bhead; + struct inet_bind_hashbucket *bhead, *bhead2; spin_lock(lock); sk_nulls_del_node_init_rcu((struct sock *)tw); @@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, hashinfo->bhash_size)]; + bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw, + twsk_net(tw), tw->tw_num); spin_lock(&bhead->lock); + spin_lock(&bhead2->lock); inet_twsk_bind_unhash(tw, hashinfo); + spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); refcount_dec(&tw->tw_dr->tw_refcount); @@ -93,6 +103,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, hlist_add_head(&tw->tw_bind_node, list); } +static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind2_node, list); +} + /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it @@ -105,17 +121,28 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - struct inet_bind_hashbucket *bhead; + struct inet_bind_hashbucket *bhead, *bhead2; + /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; + bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num); + spin_lock(&bhead->lock); + spin_lock(&bhead2->lock); + tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); + + tw->tw_tb2 = icsk->icsk_bind2_hash; + WARN_ON(!icsk->icsk_bind2_hash); + inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow); + + spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); spin_lock(lock); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e7ba5b6dd2b7..46ebee9400da 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); ip_set_unlock(set); retried = true; - } while (ret == -EAGAIN && - set->variant->resize && - (ret = set->variant->resize(set, retried)) == 0); + } while (ret == -ERANGE || + (ret == -EAGAIN && + set->variant->resize && + (ret = set->variant->resize(set, retried)) == 0)); if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) return 0; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index e30513cefd90..c9f4e3859663 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -100,11 +100,11 @@ static int hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ip4 *h = set->data; + struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, hosts; + u32 ip = 0, ip_to = 0, hosts, i = 0; int ret = 0; if (tb[IPSET_ATTR_LINENO]) @@ -149,14 +149,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); - /* 64bit division is not allowed on 32bit */ - if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to;) { + for (; ip <= ip_to; i++) { e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_ip4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 153de3457423..a22ec1a6f6ec 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -97,11 +97,11 @@ static int hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipmark4 *h = set->data; + struct hash_ipmark4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0; + u32 ip, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } - if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to; ip++, i++) { e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_ipmark4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 2ffbd0b78a8c..e977b5a9c48d 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -112,11 +112,11 @@ static int hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipport4 *h = set->data; + struct hash_ipport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; @@ -184,17 +184,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_ipport4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 334fb1ad0e86..39a01934b153 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -108,11 +108,11 @@ static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportip4 *h = set->data; + struct hash_ipportip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; @@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_ipportip4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 7df94f437f60..5c6de605a9fb 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -160,12 +160,12 @@ static int hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportnet4 *h = set->data; + struct hash_ipportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2; + u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; bool with_ports = false; u8 cidr; int ret; @@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); @@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], for (; p <= port_to; p++) { e.port = htons(p); do { + i++; e.ip2 = htonl(ip2); ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr); e.cidr = cidr - 1; + if (i > IPSET_MAX_RANGE) { + hash_ipportnet4_data_next(&h->next, + &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 1422739d9aa2..ce0a9ce5a91f 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -136,11 +136,11 @@ static int hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net4 *h = set->data; + struct hash_net4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, ipn, n = 0; + u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); - n++; - } while (ipn++ < ip_to); - - if (n > IPSET_MAX_RANGE) - return -ERANGE; if (retried) ip = ntohl(h->next.ip); do { + i++; e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_net4_data_next(&h->next, &e); + return -ERANGE; + } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 9810f5bf63f5..031073286236 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, ipn, n = 0; + u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); - n++; - } while (ipn++ < ip_to); - - if (n > IPSET_MAX_RANGE) - return -ERANGE; if (retried) ip = ntohl(h->next.ip); do { + i++; e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_netiface4_data_next(&h->next, &e); + return -ERANGE; + } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index cdfb78c6e0d3..8fbe649c9dd3 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -166,13 +166,12 @@ static int hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netnet4 *h = set->data; + struct hash_netnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0; - u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; - u64 n = 0, m = 0; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -248,19 +247,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); - n++; - } while (ipn++ < ip_to); - ipn = ip2_from; - do { - ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); - m++; - } while (ipn++ < ip2_to); - - if (n*m > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); @@ -273,7 +259,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], e.ip[0] = htonl(ip); ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); do { + i++; e.ip[1] = htonl(ip2); + if (i > IPSET_MAX_RANGE) { + hash_netnet4_data_next(&h->next, &e); + return -ERANGE; + } ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 09cf72eb37f8..d1a0628df4ef 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -154,12 +154,11 @@ static int hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netport4 *h = set->data; + struct hash_netport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; - u64 n = 0; + u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0; bool with_ports = false; u8 cidr; int ret; @@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr); - n++; - } while (ipn++ < ip_to); - - if (n*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip); @@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], e.ip = htonl(ip); ip = ip_set_range_to_cidr(ip, ip_to, &cidr); e.cidr = cidr - 1; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_netport4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 19bcdb3141f6..005a7ce87217 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } +static u32 +hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr) +{ + if (from == 0 && to == UINT_MAX) { + *cidr = 0; + return to; + } + return ip_set_range_to_cidr(from, to, cidr); +} + static int hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netportnet4 *h = set->data; + struct hash_netportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2, ipn; - u64 n = 0, m = 0; + u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; bool with_ports = false; int ret; @@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); - n++; - } while (ipn++ < ip_to); - ipn = ip2_from; - do { - ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); - m++; - } while (ipn++ < ip2_to); - - if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); @@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], do { e.ip[0] = htonl(ip); - ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); + ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]); for (; p <= port_to; p++) { e.port = htons(p); do { + i++; e.ip[1] = htonl(ip2); - ip2 = ip_set_range_to_cidr(ip2, ip2_to, - &e.cidr[1]); + if (i > IPSET_MAX_RANGE) { + hash_netportnet4_data_next(&h->next, + &e); + return -ERANGE; + } + ip2 = hash_netportnet4_range_to_cidr(ip2, + ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 99323fb12d0f..ccef340be575 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -141,6 +141,7 @@ unsigned int nf_confirm(void *priv, struct nf_conn *ct; bool seqadj_needed; __be16 frag_off; + int start; u8 pnum; ct = nf_ct_get(skb, &ctinfo); @@ -163,9 +164,11 @@ unsigned int nf_confirm(void *priv, break; case NFPROTO_IPV6: pnum = ipv6_hdr(skb)->nexthdr; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) + start = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); + if (start < 0 || (frag_off & htons(~0x7)) != 0) return nf_conntrack_confirm(skb); + + protoff = start; break; default: return nf_conntrack_confirm(skb); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 832b881f7c17..8c09e4d12ac1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) return 0; } -static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, - struct nft_set *set) +static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, + struct nft_set *set, + const struct nft_set_desc *desc) { struct nft_trans *trans; @@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, if (trans == NULL) return -ENOMEM; - if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; + if (desc) { + nft_trans_set_update(trans) = true; + nft_trans_set_gc_int(trans) = desc->gc_int; + nft_trans_set_timeout(trans) = desc->timeout; + } nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } +static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, + struct nft_set *set) +{ + return __nft_trans_set_add(ctx, msg_type, set, NULL); +} + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; @@ -3780,8 +3792,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags) static const struct nft_set_ops * nft_select_set_ops(const struct nft_ctx *ctx, const struct nlattr * const nla[], - const struct nft_set_desc *desc, - enum nft_set_policies policy) + const struct nft_set_desc *desc) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); const struct nft_set_ops *ops, *bops; @@ -3810,7 +3821,7 @@ nft_select_set_ops(const struct nft_ctx *ctx, if (!ops->estimate(desc, flags, &est)) continue; - switch (policy) { + switch (desc->policy) { case NFT_SET_POL_PERFORMANCE: if (est.lookup < best.lookup) break; @@ -4045,8 +4056,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb, static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, const struct nft_set *set, u16 event, u16 flags) { - struct nlmsghdr *nlh; + u64 timeout = READ_ONCE(set->timeout); + u32 gc_int = READ_ONCE(set->gc_int); u32 portid = ctx->portid; + struct nlmsghdr *nlh; struct nlattr *nest; u32 seq = ctx->seq; int i; @@ -4082,13 +4095,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype))) goto nla_put_failure; - if (set->timeout && + if (timeout && nla_put_be64(skb, NFTA_SET_TIMEOUT, - nf_jiffies64_to_msecs(set->timeout), + nf_jiffies64_to_msecs(timeout), NFTA_SET_PAD)) goto nla_put_failure; - if (set->gc_int && - nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) + if (gc_int && + nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int))) goto nla_put_failure; if (set->policy != NFT_SET_POL_PERFORMANCE) { @@ -4389,15 +4402,94 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc, return err; } +static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr * const *nla, + struct nft_expr **exprs, int *num_exprs, + u32 flags) +{ + struct nft_expr *expr; + int err, i; + + if (nla[NFTA_SET_EXPR]) { + expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]); + if (IS_ERR(expr)) { + err = PTR_ERR(expr); + goto err_set_expr_alloc; + } + exprs[0] = expr; + (*num_exprs)++; + } else if (nla[NFTA_SET_EXPRESSIONS]) { + struct nlattr *tmp; + int left; + + if (!(flags & NFT_SET_EXPR)) { + err = -EINVAL; + goto err_set_expr_alloc; + } + i = 0; + nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { + if (i == NFT_SET_EXPR_MAX) { + err = -E2BIG; + goto err_set_expr_alloc; + } + if (nla_type(tmp) != NFTA_LIST_ELEM) { + err = -EINVAL; + goto err_set_expr_alloc; + } + expr = nft_set_elem_expr_alloc(ctx, set, tmp); + if (IS_ERR(expr)) { + err = PTR_ERR(expr); + goto err_set_expr_alloc; + } + exprs[i++] = expr; + (*num_exprs)++; + } + } + + return 0; + +err_set_expr_alloc: + for (i = 0; i < *num_exprs; i++) + nft_expr_destroy(ctx, exprs[i]); + + return err; +} + +static bool nft_set_is_same(const struct nft_set *set, + const struct nft_set_desc *desc, + struct nft_expr *exprs[], u32 num_exprs, u32 flags) +{ + int i; + + if (set->ktype != desc->ktype || + set->dtype != desc->dtype || + set->flags != flags || + set->klen != desc->klen || + set->dlen != desc->dlen || + set->field_count != desc->field_count || + set->num_exprs != num_exprs) + return false; + + for (i = 0; i < desc->field_count; i++) { + if (set->field_len[i] != desc->field_len[i]) + return false; + } + + for (i = 0; i < num_exprs; i++) { + if (set->exprs[i]->ops != exprs[i]->ops) + return false; + } + + return true; +} + static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - u32 ktype, dtype, flags, policy, gc_int, objtype; struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; const struct nft_set_ops *ops; - struct nft_expr *expr = NULL; struct net *net = info->net; struct nft_set_desc desc; struct nft_table *table; @@ -4405,10 +4497,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, struct nft_set *set; struct nft_ctx ctx; size_t alloc_size; - u64 timeout; + int num_exprs = 0; char *name; int err, i; u16 udlen; + u32 flags; u64 size; if (nla[NFTA_SET_TABLE] == NULL || @@ -4419,10 +4512,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, memset(&desc, 0, sizeof(desc)); - ktype = NFT_DATA_VALUE; + desc.ktype = NFT_DATA_VALUE; if (nla[NFTA_SET_KEY_TYPE] != NULL) { - ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); - if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) + desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); + if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) return -EINVAL; } @@ -4447,17 +4540,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, return -EOPNOTSUPP; } - dtype = 0; + desc.dtype = 0; if (nla[NFTA_SET_DATA_TYPE] != NULL) { if (!(flags & NFT_SET_MAP)) return -EINVAL; - dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); - if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && - dtype != NFT_DATA_VERDICT) + desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); + if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && + desc.dtype != NFT_DATA_VERDICT) return -EINVAL; - if (dtype != NFT_DATA_VERDICT) { + if (desc.dtype != NFT_DATA_VERDICT) { if (nla[NFTA_SET_DATA_LEN] == NULL) return -EINVAL; desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); @@ -4472,34 +4565,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_OBJECT)) return -EINVAL; - objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); - if (objtype == NFT_OBJECT_UNSPEC || - objtype > NFT_OBJECT_MAX) + desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); + if (desc.objtype == NFT_OBJECT_UNSPEC || + desc.objtype > NFT_OBJECT_MAX) return -EOPNOTSUPP; } else if (flags & NFT_SET_OBJECT) return -EINVAL; else - objtype = NFT_OBJECT_UNSPEC; + desc.objtype = NFT_OBJECT_UNSPEC; - timeout = 0; + desc.timeout = 0; if (nla[NFTA_SET_TIMEOUT] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; - err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout); + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; } - gc_int = 0; + desc.gc_int = 0; if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; - gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } - policy = NFT_SET_POL_PERFORMANCE; + desc.policy = NFT_SET_POL_PERFORMANCE; if (nla[NFTA_SET_POLICY] != NULL) - policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); + desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); if (nla[NFTA_SET_DESC] != NULL) { err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); @@ -4531,6 +4624,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, return PTR_ERR(set); } } else { + struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {}; + if (info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return -EEXIST; @@ -4538,13 +4633,29 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - return 0; + err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags); + if (err < 0) + return err; + + err = 0; + if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); + err = -EEXIST; + } + + for (i = 0; i < num_exprs; i++) + nft_expr_destroy(&ctx, exprs[i]); + + if (err < 0) + return err; + + return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc); } if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(&ctx, nla, &desc, policy); + ops = nft_select_set_ops(&ctx, nla, &desc); if (IS_ERR(ops)) return PTR_ERR(ops); @@ -4584,18 +4695,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, set->table = table; write_pnet(&set->net, net); set->ops = ops; - set->ktype = ktype; + set->ktype = desc.ktype; set->klen = desc.klen; - set->dtype = dtype; - set->objtype = objtype; + set->dtype = desc.dtype; + set->objtype = desc.objtype; set->dlen = desc.dlen; set->flags = flags; set->size = desc.size; - set->policy = policy; + set->policy = desc.policy; set->udlen = udlen; set->udata = udata; - set->timeout = timeout; - set->gc_int = gc_int; + set->timeout = desc.timeout; + set->gc_int = desc.gc_int; set->field_count = desc.field_count; for (i = 0; i < desc.field_count; i++) @@ -4605,43 +4716,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (err < 0) goto err_set_init; - if (nla[NFTA_SET_EXPR]) { - expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); - if (IS_ERR(expr)) { - err = PTR_ERR(expr); - goto err_set_expr_alloc; - } - set->exprs[0] = expr; - set->num_exprs++; - } else if (nla[NFTA_SET_EXPRESSIONS]) { - struct nft_expr *expr; - struct nlattr *tmp; - int left; - - if (!(flags & NFT_SET_EXPR)) { - err = -EINVAL; - goto err_set_expr_alloc; - } - i = 0; - nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { - if (i == NFT_SET_EXPR_MAX) { - err = -E2BIG; - goto err_set_expr_alloc; - } - if (nla_type(tmp) != NFTA_LIST_ELEM) { - err = -EINVAL; - goto err_set_expr_alloc; - } - expr = nft_set_elem_expr_alloc(&ctx, set, tmp); - if (IS_ERR(expr)) { - err = PTR_ERR(expr); - goto err_set_expr_alloc; - } - set->exprs[i++] = expr; - set->num_exprs++; - } - } + err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags); + if (err < 0) + goto err_set_destroy; + set->num_exprs = num_exprs; set->handle = nf_tables_alloc_handle(table); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); @@ -4655,7 +4734,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); - +err_set_destroy: ops->destroy(set); err_set_init: kfree(set->name); @@ -6008,7 +6087,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; } else if (set->flags & NFT_SET_TIMEOUT && !(flags & NFT_SET_ELEM_INTERVAL_END)) { - timeout = set->timeout; + timeout = READ_ONCE(set->timeout); } expiration = 0; @@ -6109,7 +6188,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_parse_key_end; - if (timeout != set->timeout) { + if (timeout != READ_ONCE(set->timeout)) { err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); if (err < 0) goto err_parse_key_end; @@ -9031,14 +9110,20 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: - nft_clear(net, nft_trans_set(trans)); - /* This avoids hitting -EBUSY when deleting the table - * from the transaction. - */ - if (nft_set_is_anonymous(nft_trans_set(trans)) && - !list_empty(&nft_trans_set(trans)->bindings)) - trans->ctx.table->use--; + if (nft_trans_set_update(trans)) { + struct nft_set *set = nft_trans_set(trans); + WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans)); + WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans)); + } else { + nft_clear(net, nft_trans_set(trans)); + /* This avoids hitting -EBUSY when deleting the table + * from the transaction. + */ + if (nft_set_is_anonymous(nft_trans_set(trans)) && + !list_empty(&nft_trans_set(trans)->bindings)) + trans->ctx.table->use--; + } nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); @@ -9260,6 +9345,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: + if (nft_trans_set_update(trans)) { + nft_trans_destroy(trans); + break; + } trans->ctx.table->use--; if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index f52255fea652..4a981ca90b0b 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -393,10 +393,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, result = tcf_classify(skb, NULL, fl, &res, true); if (result < 0) continue; + if (result == TC_ACT_SHOT) + goto done; + flow = (struct atm_flow_data *)res.class; if (!flow) flow = lookup_flow(sch, res.classid); - goto done; + goto drop; } } flow = NULL; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6568e17c4c63..36db5f6782f2 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -230,6 +230,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) result = tcf_classify(skb, NULL, fl, &res, true); if (!fl || result < 0) goto fallback; + if (result == TC_ACT_SHOT) + return NULL; cl = (void *)res.class; if (!cl) { @@ -250,8 +252,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; fallthrough; - case TC_ACT_SHOT: - return NULL; case TC_ACT_RECLASSIFY: return cbq_reclassify(skb, cl); } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index e5b4bbf3ce3d..2238edece1a4 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -199,8 +199,14 @@ static unsigned long htb_search(struct Qdisc *sch, u32 handle) { return (unsigned long)htb_find(handle, sch); } + +#define HTB_DIRECT ((struct htb_class *)-1L) + /** * htb_classify - classify a packet into class + * @skb: the socket buffer + * @sch: the active queue discipline + * @qerr: pointer for returned status code * * It returns NULL if the packet should be dropped or -1 if the packet * should be passed directly thru. In all other cases leaf class is returned. @@ -211,8 +217,6 @@ static unsigned long htb_search(struct Qdisc *sch, u32 handle) * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful * then finish and return direct queue. */ -#define HTB_DIRECT ((struct htb_class *)-1L) - static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 9cc84114741d..a6911cae368c 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only bind_bhash +bind_timewait csum cmsg_sender diag_uid diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index b5af08af8559..4a110bb01e53 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -18,14 +18,15 @@ readonly V4_ADDR1=10.0.10.2 readonly V6_ADDR0=2001:db8:91::1 readonly V6_ADDR1=2001:db8:91::2 nsid=100 +ret=0 cleanup_v6() { ip netns del me ip netns del peer - sysctl -w net.ipv4.conf.veth0.ndisc_evict_nocarrier=1 >/dev/null 2>&1 - sysctl -w net.ipv4.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 } create_ns() @@ -61,7 +62,7 @@ setup_v6() { if [ $? -ne 0 ]; then cleanup_v6 echo "failed" - exit + exit 1 fi # Set veth2 down, which will put veth1 in NOCARRIER state @@ -88,7 +89,7 @@ setup_v4() { if [ $? -ne 0 ]; then cleanup_v4 echo "failed" - exit + exit 1 fi # Set veth1 down, which will put veth0 in NOCARRIER state @@ -115,6 +116,7 @@ run_arp_evict_nocarrier_enabled() { if [ $? -eq 0 ];then echo "failed" + ret=1 else echo "ok" fi @@ -134,6 +136,7 @@ run_arp_evict_nocarrier_disabled() { echo "ok" else echo "failed" + ret=1 fi cleanup_v4 @@ -164,6 +167,7 @@ run_ndisc_evict_nocarrier_enabled() { if [ $? -eq 0 ];then echo "failed" + ret=1 else echo "ok" fi @@ -182,6 +186,7 @@ run_ndisc_evict_nocarrier_disabled() { echo "ok" else echo "failed" + ret=1 fi cleanup_v6 @@ -198,6 +203,7 @@ run_ndisc_evict_nocarrier_disabled_all() { echo "ok" else echo "failed" + ret=1 fi cleanup_v6 @@ -218,3 +224,4 @@ if [ "$(id -u)" -ne 0 ];then fi run_all_tests +exit $ret diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c new file mode 100644 index 000000000000..cb9fdf51ea59 --- /dev/null +++ b/tools/testing/selftests/net/bind_timewait.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include <sys/socket.h> +#include <netinet/in.h> + +#include "../kselftest_harness.h" + +FIXTURE(bind_timewait) +{ + struct sockaddr_in addr; + socklen_t addrlen; +}; + +FIXTURE_VARIANT(bind_timewait) +{ + __u32 addr_const; +}; + +FIXTURE_VARIANT_ADD(bind_timewait, localhost) +{ + .addr_const = INADDR_LOOPBACK +}; + +FIXTURE_VARIANT_ADD(bind_timewait, addrany) +{ + .addr_const = INADDR_ANY +}; + +FIXTURE_SETUP(bind_timewait) +{ + self->addr.sin_family = AF_INET; + self->addr.sin_port = 0; + self->addr.sin_addr.s_addr = htonl(variant->addr_const); + self->addrlen = sizeof(self->addr); +} + +FIXTURE_TEARDOWN(bind_timewait) +{ +} + +void create_timewait_socket(struct __test_metadata *_metadata, + FIXTURE_DATA(bind_timewait) *self) +{ + int server_fd, client_fd, child_fd, ret; + struct sockaddr_in addr; + socklen_t addrlen; + + server_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(server_fd, 0); + + ret = bind(server_fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + ret = listen(server_fd, 1); + ASSERT_EQ(ret, 0); + + ret = getsockname(server_fd, (struct sockaddr *)&self->addr, &self->addrlen); + ASSERT_EQ(ret, 0); + + client_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(client_fd, 0); + + ret = connect(client_fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + addrlen = sizeof(addr); + child_fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_GT(child_fd, 0); + + close(child_fd); + close(client_fd); + close(server_fd); +} + +TEST_F(bind_timewait, 1) +{ + int fd, ret; + + create_timewait_socket(_metadata, self); + + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(fd, 0); + + ret = bind(fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EADDRINUSE); + + close(fd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 75dd83e39207..24b21b15ed3f 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -110,7 +110,7 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) static void cs_parse_args(int argc, char *argv[]) { - char o; + int o; while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) { switch (o) { |