aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/sysctl/net.rst2
-rw-r--r--MAINTAINERS2
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c45
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c5
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c10
-rw-r--r--drivers/net/ethernet/freescale/fec.h10
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c42
-rw-r--r--drivers/net/ethernet/freescale/fec_ptp.c29
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h36
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c25
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c2
-rw-r--r--include/linux/mlx5/driver.h1
-rw-r--r--include/linux/netdevice.h20
-rw-r--r--include/net/busy_poll.h2
-rw-r--r--include/net/gro.h2
-rw-r--r--include/uapi/linux/xfrm.h2
-rw-r--r--lib/ratelimit.c12
-rw-r--r--net/core/bpf_sk_storage.c5
-rw-r--r--net/core/dev.c20
-rw-r--r--net/core/filter.c13
-rw-r--r--net/core/gro_cells.c2
-rw-r--r--net/core/neighbour.c12
-rw-r--r--net/core/skbuff.c7
-rw-r--r--net/core/sock.c18
-rw-r--r--net/core/sysctl_net_core.c15
-rw-r--r--net/ipv4/devinet.c16
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv6/addrconf.c5
-rw-r--r--net/ipv6/ipv6_sockglue.c4
-rw-r--r--net/key/af_key.c3
-rw-r--r--net/mptcp/protocol.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c4
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/socket.c2
-rw-r--r--net/xfrm/espintcp.c2
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_output.c1
-rw-r--r--net/xfrm/xfrm_policy.c3
-rw-r--r--net/xfrm/xfrm_state.c1
57 files changed, 370 insertions, 162 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 805f2281e000..60d44165fba7 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -271,7 +271,7 @@ poll cycle or the number of packets processed reaches netdev_budget.
netdev_max_backlog
------------------
-Maximum number of packets, queued on the INPUT side, when the interface
+Maximum number of packets, queued on the INPUT side, when the interface
receives packets faster than kernel can process them.
netdev_rss_key
diff --git a/MAINTAINERS b/MAINTAINERS
index 274b2c1e506e..5a699275d9d1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3679,7 +3679,7 @@ F: Documentation/networking/bonding.rst
F: drivers/net/bonding/
F: include/net/bond*
F: include/uapi/linux/if_bonding.h
-F: tools/testing/selftests/net/bonding/
+F: tools/testing/selftests/drivers/net/bonding/
BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER
M: Dan Robertson <[email protected]>
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 7461272a6d41..6bd69a7e6809 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -968,6 +968,7 @@ static void ksz_update_port_member(struct ksz_device *dev, int port)
static int ksz_setup(struct dsa_switch *ds)
{
struct ksz_device *dev = ds->priv;
+ struct ksz_port *p;
const u16 *regs;
int ret;
@@ -1007,6 +1008,14 @@ static int ksz_setup(struct dsa_switch *ds)
return ret;
}
+ /* Start with learning disabled on standalone user ports, and enabled
+ * on the CPU port. In lack of other finer mechanisms, learning on the
+ * CPU port will avoid flooding bridge local addresses on the network
+ * in some cases.
+ */
+ p = &dev->ports[dev->cpu_port];
+ p->learning = true;
+
/* start switch */
regmap_update_bits(dev->regmap[0], regs[S_START_CTRL],
SW_START, SW_START);
@@ -1283,6 +1292,8 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
ksz_pread8(dev, port, regs[P_STP_CTRL], &data);
data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
+ p = &dev->ports[port];
+
switch (state) {
case BR_STATE_DISABLED:
data |= PORT_LEARN_DISABLE;
@@ -1292,9 +1303,13 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
break;
case BR_STATE_LEARNING:
data |= PORT_RX_ENABLE;
+ if (!p->learning)
+ data |= PORT_LEARN_DISABLE;
break;
case BR_STATE_FORWARDING:
data |= (PORT_TX_ENABLE | PORT_RX_ENABLE);
+ if (!p->learning)
+ data |= PORT_LEARN_DISABLE;
break;
case BR_STATE_BLOCKING:
data |= PORT_LEARN_DISABLE;
@@ -1306,12 +1321,38 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
ksz_pwrite8(dev, port, regs[P_STP_CTRL], data);
- p = &dev->ports[port];
p->stp_state = state;
ksz_update_port_member(dev, port);
}
+static int ksz_port_pre_bridge_flags(struct dsa_switch *ds, int port,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack)
+{
+ if (flags.mask & ~BR_LEARNING)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ksz_port_bridge_flags(struct dsa_switch *ds, int port,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack)
+{
+ struct ksz_device *dev = ds->priv;
+ struct ksz_port *p = &dev->ports[port];
+
+ if (flags.mask & BR_LEARNING) {
+ p->learning = !!(flags.val & BR_LEARNING);
+
+ /* Make the change take effect immediately */
+ ksz_port_stp_state_set(ds, port, p->stp_state);
+ }
+
+ return 0;
+}
+
static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds,
int port,
enum dsa_tag_protocol mp)
@@ -1725,6 +1766,8 @@ static const struct dsa_switch_ops ksz_switch_ops = {
.port_bridge_join = ksz_port_bridge_join,
.port_bridge_leave = ksz_port_bridge_leave,
.port_stp_state_set = ksz_port_stp_state_set,
+ .port_pre_bridge_flags = ksz_port_pre_bridge_flags,
+ .port_bridge_flags = ksz_port_bridge_flags,
.port_fast_age = ksz_port_fast_age,
.port_vlan_filtering = ksz_port_vlan_filtering,
.port_vlan_add = ksz_port_vlan_add,
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 764ada3a0f42..0d9520dc6d2d 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -65,6 +65,7 @@ struct ksz_chip_data {
struct ksz_port {
bool remove_tag; /* Remove Tag flag set, for ksz8795 only */
+ bool learning;
int stp_state;
struct phy_device phydev;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index ba0f1ffac507..f46eefb5a029 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11178,10 +11178,7 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp))
features &= ~NETIF_F_NTUPLE;
- if (bp->flags & BNXT_FLAG_NO_AGG_RINGS)
- features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
-
- if (!(bp->flags & BNXT_FLAG_TPA))
+ if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog)
features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
if (!(features & NETIF_F_GRO))
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 075c6206325c..b1b17f911300 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2130,6 +2130,7 @@ struct bnxt {
#define BNXT_DUMP_CRASH 1
struct bpf_prog *xdp_prog;
+ u8 xdp_has_frags;
struct bnxt_ptp_cfg *ptp_cfg;
u8 ptp_all_rx_tstamp;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 059f96f7a96f..a36803e79e92 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -1306,6 +1306,7 @@ int bnxt_dl_register(struct bnxt *bp)
if (rc)
goto err_dl_port_unreg;
+ devlink_set_features(dl, DEVLINK_F_RELOAD);
out:
devlink_register(dl);
return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 730febd19330..a4cba7cb2783 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -623,7 +623,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n;
hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n;
if (bp->flags & BNXT_FLAG_CHIP_P5)
- hw_resc->max_irqs -= vf_msix * n;
+ hw_resc->max_nqs -= vf_msix;
rc = pf->active_vfs;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index f53387ed0167..c3065ec0a479 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -181,6 +181,7 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
struct xdp_buff *xdp)
{
struct bnxt_sw_rx_bd *rx_buf;
+ u32 buflen = PAGE_SIZE;
struct pci_dev *pdev;
dma_addr_t mapping;
u32 offset;
@@ -192,7 +193,10 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
mapping = rx_buf->mapping - bp->rx_dma_offset;
dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
- xdp_init_buff(xdp, BNXT_PAGE_MODE_BUF_SIZE + offset, &rxr->xdp_rxq);
+ if (bp->xdp_has_frags)
+ buflen = BNXT_PAGE_MODE_BUF_SIZE + offset;
+
+ xdp_init_buff(xdp, buflen, &rxr->xdp_rxq);
xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false);
}
@@ -397,8 +401,10 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n");
return -EOPNOTSUPP;
}
- if (prog)
+ if (prog) {
tx_xdp = bp->rx_nr_rings;
+ bp->xdp_has_frags = prog->aux->xdp_has_frags;
+ }
tc = netdev_get_num_tc(dev);
if (!tc)
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index ed7301b69169..0cebe4b63adb 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -634,6 +634,13 @@ struct fec_enet_private {
int pps_enable;
unsigned int next_counter;
+ struct {
+ struct timespec64 ts_phc;
+ u64 ns_sys;
+ u32 at_corr;
+ u8 at_inc_corr;
+ } ptp_saved_state;
+
u64 ethtool_stats[];
};
@@ -644,5 +651,8 @@ void fec_ptp_disable_hwts(struct net_device *ndev);
int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr);
int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr);
+void fec_ptp_save_state(struct fec_enet_private *fep);
+int fec_ptp_restore_state(struct fec_enet_private *fep);
+
/****************************************************************************/
#endif /* FEC_H */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index e8e2aa1e7f01..b0d60f898249 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -285,8 +285,11 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define FEC_MMFR_TA (2 << 16)
#define FEC_MMFR_DATA(v) (v & 0xffff)
/* FEC ECR bits definition */
-#define FEC_ECR_MAGICEN (1 << 2)
-#define FEC_ECR_SLEEP (1 << 3)
+#define FEC_ECR_RESET BIT(0)
+#define FEC_ECR_ETHEREN BIT(1)
+#define FEC_ECR_MAGICEN BIT(2)
+#define FEC_ECR_SLEEP BIT(3)
+#define FEC_ECR_EN1588 BIT(4)
#define FEC_MII_TIMEOUT 30000 /* us */
@@ -982,6 +985,9 @@ fec_restart(struct net_device *ndev)
u32 temp_mac[2];
u32 rcntl = OPT_FRAME_SIZE | 0x04;
u32 ecntl = 0x2; /* ETHEREN */
+ struct ptp_clock_request ptp_rq = { .type = PTP_CLK_REQ_PPS };
+
+ fec_ptp_save_state(fep);
/* Whack a reset. We should wait for this.
* For i.MX6SX SOC, enet use AXI bus, we use disable MAC
@@ -1135,7 +1141,7 @@ fec_restart(struct net_device *ndev)
}
if (fep->bufdesc_ex)
- ecntl |= (1 << 4);
+ ecntl |= FEC_ECR_EN1588;
if (fep->quirks & FEC_QUIRK_DELAYED_CLKS_SUPPORT &&
fep->rgmii_txc_dly)
@@ -1156,6 +1162,14 @@ fec_restart(struct net_device *ndev)
if (fep->bufdesc_ex)
fec_ptp_start_cyclecounter(ndev);
+ /* Restart PPS if needed */
+ if (fep->pps_enable) {
+ /* Clear flag so fec_ptp_enable_pps() doesn't return immediately */
+ fep->pps_enable = 0;
+ fec_ptp_restore_state(fep);
+ fep->ptp_caps.enable(&fep->ptp_caps, &ptp_rq, 1);
+ }
+
/* Enable interrupts we wish to service */
if (fep->link)
writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
@@ -1206,6 +1220,8 @@ fec_stop(struct net_device *ndev)
struct fec_enet_private *fep = netdev_priv(ndev);
u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & (1 << 8);
u32 val;
+ struct ptp_clock_request ptp_rq = { .type = PTP_CLK_REQ_PPS };
+ u32 ecntl = 0;
/* We cannot expect a graceful transmit stop without link !!! */
if (fep->link) {
@@ -1215,6 +1231,8 @@ fec_stop(struct net_device *ndev)
netdev_err(ndev, "Graceful transmit stop did not complete!\n");
}
+ fec_ptp_save_state(fep);
+
/* Whack a reset. We should wait for this.
* For i.MX6SX SOC, enet use AXI bus, we use disable MAC
* instead of reset MAC itself.
@@ -1234,12 +1252,28 @@ fec_stop(struct net_device *ndev)
writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
+ if (fep->bufdesc_ex)
+ ecntl |= FEC_ECR_EN1588;
+
/* We have to keep ENET enabled to have MII interrupt stay working */
if (fep->quirks & FEC_QUIRK_ENET_MAC &&
!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) {
- writel(2, fep->hwp + FEC_ECNTRL);
+ ecntl |= FEC_ECR_ETHEREN;
writel(rmii_mode, fep->hwp + FEC_R_CNTRL);
}
+
+ writel(ecntl, fep->hwp + FEC_ECNTRL);
+
+ if (fep->bufdesc_ex)
+ fec_ptp_start_cyclecounter(ndev);
+
+ /* Restart PPS if needed */
+ if (fep->pps_enable) {
+ /* Clear flag so fec_ptp_enable_pps() doesn't return immediately */
+ fep->pps_enable = 0;
+ fec_ptp_restore_state(fep);
+ fep->ptp_caps.enable(&fep->ptp_caps, &ptp_rq, 1);
+ }
}
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 3dc3c0b626c2..c74d04f4b2fd 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -633,7 +633,36 @@ void fec_ptp_stop(struct platform_device *pdev)
struct net_device *ndev = platform_get_drvdata(pdev);
struct fec_enet_private *fep = netdev_priv(ndev);
+ if (fep->pps_enable)
+ fec_ptp_enable_pps(fep, 0);
+
cancel_delayed_work_sync(&fep->time_keep);
if (fep->ptp_clock)
ptp_clock_unregister(fep->ptp_clock);
}
+
+void fec_ptp_save_state(struct fec_enet_private *fep)
+{
+ u32 atime_inc_corr;
+
+ fec_ptp_gettime(&fep->ptp_caps, &fep->ptp_saved_state.ts_phc);
+ fep->ptp_saved_state.ns_sys = ktime_get_ns();
+
+ fep->ptp_saved_state.at_corr = readl(fep->hwp + FEC_ATIME_CORR);
+ atime_inc_corr = readl(fep->hwp + FEC_ATIME_INC) & FEC_T_INC_CORR_MASK;
+ fep->ptp_saved_state.at_inc_corr = (u8)(atime_inc_corr >> FEC_T_INC_CORR_OFFSET);
+}
+
+int fec_ptp_restore_state(struct fec_enet_private *fep)
+{
+ u32 atime_inc = readl(fep->hwp + FEC_ATIME_INC) & FEC_T_INC_MASK;
+ u64 ns_sys;
+
+ writel(fep->ptp_saved_state.at_corr, fep->hwp + FEC_ATIME_CORR);
+ atime_inc |= ((u32)fep->ptp_saved_state.at_inc_corr) << FEC_T_INC_CORR_OFFSET;
+ writel(atime_inc, fep->hwp + FEC_ATIME_INC);
+
+ ns_sys = ktime_get_ns() - fep->ptp_saved_state.ns_sys;
+ timespec64_add_ns(&fep->ptp_saved_state.ts_phc, ns_sys);
+ return fec_ptp_settime(&fep->ptp_caps, &fep->ptp_saved_state.ts_phc);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index cc5b85afd437..841fa149c407 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -684,8 +684,8 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
* ice_xsk_pool - get XSK buffer pool bound to a ring
* @ring: Rx ring to use
*
- * Returns a pointer to xdp_umem structure if there is a buffer pool present,
- * NULL otherwise.
+ * Returns a pointer to xsk_buff_pool structure if there is a buffer pool
+ * present, NULL otherwise.
*/
static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
{
@@ -699,23 +699,33 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
}
/**
- * ice_tx_xsk_pool - get XSK buffer pool bound to a ring
- * @ring: Tx ring to use
+ * ice_tx_xsk_pool - assign XSK buff pool to XDP ring
+ * @vsi: pointer to VSI
+ * @qid: index of a queue to look at XSK buff pool presence
*
- * Returns a pointer to xdp_umem structure if there is a buffer pool present,
- * NULL otherwise. Tx equivalent of ice_xsk_pool.
+ * Sets XSK buff pool pointer on XDP ring.
+ *
+ * XDP ring is picked from Rx ring, whereas Rx ring is picked based on provided
+ * queue id. Reason for doing so is that queue vectors might have assigned more
+ * than one XDP ring, e.g. when user reduced the queue count on netdev; Rx ring
+ * carries a pointer to one of these XDP rings for its own purposes, such as
+ * handling XDP_TX action, therefore we can piggyback here on the
+ * rx_ring->xdp_ring assignment that was done during XDP rings initialization.
*/
-static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring)
+static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
{
- struct ice_vsi *vsi = ring->vsi;
- u16 qid;
+ struct ice_tx_ring *ring;
- qid = ring->q_index - vsi->alloc_txq;
+ ring = vsi->rx_rings[qid]->xdp_ring;
+ if (!ring)
+ return;
- if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
- return NULL;
+ if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) {
+ ring->xsk_pool = NULL;
+ return;
+ }
- return xsk_get_pool_from_qid(vsi->netdev, qid);
+ ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 733c455f6574..0c4ec9264071 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1986,8 +1986,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
if (ret)
return ret;
- ice_for_each_xdp_txq(vsi, i)
- vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]);
+ ice_for_each_rxq(vsi, i)
+ ice_tx_xsk_pool(vsi, i);
return ret;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 4ecaf40cf946..173fe6c31341 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2581,7 +2581,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
ice_set_ring_xdp(xdp_ring);
- xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
spin_lock_init(&xdp_ring->tx_lock);
for (j = 0; j < xdp_ring->count; j++) {
tx_desc = ICE_TX_DESC(xdp_ring, j);
@@ -2589,13 +2588,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
}
}
- ice_for_each_rxq(vsi, i) {
- if (static_key_enabled(&ice_xdp_locking_key))
- vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
- else
- vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i];
- }
-
return 0;
free_xdp_rings:
@@ -2685,6 +2677,23 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
xdp_rings_rem -= xdp_rings_per_v;
}
+ ice_for_each_rxq(vsi, i) {
+ if (static_key_enabled(&ice_xdp_locking_key)) {
+ vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
+ } else {
+ struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
+ struct ice_tx_ring *ring;
+
+ ice_for_each_tx_ring(ring, q_vector->tx) {
+ if (ice_ring_is_xdp(ring)) {
+ vsi->rx_rings[i]->xdp_ring = ring;
+ break;
+ }
+ }
+ }
+ ice_tx_xsk_pool(vsi, i);
+ }
+
/* omit the scheduler update if in reset path; XDP queues will be
* taken into account at the end of ice_vsi_rebuild, where
* ice_cfg_vsi_lan is being called
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 49ba8bfdbf04..e48e29258450 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -243,7 +243,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
if (err)
goto free_buf;
ice_set_ring_xdp(xdp_ring);
- xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
+ ice_tx_xsk_pool(vsi, q_idx);
}
err = ice_vsi_cfg_rxq(rx_ring);
@@ -329,6 +329,12 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
bool if_running, pool_present = !!pool;
int ret = 0, pool_failure = 0;
+ if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
+ netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
+ pool_failure = -EINVAL;
+ goto failure;
+ }
+
if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
!is_power_of_2(vsi->tx_rings[qid]->count)) {
netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
@@ -353,7 +359,7 @@ xsk_pool_if_up:
if (if_running) {
ret = ice_qp_ena(vsi, qid);
if (!ret && pool_present)
- napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
+ napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi);
else if (ret)
netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
}
@@ -944,13 +950,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
if (!ice_is_xdp_ena_vsi(vsi))
return -EINVAL;
- if (queue_id >= vsi->num_txq)
+ if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq)
return -EINVAL;
- if (!vsi->xdp_rings[queue_id]->xsk_pool)
- return -EINVAL;
+ ring = vsi->rx_rings[queue_id]->xdp_ring;
- ring = vsi->xdp_rings[queue_id];
+ if (!ring->xsk_pool)
+ return -EINVAL;
/* The idea here is that if NAPI is running, mark a miss, so
* it will run again. If not, trigger an interrupt and
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
index 37522352e4b2..c8e5ca65bb6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
@@ -79,6 +79,10 @@ tc_act_police_offload(struct mlx5e_priv *priv,
struct mlx5e_flow_meter_handle *meter;
int err = 0;
+ err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack);
+ if (err)
+ return err;
+
err = fill_meter_params_from_act(act, &params);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 0aef69527226..3a1f76eac542 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -246,7 +246,7 @@ static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv
static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev,
struct list_head *list, int size)
{
- struct mlx5e_ktls_offload_context_tx *obj;
+ struct mlx5e_ktls_offload_context_tx *obj, *n;
struct mlx5e_async_ctx *bulk_async;
int i;
@@ -255,7 +255,7 @@ static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev,
return;
i = 0;
- list_for_each_entry(obj, list, list_node) {
+ list_for_each_entry_safe(obj, n, list, list_node) {
mlx5e_tls_priv_tx_cleanup(obj, &bulk_async[i]);
i++;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index e2a9b9be5c1f..e0ce5a233d0b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -1395,10 +1395,11 @@ struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
}
return fs;
-err_free_fs:
- kvfree(fs);
+
err_free_vlan:
mlx5e_fs_vlan_free(fs);
+err_free_fs:
+ kvfree(fs);
err:
return NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d858667736a3..02eb2f0fa2ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3682,7 +3682,9 @@ static int set_feature_hw_tc(struct net_device *netdev, bool enable)
int err = 0;
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
- if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
+ int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) :
+ MLX5_TC_FLAG(NIC_OFFLOAD);
+ if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) {
netdev_err(netdev,
"Active offloaded tc filters, can't turn hw_tc_offload off\n");
return -EINVAL;
@@ -4769,14 +4771,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
/* RQ */
mlx5e_build_rq_params(mdev, params);
- /* HW LRO */
- if (MLX5_CAP_ETH(mdev, lro_cap) &&
- params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
- /* No XSK params: checking the availability of striding RQ in general. */
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- params->packet_merge.type = slow_pci_heuristic(mdev) ?
- MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO;
- }
params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 0c66774a1720..759f7d3c2cfd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -662,6 +662,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
params->mqprio.num_tc = 1;
params->tunneled_offload_en = false;
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ params->vlan_strip_disable = true;
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index ed73132129aa..a9f4c652f859 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -427,7 +427,8 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
dest[dest_idx].vport.vhca_id =
MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
- if (mlx5_lag_mpesw_is_activated(esw->dev))
+ if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK &&
+ mlx5_lag_mpesw_is_activated(esw->dev))
dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
}
if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
@@ -3115,8 +3116,10 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs,
MLX5_VPORT_UC_ADDR_CHANGE);
- if (err)
+ if (err) {
+ devl_unlock(devlink);
return;
+ }
}
esw->esw_funcs.num_vfs = new_num_vfs;
devl_unlock(devlink);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 0f34e3c80d1f..065102278cb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1067,30 +1067,32 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
struct net_device *netdev)
{
unsigned int fn = mlx5_get_dev_index(dev);
+ unsigned long flags;
if (fn >= ldev->ports)
return;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev->pf[fn].netdev = netdev;
ldev->tracker.netdev_state[fn].link_up = 0;
ldev->tracker.netdev_state[fn].tx_enabled = 0;
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
}
static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
struct net_device *netdev)
{
+ unsigned long flags;
int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
for (i = 0; i < ldev->ports; i++) {
if (ldev->pf[i].netdev == netdev) {
ldev->pf[i].netdev = NULL;
break;
}
}
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
}
static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
@@ -1234,7 +1236,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
mlx5_ldev_add_netdev(ldev, dev, netdev);
for (i = 0; i < ldev->ports; i++)
- if (!ldev->pf[i].dev)
+ if (!ldev->pf[i].netdev)
break;
if (i >= ldev->ports)
@@ -1246,12 +1248,13 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_roce(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1260,12 +1263,13 @@ EXPORT_SYMBOL(mlx5_lag_is_roce);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_active(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1274,13 +1278,14 @@ EXPORT_SYMBOL(mlx5_lag_is_active);
bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_active(ldev) &&
dev == ldev->pf[MLX5_LAG_P1].dev;
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1289,12 +1294,13 @@ EXPORT_SYMBOL(mlx5_lag_is_master);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_sriov(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1303,13 +1309,14 @@ EXPORT_SYMBOL(mlx5_lag_is_sriov);
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_sriov(ldev) &&
test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1352,9 +1359,10 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
struct net_device *ndev = NULL;
struct mlx5_lag *ldev;
+ unsigned long flags;
int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
@@ -1373,7 +1381,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
dev_hold(ndev);
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return ndev;
}
@@ -1383,10 +1391,11 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
u8 port = 0;
int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
@@ -1401,7 +1410,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
port = ldev->v2p_map[port * ldev->buckets];
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
@@ -1422,8 +1431,9 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_core_dev *peer_dev = NULL;
struct mlx5_lag *ldev;
+ unsigned long flags;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!ldev)
goto unlock;
@@ -1433,7 +1443,7 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
ldev->pf[MLX5_LAG_P1].dev;
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
@@ -1446,6 +1456,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
struct mlx5_core_dev **mdev;
struct mlx5_lag *ldev;
+ unsigned long flags;
int num_ports;
int ret, i, j;
void *out;
@@ -1462,7 +1473,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
memset(values, 0, sizeof(*values) * num_counters);
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (ldev && __mlx5_lag_is_active(ldev)) {
num_ports = ldev->ports;
@@ -1472,7 +1483,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
num_ports = 1;
mdev[MLX5_LAG_P1] = dev;
}
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
for (i = 0; i < num_ports; ++i) {
u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index bec8d6d0b5f6..c085b031abfc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1530,7 +1530,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
+ lockdep_register_key(&dev->lock_key);
mutex_init(&dev->intf_state_mutex);
+ lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
@@ -1597,6 +1599,7 @@ err_timeout_init:
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
return err;
}
@@ -1618,6 +1621,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
}
static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index ec76a8b1acc1..60596357bfc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -376,8 +376,8 @@ retry:
goto out_dropped;
}
}
+ err = mlx5_cmd_check(dev, err, in, out);
if (err) {
- err = mlx5_cmd_check(dev, err, in, out);
mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
func_id, npages, err);
goto out_dropped;
@@ -524,10 +524,13 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
dev->priv.reclaim_pages_discard += npages;
}
/* if triggered by FW event and failed by FW then ignore */
- if (event && err == -EREMOTEIO)
+ if (event && err == -EREMOTEIO) {
err = 0;
+ goto out_free;
+ }
+
+ err = mlx5_cmd_check(dev, err, in, out);
if (err) {
- err = mlx5_cmd_check(dev, err, in, out);
mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
goto out_free;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index ee2e1b7c1310..c0e6c487c63c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -159,11 +159,11 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
devl_lock(devlink);
err = mlx5_device_enable_sriov(dev, num_vfs);
+ devl_unlock(devlink);
if (err) {
mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
return err;
}
- devl_unlock(devlink);
err = pci_enable_sriov(pdev, num_vfs);
if (err) {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 96b16fbe1aa4..7b7ce602c808 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -779,6 +779,7 @@ struct mlx5_core_dev {
enum mlx5_device_state state;
/* sync interface state */
struct mutex intf_state_mutex;
+ struct lock_class_key lock_key;
unsigned long intf_state;
struct mlx5_priv priv;
struct mlx5_profile profile;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1a3cb93c3dcc..05d6f3facd5a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -640,9 +640,23 @@ extern int sysctl_devconf_inherit_init_net;
*/
static inline bool net_has_fallback_tunnels(const struct net *net)
{
- return !IS_ENABLED(CONFIG_SYSCTL) ||
- !sysctl_fb_tunnels_only_for_init_net ||
- (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1);
+#if IS_ENABLED(CONFIG_SYSCTL)
+ int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net);
+
+ return !fb_tunnels_only_for_init_net ||
+ (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1);
+#else
+ return true;
+#endif
+}
+
+static inline int net_inherit_devconf(void)
+{
+#if IS_ENABLED(CONFIG_SYSCTL)
+ return READ_ONCE(sysctl_devconf_inherit_init_net);
+#else
+ return 0;
+#endif
}
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c4898fcbf923..f90f0021f5f2 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly;
static inline bool net_busy_loop_on(void)
{
- return sysctl_net_busy_poll;
+ return READ_ONCE(sysctl_net_busy_poll);
}
static inline bool sk_can_busy_loop(const struct sock *sk)
diff --git a/include/net/gro.h b/include/net/gro.h
index 867656b0739c..24003dea8fa4 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -439,7 +439,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
{
list_add_tail(&skb->list, &napi->rx_list);
napi->rx_count += segs;
- if (napi->rx_count >= gro_normal_batch)
+ if (napi->rx_count >= READ_ONCE(gro_normal_batch))
gro_normal_list(napi);
}
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index b1f3e6a8f11a..4f84ea7ee14c 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -296,7 +296,7 @@ enum xfrm_attr_type_t {
XFRMA_ETIMER_THRESH,
XFRMA_SRCADDR, /* xfrm_address_t */
XFRMA_COADDR, /* xfrm_address_t */
- XFRMA_LASTUSED, /* unsigned long */
+ XFRMA_LASTUSED, /* __u64 */
XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */
XFRMA_MIGRATE,
XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index e01a93f46f83..ce945c17980b 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -26,10 +26,16 @@
*/
int ___ratelimit(struct ratelimit_state *rs, const char *func)
{
+ /* Paired with WRITE_ONCE() in .proc_handler().
+ * Changing two values seperately could be inconsistent
+ * and some message could be lost. (See: net_ratelimit_state).
+ */
+ int interval = READ_ONCE(rs->interval);
+ int burst = READ_ONCE(rs->burst);
unsigned long flags;
int ret;
- if (!rs->interval)
+ if (!interval)
return 1;
/*
@@ -44,7 +50,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
if (!rs->begin)
rs->begin = jiffies;
- if (time_is_before_jiffies(rs->begin + rs->interval)) {
+ if (time_is_before_jiffies(rs->begin + interval)) {
if (rs->missed) {
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
printk_deferred(KERN_WARNING
@@ -56,7 +62,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
rs->begin = jiffies;
rs->printed = 0;
}
- if (rs->burst && rs->burst > rs->printed) {
+ if (burst && burst > rs->printed) {
rs->printed++;
ret = 1;
} else {
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 1b7f385643b4..94374d529ea4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -310,11 +310,12 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
void *owner, u32 size)
{
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
struct sock *sk = (struct sock *)owner;
/* same check as in sock_kmalloc() */
- if (size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+ if (size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
atomic_add(size, &sk->sk_omem_alloc);
return 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 716df64fcfa5..56c8b0921c9f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4624,7 +4624,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
struct softnet_data *sd;
unsigned int old_flow, new_flow;
- if (qlen < (netdev_max_backlog >> 1))
+ if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
return false;
sd = this_cpu_ptr(&softnet_data);
@@ -4672,7 +4672,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
if (!netif_running(skb->dev))
goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
+ if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
if (qlen) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -4928,7 +4928,7 @@ static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_rx(skb);
@@ -5281,7 +5281,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
int ret = NET_RX_DROP;
__be16 type;
- net_timestamp_check(!netdev_tstamp_prequeue, skb);
+ net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5664,7 +5664,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
@@ -5694,7 +5694,7 @@ void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
@@ -5918,7 +5918,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
net_rps_action_and_irq_enable(sd);
}
- napi->weight = dev_rx_weight;
+ napi->weight = READ_ONCE(dev_rx_weight);
while (again) {
struct sk_buff *skb;
@@ -6665,8 +6665,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
- usecs_to_jiffies(netdev_budget_usecs);
- int budget = netdev_budget;
+ usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
+ int budget = READ_ONCE(netdev_budget);
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -10284,7 +10284,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
return dev;
if (time_after(jiffies, warning_time +
- netdev_unregister_timeout_secs * HZ)) {
+ READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
list_for_each_entry(dev, list, todo_list) {
pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, netdev_refcnt_read(dev));
diff --git a/net/core/filter.c b/net/core/filter.c
index e8508aaafd27..c191db80ce93 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1214,10 +1214,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
u32 filter_size = bpf_prog_size(fp->prog->len);
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
/* same check as in sock_kmalloc() */
- if (filter_size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
+ if (filter_size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) {
atomic_add(filter_size, &sk->sk_omem_alloc);
return true;
}
@@ -1548,7 +1549,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
err = -ENOMEM;
else
err = reuseport_attach_prog(sk, prog);
@@ -1615,7 +1616,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
}
} else {
/* BPF_PROG_TYPE_SOCKET_FILTER */
- if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
+ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
err = -ENOMEM;
goto err_prog_put;
}
@@ -5034,14 +5035,14 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
/* Only some socketops are supported */
switch (optname) {
case SO_RCVBUF:
- val = min_t(u32, val, sysctl_rmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_rmem_max));
val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
WRITE_ONCE(sk->sk_rcvbuf,
max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_SNDBUF:
- val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
WRITE_ONCE(sk->sk_sndbuf,
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 541c7a72a28a..21619c70a82b 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -26,7 +26,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
cell = this_cpu_ptr(gcells->cells);
- if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+ if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) {
drop:
dev_core_stats_rx_dropped_inc(dev);
kfree_skb(skb);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5b669eb80270..78cc8fb68814 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -309,14 +309,17 @@ static int neigh_del_timer(struct neighbour *n)
static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
{
+ struct sk_buff_head tmp;
unsigned long flags;
struct sk_buff *skb;
+ skb_queue_head_init(&tmp);
spin_lock_irqsave(&list->lock, flags);
skb = skb_peek(list);
while (skb != NULL) {
struct sk_buff *skb_next = skb_peek_next(skb, list);
struct net_device *dev = skb->dev;
+
if (net == NULL || net_eq(dev_net(dev), net)) {
struct in_device *in_dev;
@@ -326,13 +329,16 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
in_dev->arp_parms->qlen--;
rcu_read_unlock();
__skb_unlink(skb, list);
-
- dev_put(dev);
- kfree_skb(skb);
+ __skb_queue_tail(&tmp, skb);
}
skb = skb_next;
}
spin_unlock_irqrestore(&list->lock, flags);
+
+ while ((skb = __skb_dequeue(&tmp))) {
+ dev_put(skb->dev);
+ kfree_skb(skb);
+ }
}
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 974bbbbe7138..84bb5e188d0d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4205,9 +4205,8 @@ normal:
SKB_GSO_CB(nskb)->csum_start =
skb_headroom(nskb) + doffset;
} else {
- skb_copy_bits(head_skb, offset,
- skb_put(nskb, len),
- len);
+ if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len))
+ goto err;
}
continue;
}
@@ -4798,7 +4797,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
{
bool ret;
- if (likely(sysctl_tstamp_allow_data || tsonly))
+ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
return true;
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/core/sock.c b/net/core/sock.c
index 4cb957d934a2..788c1372663c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1101,7 +1101,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
set_sndbuf:
/* Ensure val * 2 fits into an int, to prevent max_t()
* from treating it as a negative value.
@@ -1133,7 +1133,7 @@ set_sndbuf:
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
+ __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
break;
case SO_RCVBUFFORCE:
@@ -2536,7 +2536,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
- sysctl_optmem_max)
+ READ_ONCE(sysctl_optmem_max))
return NULL;
skb = alloc_skb(size, priority);
@@ -2554,8 +2554,10 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
*/
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
- if ((unsigned int)size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+ if ((unsigned int)size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
void *mem;
/* First do the add, to avoid the race if kmalloc
* might sleep.
@@ -3309,8 +3311,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
- sk->sk_rcvbuf = sysctl_rmem_default;
- sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
+ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
sk->sk_state = TCP_CLOSE;
sk_set_socket(sk, sock);
@@ -3365,7 +3367,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
- sk->sk_ll_usec = sysctl_net_busy_read;
+ sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
#endif
sk->sk_max_pacing_rate = ~0UL;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 71a13596ea2b..725891527814 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -234,14 +234,17 @@ static int set_default_qdisc(struct ctl_table *table, int write,
static int proc_do_dev_weight(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- int ret;
+ static DEFINE_MUTEX(dev_weight_mutex);
+ int ret, weight;
+ mutex_lock(&dev_weight_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
- if (ret != 0)
- return ret;
-
- dev_rx_weight = weight_p * dev_weight_rx_bias;
- dev_tx_weight = weight_p * dev_weight_tx_bias;
+ if (!ret && write) {
+ weight = READ_ONCE(weight_p);
+ WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
+ WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
+ }
+ mutex_unlock(&dev_weight_mutex);
return ret;
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 92b778e423df..e8b9a9202fec 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2682,23 +2682,27 @@ static __net_init int devinet_init_net(struct net *net)
#endif
if (!net_eq(net, &init_net)) {
- if (IS_ENABLED(CONFIG_SYSCTL) &&
- sysctl_devconf_inherit_init_net == 3) {
+ switch (net_inherit_devconf()) {
+ case 3:
/* copy from the current netns */
memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
sizeof(ipv4_devconf));
memcpy(dflt,
current->nsproxy->net_ns->ipv4.devconf_dflt,
sizeof(ipv4_devconf_dflt));
- } else if (!IS_ENABLED(CONFIG_SYSCTL) ||
- sysctl_devconf_inherit_init_net != 2) {
- /* inherit == 0 or 1: copy from init_net */
+ break;
+ case 0:
+ case 1:
+ /* copy from init_net */
memcpy(all, init_net.ipv4.devconf_all,
sizeof(ipv4_devconf));
memcpy(dflt, init_net.ipv4.devconf_dflt,
sizeof(ipv4_devconf_dflt));
+ break;
+ case 2:
+ /* use compiled values */
+ break;
}
- /* else inherit == 2: use compiled values */
}
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d7bd1daf022b..04e2034f2f8e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1730,7 +1730,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
- sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
ipc.sockc.mark = fl4.flowi4_mark;
err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
len, 0, &ipc, &rt, MSG_DONTWAIT);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index a8a323ecbb54..e49a61a053a6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -772,7 +772,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > sysctl_optmem_max)
+ if (optlen > READ_ONCE(sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -808,7 +808,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
+ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -1233,7 +1233,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (optlen < IP_MSFILTER_SIZE(0))
goto e_inval;
- if (optlen > sysctl_optmem_max) {
+ if (optlen > READ_ONCE(sysctl_optmem_max)) {
err = -ENOBUFS;
break;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bbe218753662..e5011c136fdb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1000,7 +1000,7 @@ new_segment:
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= sysctl_max_skb_frags) {
+ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1354,7 +1354,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags) {
+ if (i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 78b654ff421b..290019de766d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -239,7 +239,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (wscale_ok) {
/* Set window scaling on max possible window */
space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
- space = max_t(u32, space, sysctl_rmem_max);
+ space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
space = min_t(u32, space, *window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
0, TCP_MAX_WSCALE);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b624e3d8c5f0..e15f64f22fa8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -7162,9 +7162,8 @@ static int __net_init addrconf_init_net(struct net *net)
if (!dflt)
goto err_alloc_dflt;
- if (IS_ENABLED(CONFIG_SYSCTL) &&
- !net_eq(net, &init_net)) {
- switch (sysctl_devconf_inherit_init_net) {
+ if (!net_eq(net, &init_net)) {
+ switch (net_inherit_devconf()) {
case 1: /* copy from init_net */
memcpy(all, init_net.ipv6.devconf_all,
sizeof(ipv6_devconf));
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 222f6bf220ba..e0dcc7a193df 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > sysctl_optmem_max)
+ if (optlen > READ_ONCE(sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
+ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index fda2dcc8a383..c85df5b958d2 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1697,9 +1697,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
pfk->registered |= (1<<hdr->sadb_msg_satype);
}
+ mutex_lock(&pfkey_mutex);
xfrm_probe_algs();
supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
+ mutex_unlock(&pfkey_mutex);
+
if (!supp_skb) {
if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
pfk->registered &= ~(1<<hdr->sadb_msg_satype);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index da4257504fad..d398f3810662 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1263,7 +1263,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
- if (!can_coalesce && i >= sysctl_max_skb_frags) {
+ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 9d43277b8b4f..a56fd0b5a430 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1280,12 +1280,12 @@ static void set_sock_size(struct sock *sk, int mode, int val)
lock_sock(sk);
if (mode) {
val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
- sysctl_wmem_max);
+ READ_ONCE(sysctl_wmem_max));
sk->sk_sndbuf = val * 2;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
} else {
val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
- sysctl_rmem_max);
+ READ_ONCE(sysctl_rmem_max));
sk->sk_rcvbuf = val * 2;
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d47b9689eba6..99b697ad2b98 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -409,7 +409,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = dev_tx_weight;
+ int quota = READ_ONCE(dev_tx_weight);
int packets;
while (qdisc_restart(q, &packets)) {
diff --git a/net/socket.c b/net/socket.c
index 9b27c5e4e5ba..7378375d3a5b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1801,7 +1801,7 @@ int __sys_listen(int fd, int backlog)
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
- somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
+ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
if ((unsigned int)backlog > somaxconn)
backlog = somaxconn;
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 82d14eea1b5a..974eb97b77d2 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -168,7 +168,7 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
- if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog)
+ if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
return -ENOBUFS;
__skb_queue_tail(&ctx->out_queue, skb);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 144238a50f3d..b2f4ec9c537f 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -669,7 +669,6 @@ resume:
x->curlft.bytes += skb->len;
x->curlft.packets++;
- x->curlft.use_time = ktime_get_real_seconds();
spin_unlock(&x->lock);
@@ -783,7 +782,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
trans = this_cpu_ptr(&xfrm_trans_tasklet);
- if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+ if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
return -ENOBUFS;
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 555ab35cd119..9a5e79a38c67 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -534,7 +534,6 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
x->curlft.bytes += skb->len;
x->curlft.packets++;
- x->curlft.use_time = ktime_get_real_seconds();
spin_unlock_bh(&x->lock);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f1a0bab920a5..cc6ab79609e2 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3162,7 +3162,7 @@ ok:
return dst;
nopol:
- if (!(dst_orig->dev->flags & IFF_LOOPBACK) &&
+ if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) &&
net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
err = -EPERM;
goto error;
@@ -3599,6 +3599,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (pols[1]) {
if (IS_ERR(pols[1])) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
+ xfrm_pol_put(pols[0]);
return 0;
}
pols[1]->curlft.use_time = ktime_get_real_seconds();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 52e60e607f8a..91c32a3b6924 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1592,6 +1592,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->replay = orig->replay;
x->preplay = orig->preplay;
x->mapping_maxage = orig->mapping_maxage;
+ x->lastused = orig->lastused;
x->new_mapping = 0;
x->new_mapping_sport = 0;