-rw-r--r--.clang-format2
-rw-r--r--.mailmap4
-rw-r--r--Documentation/RCU/arrayRCU.rst165
-rw-r--r--Documentation/RCU/checklist.rst244
-rw-r--r--Documentation/RCU/index.rst1
-rw-r--r--Documentation/RCU/listRCU.rst174
-rw-r--r--Documentation/RCU/lockdep.rst4
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml2
-rw-r--r--Documentation/loongarch/booting.rst42
-rw-r--r--Documentation/loongarch/index.rst1
-rw-r--r--Documentation/memory-barriers.txt2
-rw-r--r--Documentation/mm/slub.rst2
-rw-r--r--Documentation/translations/ko_KR/memory-barriers.txt149
-rw-r--r--Documentation/translations/zh_CN/loongarch/booting.rst48
-rw-r--r--Documentation/translations/zh_CN/loongarch/index.rst1
-rw-r--r--Documentation/virt/kvm/api.rst15
-rw-r--r--Documentation/virt/kvm/halt-polling.rst (renamed from Documentation/virt/kvm/x86/halt-polling.rst)13
-rw-r--r--Documentation/virt/kvm/index.rst1
-rw-r--r--Documentation/virt/kvm/x86/index.rst1
-rw-r--r--MAINTAINERS23
-rw-r--r--Makefile4
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/arm/boot/dts/at91rm9200.dtsi2
-rw-r--r--arch/arm/boot/dts/imx7s.dtsi4
-rw-r--r--arch/arm/configs/clps711x_defconfig3
-rw-r--r--arch/arm/configs/collie_defconfig3
-rw-r--r--arch/arm/configs/multi_v4t_defconfig3
-rw-r--r--arch/arm/configs/omap1_defconfig3
-rw-r--r--arch/arm/configs/pxa_defconfig3
-rw-r--r--arch/arm/configs/tct_hammer_defconfig3
-rw-r--r--arch/arm/configs/xcep_defconfig3
-rw-r--r--arch/arm/mach-at91/sama5.c2
-rw-r--r--arch/arm/mm/fault.c18
-rw-r--r--arch/arm/mm/fault.h9
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/include/asm/efi.h8
-rw-r--r--arch/arm64/kernel/efi-rt-wrapper.S33
-rw-r--r--arch/arm64/kernel/efi.c26
-rw-r--r--arch/arm64/mm/dma-mapping.c17
-rw-r--r--arch/arm64/mm/fault.c4
-rw-r--r--arch/loongarch/Kconfig1
-rw-r--r--arch/loongarch/include/asm/pgtable.h1
-rw-r--r--arch/loongarch/include/asm/smp.h10
-rw-r--r--arch/loongarch/kernel/smp.c11
-rw-r--r--arch/loongarch/mm/tlbex.S30
-rw-r--r--arch/m68k/emu/nfcon.c9
-rw-r--r--arch/mips/include/asm/pgtable.h1
-rw-r--r--arch/openrisc/configs/or1ksim_defconfig3
-rw-r--r--arch/openrisc/configs/simple_smp_defconfig3
-rw-r--r--arch/powerpc/include/asm/interrupt.h1
-rw-r--r--arch/powerpc/net/bpf_jit_comp32.c52
-rw-r--r--arch/riscv/Kconfig6
-rw-r--r--arch/riscv/configs/nommu_k210_defconfig3
-rw-r--r--arch/riscv/configs/nommu_k210_sdcard_defconfig3
-rw-r--r--arch/riscv/configs/nommu_virt_defconfig3
-rw-r--r--arch/riscv/include/asm/asm.h1
-rw-r--r--arch/riscv/include/asm/efi.h6
-rw-r--r--arch/riscv/include/asm/pgalloc.h11
-rw-r--r--arch/riscv/include/asm/pgtable.h1
-rw-r--r--arch/riscv/include/asm/smp.h3
-rw-r--r--arch/riscv/kernel/entry.S13
-rw-r--r--arch/riscv/kernel/machine_kexec.c46
-rw-r--r--arch/riscv/kernel/setup.c9
-rw-r--r--arch/riscv/kernel/smp.c97
-rw-r--r--arch/riscv/kernel/traps.c18
-rw-r--r--arch/riscv/kernel/vdso/Makefile1
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/include/asm/pgtable.h1
-rw-r--r--arch/s390/kvm/vsie.c4
-rw-r--r--arch/sh/configs/rsk7201_defconfig3
-rw-r--r--arch/sh/configs/rsk7203_defconfig3
-rw-r--r--arch/sh/configs/se7206_defconfig3
-rw-r--r--arch/sh/configs/shmin_defconfig3
-rw-r--r--arch/sh/configs/shx3_defconfig3
-rw-r--r--arch/sparc/include/asm/pgtable_64.h1
-rw-r--r--arch/um/kernel/kmsg_dump.c24
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/events/intel/p4.c2
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/include/asm/pgtable.h9
-rw-r--r--arch/x86/kernel/cpu/bugs.c21
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--drivers/acpi/numa/hmat.c27
-rw-r--r--drivers/ata/libahci_platform.c2
-rw-r--r--drivers/bluetooth/btusb.c6
-rw-r--r--drivers/char/tpm/eventlog/acpi.c12
-rw-r--r--drivers/char/tpm/st33zp24/i2c.c142
-rw-r--r--drivers/char/tpm/st33zp24/spi.c145
-rw-r--r--drivers/char/tpm/st33zp24/st33zp24.c39
-rw-r--r--drivers/char/tpm/st33zp24/st33zp24.h7
-rw-r--r--drivers/char/tpm/tpm-chip.c7
-rw-r--r--drivers/char/tpm/tpm-interface.c5
-rw-r--r--drivers/char/tpm/tpm_crb.c31
-rw-r--r--drivers/char/tpm/tpm_ftpm_tee.c8
-rw-r--r--drivers/char/tpm/tpm_tis.c9
-rw-r--r--drivers/char/tpm/tpm_tis_core.c20
-rw-r--r--drivers/char/tpm/tpm_tis_core.h1
-rw-r--r--drivers/char/tpm/tpm_tis_i2c.c3
-rw-r--r--drivers/clk/at91/at91rm9200.c2
-rw-r--r--drivers/clk/qcom/gcc-sc8280xp.c6
-rw-r--r--drivers/clk/qcom/gdsc.c61
-rw-r--r--drivers/clk/qcom/gdsc.h2
-rw-r--r--drivers/clk/samsung/clk-exynos-clkout.c6
-rw-r--r--drivers/clk/samsung/clk-exynos7885.c4
-rw-r--r--drivers/clocksource/timer-riscv.c2
-rw-r--r--drivers/dax/hmem/device.c24
-rw-r--r--drivers/firmware/efi/earlycon.c8
-rw-r--r--drivers/firmware/efi/efi-pstore.c2
-rw-r--r--drivers/gpio/gpio-amd8111.c4
-rw-r--r--drivers/gpio/gpio-rockchip.c1
-rw-r--r--drivers/gpio/gpiolib.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c24
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h2
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.c6
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi86.c4
-rw-r--r--drivers/gpu/drm/drm_gem_shmem_helper.c18
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c2
-rw-r--r--drivers/gpu/drm/i915/intel_dram.c3
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg.c6
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c4
-rw-r--r--drivers/hid/hid-core.c3
-rw-r--r--drivers/hid/hid-ids.h4
-rw-r--r--drivers/hid/hid-ite.c5
-rw-r--r--drivers/hid/hid-lg4ff.c6
-rw-r--r--drivers/hid/hid-logitech-hidpp.c28
-rw-r--r--drivers/hid/hid-quirks.c3
-rw-r--r--drivers/hid/hid-uclogic-core.c1
-rw-r--r--drivers/hid/hid-uclogic-rdesc.c2
-rw-r--r--drivers/hid/i2c-hid/Kconfig4
-rw-r--r--drivers/hwmon/asus-ec-sensors.c2
-rw-r--r--drivers/hwmon/coretemp.c9
-rw-r--r--drivers/hwmon/i5500_temp.c2
-rw-r--r--drivers/hwmon/ibmpex.c1
-rw-r--r--drivers/hwmon/ina3221.c4
-rw-r--r--drivers/hwmon/ltc2947-core.c2
-rw-r--r--drivers/i2c/busses/i2c-cadence.c11
-rw-r--r--drivers/i2c/busses/i2c-imx.c6
-rw-r--r--drivers/i2c/busses/i2c-npcm7xx.c11
-rw-r--r--drivers/i2c/busses/i2c-qcom-geni.c1
-rw-r--r--drivers/i2c/i2c-core-base.c9
-rw-r--r--drivers/input/touchscreen/raydium_i2c_ts.c4
-rw-r--r--drivers/iommu/intel/dmar.c1
-rw-r--r--drivers/iommu/intel/iommu.c73
-rw-r--r--drivers/iommu/intel/iommu.h4
-rw-r--r--drivers/iommu/intel/svm.c19
-rw-r--r--drivers/media/common/videobuf2/frame_vector.c68
-rw-r--r--drivers/media/common/videobuf2/videobuf2-core.c102
-rw-r--r--drivers/media/dvb-frontends/stv0288.c5
-rw-r--r--drivers/media/v4l2-core/v4l2-dv-timings.c20
-rw-r--r--drivers/mmc/core/core.c9
-rw-r--r--drivers/mmc/core/mmc_test.c3
-rw-r--r--drivers/mmc/host/mtk-sd.c6
-rw-r--r--drivers/mmc/host/sdhci-esdhc-imx.c2
-rw-r--r--drivers/mmc/host/sdhci-sprd.c4
-rw-r--r--drivers/mmc/host/sdhci.c61
-rw-r--r--drivers/mmc/host/sdhci.h2
-rw-r--r--drivers/net/bonding/bond_main.c2
-rw-r--r--drivers/net/can/can327.c21
-rw-r--r--drivers/net/can/cc770/cc770_isa.c10
-rw-r--r--drivers/net/can/m_can/m_can.c2
-rw-r--r--drivers/net/can/m_can/m_can_pci.c9
-rw-r--r--drivers/net/can/sja1000/sja1000_isa.c10
-rw-r--r--drivers/net/can/slcan/slcan-core.c10
-rw-r--r--drivers/net/can/usb/esd_usb.c6
-rw-r--r--drivers/net/can/usb/etas_es58x/es58x_core.c5
-rw-r--r--drivers/net/can/usb/mcba_usb.c10
-rw-r--r--drivers/net/dsa/lan9303-core.c2
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c7
-rw-r--r--drivers/net/dsa/sja1105/sja1105_devlink.c2
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c2
-rw-r--r--drivers/net/ethernet/aeroflex/greth.c1
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c5
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_main.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_main.h2
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig3
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c4
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c4
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c23
-rw-r--r--drivers/net/ethernet/hisilicon/hisi_femac.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hix5hd2_gmac.c2
-rw-r--r--drivers/net/ethernet/intel/e100.c5
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c10
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c12
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c30
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c9
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c10
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c7
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router.c2
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router_hw.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c122
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c5
-rw-r--r--drivers/net/ethernet/microchip/encx24j600-regmap.c4
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c3
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_packet.c41
-rw-r--r--drivers/net/ethernet/microsoft/mana/gdma.h9
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c16
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfdk/dp.c6
-rw-r--r--drivers/net/ethernet/ni/nixge.c29
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c24
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c4
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c8
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c4
-rw-r--r--drivers/net/ieee802154/ca8210.c2
-rw-r--r--drivers/net/ieee802154/cc2520.c2
-rw-r--r--drivers/net/loopback.c2
-rw-r--r--drivers/net/macsec.c1
-rw-r--r--drivers/net/mdio/fwnode_mdio.c6
-rw-r--r--drivers/net/mdio/of_mdio.c3
-rw-r--r--drivers/net/netconsole.c21
-rw-r--r--drivers/net/ntb_netdev.c9
-rw-r--r--drivers/net/phy/mdio_device.c2
-rw-r--r--drivers/net/phy/mxl-gpy.c85
-rw-r--r--drivers/net/phy/phy_device.c2
-rw-r--r--drivers/net/phy/phylink.c22
-rw-r--r--drivers/net/plip/plip.c4
-rw-r--r--drivers/net/thunderbolt.c1
-rw-r--r--drivers/net/tun.c4
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c27
-rw-r--r--drivers/net/wireless/microchip/wilc1000/cfg80211.c39
-rw-r--r--drivers/net/wireless/microchip/wilc1000/hif.c21
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_mux.c1
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_mux_codec.c26
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_protocol.h2
-rw-r--r--drivers/net/xen-netback/common.h2
-rw-r--r--drivers/net/xen-netback/interface.c6
-rw-r--r--drivers/net/xen-netback/netback.c225
-rw-r--r--drivers/net/xen-netback/rx.c8
-rw-r--r--drivers/net/xen-netfront.c6
-rw-r--r--drivers/nvme/host/core.c10
-rw-r--r--drivers/nvme/host/multipath.c3
-rw-r--r--drivers/nvme/host/pci.c2
-rw-r--r--drivers/of/property.c4
-rw-r--r--drivers/pinctrl/intel/pinctrl-intel.c27
-rw-r--r--drivers/pinctrl/mediatek/mtk-eint.c9
-rw-r--r--drivers/pinctrl/pinctrl-single.c2
-rw-r--r--drivers/platform/x86/amd/pmc.c6
-rw-r--r--drivers/s390/net/qeth_l2_main.c2
-rw-r--r--drivers/sbus/char/envctrl.c4
-rw-r--r--drivers/scsi/scsi_error.c2
-rw-r--r--drivers/staging/media/atomisp/pci/hive_types.h2
-rw-r--r--drivers/tty/hvc/hvc_console.c4
-rw-r--r--drivers/tty/serial/8250/8250_core.c2
-rw-r--r--drivers/tty/serial/earlycon.c4
-rw-r--r--drivers/tty/serial/kgdboc.c46
-rw-r--r--drivers/tty/serial/pic32_uart.c4
-rw-r--r--drivers/tty/serial/samsung_tty.c2
-rw-r--r--drivers/tty/serial/serial_core.c14
-rw-r--r--drivers/tty/serial/sh-sci.c20
-rw-r--r--drivers/tty/serial/xilinx_uartps.c2
-rw-r--r--drivers/tty/tty_io.c18
-rw-r--r--drivers/usb/early/xhci-dbc.c2
-rw-r--r--drivers/video/fbdev/xen-fbfront.c12
-rw-r--r--fs/afs/fs_probe.c4
-rw-r--r--fs/afs/server.c2
-rw-r--r--fs/binfmt_elf.c32
-rw-r--r--fs/binfmt_elf_fdpic.c7
-rw-r--r--fs/binfmt_misc.c8
-rw-r--r--fs/ceph/locks.c4
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/exec.c34
-rw-r--r--fs/fscache/cookie.c8
-rw-r--r--fs/fuse/file.c37
-rw-r--r--fs/ksmbd/vfs.c2
-rw-r--r--fs/lockd/svcsubs.c4
-rw-r--r--fs/locks.c50
-rw-r--r--fs/nfs/delegation.c2
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/pagelist.c2
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nfsd/nfs4state.c6
-rw-r--r--fs/nilfs2/dat.c7
-rw-r--r--fs/proc/consoles.c21
-rw-r--r--fs/pstore/platform.c25
-rw-r--r--fs/pstore/ram.c44
-rw-r--r--fs/pstore/ram_core.c20
-rw-r--r--fs/pstore/ram_internal.h98
-rw-r--r--fs/pstore/zone.c2
-rw-r--r--include/asm-generic/tlb.h4
-rw-r--r--include/linux/can/platform/sja1000.h2
-rw-r--r--include/linux/cgroup.h1
-rw-r--r--include/linux/console.h129
-rw-r--r--include/linux/fs.h20
-rw-r--r--include/linux/gfp.h18
-rw-r--r--include/linux/kasan.h5
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--include/linux/license.h2
-rw-r--r--include/linux/mm.h29
-rw-r--r--include/linux/mmc/mmc.h2
-rw-r--r--include/linux/nsproxy.h1
-rw-r--r--include/linux/percpu.h2
-rw-r--r--include/linux/pgtable.h18
-rw-r--r--include/linux/platform_data/st33zp24.h16
-rw-r--r--include/linux/pstore_ram.h99
-rw-r--r--include/linux/rcupdate.h14
-rw-r--r--include/linux/rcutiny.h8
-rw-r--r--include/linux/rcutree.h4
-rw-r--r--include/linux/seccomp.h1
-rw-r--r--include/linux/serial_core.h10
-rw-r--r--include/linux/slab.h75
-rw-r--r--include/linux/slab_def.h2
-rw-r--r--include/linux/slub_def.h8
-rw-r--r--include/linux/srcu.h72
-rw-r--r--include/linux/srcutree.h5
-rw-r--r--include/linux/swapops.h8
-rw-r--r--include/net/bluetooth/hci.h12
-rw-r--r--include/net/ping.h3
-rw-r--r--include/net/sctp/stream_sched.h2
-rw-r--r--include/trace/events/fscache.h2
-rw-r--r--include/uapi/linux/elf.h14
-rw-r--r--io_uring/io_uring.c4
-rw-r--r--ipc/sem.c3
-rw-r--r--kernel/bpf/bpf_local_storage.c2
-rw-r--r--kernel/cgroup/cgroup-internal.h1
-rw-r--r--kernel/configs/tiny.config5
-rw-r--r--kernel/debug/kdb/kdb_io.c18
-rw-r--r--kernel/events/core.c19
-rw-r--r--kernel/fork.c26
-rw-r--r--kernel/kcsan/core.c50
-rw-r--r--kernel/nsproxy.c23
-rw-r--r--kernel/printk/printk.c498
-rw-r--r--kernel/printk/printk_ringbuffer.c2
-rw-r--r--kernel/rcu/Kconfig22
-rw-r--r--kernel/rcu/Kconfig.debug3
-rw-r--r--kernel/rcu/rcu.h16
-rw-r--r--kernel/rcu/rcuscale.c69
-rw-r--r--kernel/rcu/rcutorture.c72
-rw-r--r--kernel/rcu/srcutiny.c10
-rw-r--r--kernel/rcu/srcutree.c100
-rw-r--r--kernel/rcu/sync.c2
-rw-r--r--kernel/rcu/tasks.h2
-rw-r--r--kernel/rcu/tiny.c2
-rw-r--r--kernel/rcu/tree.c152
-rw-r--r--kernel/rcu/tree.h12
-rw-r--r--kernel/rcu/tree_exp.h2
-rw-r--r--kernel/rcu/tree_nocb.h259
-rw-r--r--kernel/rcu/tree_plugin.h5
-rw-r--r--kernel/rcu/update.c18
-rw-r--r--kernel/sysctl.c30
-rw-r--r--kernel/trace/trace.c11
-rw-r--r--kernel/trace/trace.h1
-rw-r--r--kernel/trace/trace_dynevent.c2
-rw-r--r--kernel/trace/trace_events.c13
-rw-r--r--kernel/trace/trace_events_hist.c7
-rw-r--r--kernel/trace/trace_events_synth.c2
-rw-r--r--kernel/trace/trace_events_user.c4
-rw-r--r--kernel/trace/trace_osnoise.c6
-rw-r--r--kernel/workqueue.c2
-rw-r--r--lib/Kconfig.debug9
-rw-r--r--lib/Kconfig.kasan2
-rw-r--r--lib/Kconfig.kcsan6
-rw-r--r--lib/is_signed_type_kunit.c4
-rw-r--r--lib/percpu-refcount.c3
-rw-r--r--lib/slub_kunit.c57
-rw-r--r--lib/test_printf.c38
-rw-r--r--lib/vsprintf.c2
-rw-r--r--mm/Kconfig38
-rw-r--r--mm/Kconfig.debug2
-rw-r--r--mm/compaction.c22
-rw-r--r--mm/damon/sysfs.c46
-rw-r--r--mm/gup.c2
-rw-r--r--mm/hugetlb.c27
-rw-r--r--mm/kasan/generic.c19
-rw-r--r--mm/khugepaged.c62
-rw-r--r--mm/madvise.c6
-rw-r--r--mm/memcontrol.c15
-rw-r--r--mm/memory.c25
-rw-r--r--mm/migrate.c15
-rw-r--r--mm/mmap.c17
-rw-r--r--mm/mmu_gather.c4
-rw-r--r--mm/shmem.c11
-rw-r--r--mm/slab.c113
-rw-r--r--mm/slab.h86
-rw-r--r--mm/slab_common.c27
-rw-r--r--mm/slub.c553
-rw-r--r--mm/vmscan.c10
-rw-r--r--net/9p/trans_fd.c4
-rw-r--r--net/bluetooth/6lowpan.c1
-rw-r--r--net/bluetooth/af_bluetooth.c4
-rw-r--r--net/bluetooth/hci_codec.c19
-rw-r--r--net/bluetooth/hci_core.c8
-rw-r--r--net/bluetooth/hci_request.c2
-rw-r--r--net/bluetooth/hci_sync.c19
-rw-r--r--net/bluetooth/iso.c1
-rw-r--r--net/bluetooth/l2cap_core.c3
-rw-r--r--net/can/af_can.c6
-rw-r--r--net/core/dst.c2
-rw-r--r--net/dsa/tag_hellcreek.c3
-rw-r--r--net/dsa/tag_ksz.c3
-rw-r--r--net/dsa/tag_sja1105.c3
-rw-r--r--net/hsr/hsr_forward.c5
-rw-r--r--net/ipv4/devinet.c19
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_semantics.c9
-rw-r--r--net/ipv4/ip_gre.c48
-rw-r--r--net/ipv4/ping.c7
-rw-r--r--net/ipv6/ip6_output.c5
-rw-r--r--net/mac80211/airtime.c3
-rw-r--r--net/mac802154/iface.c1
-rw-r--r--net/mptcp/protocol.c13
-rw-r--r--net/mptcp/subflow.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c6
-rw-r--r--net/netfilter/nf_conntrack_netlink.c19
-rw-r--r--net/netfilter/nf_flow_table_offload.c6
-rw-r--r--net/netfilter/nft_set_pipapo.c5
-rw-r--r--net/nfc/nci/ntf.c6
-rw-r--r--net/packet/af_packet.c6
-rw-r--r--net/sctp/stream.c25
-rw-r--r--net/sctp/stream_sched.c5
-rw-r--r--net/sctp/stream_sched_prio.c19
-rw-r--r--net/sctp/stream_sched_rr.c5
-rw-r--r--net/tipc/crypto.c3
-rw-r--r--net/tipc/link.c4
-rw-r--r--net/tipc/node.c12
-rw-r--r--net/unix/diag.c20
-rw-r--r--net/wireless/scan.c10
-rw-r--r--security/keys/trusted-keys/trusted_tee.c3
-rw-r--r--sound/firewire/dice/dice-stream.c12
-rw-r--r--sound/soc/codecs/cs42l51.c2
-rw-r--r--sound/soc/codecs/tlv320adc3xxx.c3
-rw-r--r--sound/soc/fsl/fsl_micfil.c19
-rw-r--r--sound/soc/soc-ops.c11
-rw-r--r--tools/lib/bpf/libbpf.c2
-rw-r--r--tools/lib/bpf/libbpf_probes.c2
-rw-r--r--tools/lib/bpf/ringbuf.c26
-rw-r--r--tools/memory-model/Documentation/explanation.txt7
-rw-r--r--tools/objtool/check.c10
-rw-r--r--tools/testing/selftests/bpf/map_tests/sk_storage_map.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c8
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c6
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c178
-rw-r--r--tools/testing/selftests/net/config2
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh11
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh37
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh2
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh2
-rw-r--r--tools/testing/selftests/nolibc/Makefile3
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c7
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/config2csv.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/config_override.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configcheck.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh49
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh5
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-transform.sh68
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-build.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh145
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c6
-rw-r--r--tools/testing/selftests/timens/.gitignore1
-rw-r--r--tools/testing/selftests/timens/Makefile2
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c139
-rw-r--r--tools/vm/slabinfo-gnuplot.sh4
-rw-r--r--tools/vm/slabinfo.c6
485 files changed, 5983 insertions, 2980 deletions
diff --git a/.clang-format b/.clang-format
index 1247d54f9e49..bcf60344f9f5 100644
--- a/.clang-format
+++ b/.clang-format
@@ -222,6 +222,7 @@ ForEachMacros:
- 'for_each_component_dais'
- 'for_each_component_dais_safe'
- 'for_each_console'
+ - 'for_each_console_srcu'
- 'for_each_cpu'
- 'for_each_cpu_and'
- 'for_each_cpu_not'
@@ -535,6 +536,7 @@ ForEachMacros:
- 'perf_hpp_list__for_each_sort_list_safe'
- 'perf_pmu__for_each_hybrid_pmu'
- 'ping_portaddr_for_each_entry'
+ - 'ping_portaddr_for_each_entry_rcu'
- 'plist_for_each'
- 'plist_for_each_continue'
- 'plist_for_each_entry'
diff --git a/.mailmap b/.mailmap
index 4a14ece4cdb7..2301c29ccf6d 100644
--- a/.mailmap
+++ b/.mailmap
@@ -287,6 +287,7 @@ Matthew Wilcox <[email protected]> <[email protected]>
Matthieu CASTET <[email protected]>
Matt Ranostay <[email protected]> Matthew Ranostay <[email protected]>
@@ -372,6 +373,8 @@ Ricardo Ribalda <[email protected]> <[email protected]>
Rudolf Marek <[email protected]>
Rui Saraiva <[email protected]>
@@ -391,6 +394,7 @@ Sebastian Reichel <[email protected]> <[email protected]>
Sebastian Reichel <[email protected]> <[email protected]>
diff --git a/Documentation/RCU/arrayRCU.rst b/Documentation/RCU/arrayRCU.rst
deleted file mode 100644
index a5f2ff8fc54c..000000000000
--- a/Documentation/RCU/arrayRCU.rst
+++ /dev/null
@@ -1,165 +0,0 @@
-.. _array_rcu_doc:
-
-Using RCU to Protect Read-Mostly Arrays
-=======================================
-
-Although RCU is more commonly used to protect linked lists, it can
-also be used to protect arrays. Three situations are as follows:
-
-1. :ref:`Hash Tables <hash_tables>`
-
-2. :ref:`Static Arrays <static_arrays>`
-
-3. :ref:`Resizable Arrays <resizable_arrays>`
-
-Each of these three situations involves an RCU-protected pointer to an
-array that is separately indexed. It might be tempting to consider use
-of RCU to instead protect the index into an array, however, this use
-case is **not** supported. The problem with RCU-protected indexes into
-arrays is that compilers can play way too many optimization games with
-integers, which means that the rules governing handling of these indexes
-are far more trouble than they are worth. If RCU-protected indexes into
-arrays prove to be particularly valuable (which they have not thus far),
-explicit cooperation from the compiler will be required to permit them
-to be safely used.
-
-That aside, each of the three RCU-protected pointer situations are
-described in the following sections.
-
-.. _hash_tables:
-
-Situation 1: Hash Tables
-------------------------
-
-Hash tables are often implemented as an array, where each array entry
-has a linked-list hash chain. Each hash chain can be protected by RCU
-as described in listRCU.rst. This approach also applies to other
-array-of-list situations, such as radix trees.
-
-.. _static_arrays:
-
-Situation 2: Static Arrays
---------------------------
-
-Static arrays, where the data (rather than a pointer to the data) is
-located in each array element, and where the array is never resized,
-have not been used with RCU. Rik van Riel recommends using seqlock in
-this situation, which would also have minimal read-side overhead as long
-as updates are rare.
-
-Quick Quiz:
- Why is it so important that updates be rare when using seqlock?
-
-:ref:`Answer to Quick Quiz <answer_quick_quiz_seqlock>`
-
-.. _resizable_arrays:
-
-Situation 3: Resizable Arrays
-------------------------------
-
-Use of RCU for resizable arrays is demonstrated by the grow_ary()
-function formerly used by the System V IPC code. The array is used
-to map from semaphore, message-queue, and shared-memory IDs to the data
-structure that represents the corresponding IPC construct. The grow_ary()
-function does not acquire any locks; instead its caller must hold the
-ids->sem semaphore.
-
-The grow_ary() function, shown below, does some limit checks, allocates a
-new ipc_id_ary, copies the old to the new portion of the new, initializes
-the remainder of the new, updates the ids->entries pointer to point to
-the new array, and invokes ipc_rcu_putref() to free up the old array.
-Note that rcu_assign_pointer() is used to update the ids->entries pointer,
-which includes any memory barriers required on whatever architecture
-you are running on::
-
- static int grow_ary(struct ipc_ids* ids, int newsize)
- {
- struct ipc_id_ary* new;
- struct ipc_id_ary* old;
- int i;
- int size = ids->entries->size;
-
- if(newsize > IPCMNI)
- newsize = IPCMNI;
- if(newsize <= size)
- return newsize;
-
- new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
- sizeof(struct ipc_id_ary));
- if(new == NULL)
- return size;
- new->size = newsize;
- memcpy(new->p, ids->entries->p,
- sizeof(struct kern_ipc_perm *)*size +
- sizeof(struct ipc_id_ary));
- for(i=size;i<newsize;i++) {
- new->p[i] = NULL;
- }
- old = ids->entries;
-
- /*
- * Use rcu_assign_pointer() to make sure the memcpyed
- * contents of the new array are visible before the new
- * array becomes visible.
- */
- rcu_assign_pointer(ids->entries, new);
-
- ipc_rcu_putref(old);
- return newsize;
- }
-
-The ipc_rcu_putref() function decrements the array's reference count
-and then, if the reference count has dropped to zero, uses call_rcu()
-to free the array after a grace period has elapsed.
-
-The array is traversed by the ipc_lock() function. This function
-indexes into the array under the protection of rcu_read_lock(),
-using rcu_dereference() to pick up the pointer to the array so
-that it may later safely be dereferenced -- memory barriers are
-required on the Alpha CPU. Since the size of the array is stored
-with the array itself, there can be no array-size mismatches, so
-a simple check suffices. The pointer to the structure corresponding
-to the desired IPC object is placed in "out", with NULL indicating
-a non-existent entry. After acquiring "out->lock", the "out->deleted"
-flag indicates whether the IPC object is in the process of being
-deleted, and, if not, the pointer is returned::
-
- struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
- {
- struct kern_ipc_perm* out;
- int lid = id % SEQ_MULTIPLIER;
- struct ipc_id_ary* entries;
-
- rcu_read_lock();
- entries = rcu_dereference(ids->entries);
- if(lid >= entries->size) {
- rcu_read_unlock();
- return NULL;
- }
- out = entries->p[lid];
- if(out == NULL) {
- rcu_read_unlock();
- return NULL;
- }
- spin_lock(&out->lock);
-
- /* ipc_rmid() may have already freed the ID while ipc_lock
- * was spinning: here verify that the structure is still valid
- */
- if (out->deleted) {
- spin_unlock(&out->lock);
- rcu_read_unlock();
- return NULL;
- }
- return out;
- }
-
-.. _answer_quick_quiz_seqlock:
-
-Answer to Quick Quiz:
- Why is it so important that updates be rare when using seqlock?
-
- The reason that it is important that updates be rare when
- using seqlock is that frequent updates can livelock readers.
- One way to avoid this problem is to assign a seqlock for
- each array entry rather than to the entire array.
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index 048c5bc1f813..cc361fb01ed4 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -32,8 +32,8 @@ over a rather long period of time, but improvements are always welcome!
for lockless updates. This does result in the mildly
counter-intuitive situation where rcu_read_lock() and
rcu_read_unlock() are used to protect updates, however, this
- approach provides the same potential simplifications that garbage
- collectors do.
+ approach can provide the same simplifications to certain types
+ of lockless algorithms that garbage collectors do.
1. Does the update code have proper mutual exclusion?
@@ -49,12 +49,12 @@ over a rather long period of time, but improvements are always welcome!
them -- even x86 allows later loads to be reordered to precede
earlier stores), and be prepared to explain why this added
complexity is worthwhile. If you choose #c, be prepared to
- explain how this single task does not become a major bottleneck on
- big multiprocessor machines (for example, if the task is updating
- information relating to itself that other tasks can read, there
- by definition can be no bottleneck). Note that the definition
- of "large" has changed significantly: Eight CPUs was "large"
- in the year 2000, but a hundred CPUs was unremarkable in 2017.
+ explain how this single task does not become a major bottleneck
+ on large systems (for example, if the task is updating information
+ relating to itself that other tasks can read, there by definition
+ can be no bottleneck). Note that the definition of "large" has
+ changed significantly: Eight CPUs was "large" in the year 2000,
+ but a hundred CPUs was unremarkable in 2017.
2. Do the RCU read-side critical sections make proper use of
rcu_read_lock() and friends? These primitives are needed
@@ -97,33 +97,38 @@ over a rather long period of time, but improvements are always welcome!
b. Proceed as in (a) above, but also maintain per-element
locks (that are acquired by both readers and writers)
- that guard per-element state. Of course, fields that
- the readers refrain from accessing can be guarded by
- some other lock acquired only by updaters, if desired.
+ that guard per-element state. Fields that the readers
+ refrain from accessing can be guarded by some other lock
+ acquired only by updaters, if desired.
- This works quite well, also.
+ This also works quite well.
c. Make updates appear atomic to readers. For example,
pointer updates to properly aligned fields will
appear atomic, as will individual atomic primitives.
Sequences of operations performed under a lock will *not*
appear to be atomic to RCU readers, nor will sequences
- of multiple atomic primitives.
+ of multiple atomic primitives. One alternative is to
+ move multiple individual fields to a separate structure,
+ thus solving the multiple-field problem by imposing an
+ additional level of indirection.
This can work, but is starting to get a bit tricky.
- d. Carefully order the updates and the reads so that
- readers see valid data at all phases of the update.
- This is often more difficult than it sounds, especially
- given modern CPUs' tendency to reorder memory references.
- One must usually liberally sprinkle memory barriers
- (smp_wmb(), smp_rmb(), smp_mb()) through the code,
- making it difficult to understand and to test.
-
- It is usually better to group the changing data into
- a separate structure, so that the change may be made
- to appear atomic by updating a pointer to reference
- a new structure containing updated values.
+ d. Carefully order the updates and the reads so that readers
+ see valid data at all phases of the update. This is often
+ more difficult than it sounds, especially given modern
+ CPUs' tendency to reorder memory references. One must
+ usually liberally sprinkle memory-ordering operations
+ through the code, making it difficult to understand and
+ to test. Where it works, it is better to use things
+ like smp_store_release() and smp_load_acquire(), but in
+ some cases the smp_mb() full memory barrier is required.
+
+ As noted earlier, it is usually better to group the
+ changing data into a separate structure, so that the
+ change may be made to appear atomic by updating a pointer
+ to reference a new structure containing updated values.
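	A minimal sketch of this "group the fields into one structure"
	approach follows; the ``foo`` structures, fields, and helpers are
	hypothetical rather than taken from any in-kernel user::

		struct foo_state {
			int a;
			int b;
		};

		struct foo {
			struct foo_state __rcu *state;
		};

		/* Readers see either the old or the new state, never a mix. */
		static void foo_get_state(struct foo *fp, int *a, int *b)
		{
			struct foo_state *s;

			rcu_read_lock();
			s = rcu_dereference(fp->state);
			*a = s->a;
			*b = s->b;
			rcu_read_unlock();
		}

		/* Caller holds the update-side lock. */
		static int foo_set_state(struct foo *fp, int a, int b)
		{
			struct foo_state *news = kmalloc(sizeof(*news), GFP_KERNEL);
			struct foo_state *olds;

			if (!news)
				return -ENOMEM;
			news->a = a;
			news->b = b;
			olds = rcu_dereference_protected(fp->state, 1);
			rcu_assign_pointer(fp->state, news);
			synchronize_rcu();	/* Wait for pre-existing readers. */
			kfree(olds);
			return 0;
		}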
4. Weakly ordered CPUs pose special challenges. Almost all CPUs
are weakly ordered -- even x86 CPUs allow later loads to be
@@ -188,26 +193,29 @@ over a rather long period of time, but improvements are always welcome!
when publicizing a pointer to a structure that can
be traversed by an RCU read-side critical section.
-5. If call_rcu() or call_srcu() is used, the callback function will
- be called from softirq context. In particular, it cannot block.
- If you need the callback to block, run that code in a workqueue
- handler scheduled from the callback. The queue_rcu_work()
- function does this for you in the case of call_rcu().
+5. If any of call_rcu(), call_srcu(), call_rcu_tasks(),
+ call_rcu_tasks_rude(), or call_rcu_tasks_trace() is used,
+ the callback function may be invoked from softirq context,
+ and in any case with bottom halves disabled. In particular,
+ this callback function cannot block. If you need the callback
+ to block, run that code in a workqueue handler scheduled from
+ the callback. The queue_rcu_work() function does this for you
+ in the case of call_rcu().
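	As a minimal sketch, a hypothetical ``struct foo`` can embed a
	``struct rcu_work`` so that blocking cleanup runs in process
	context after a grace period (the ``blocking_cleanup()`` helper is
	likewise hypothetical)::

		struct foo {
			struct list_head list;
			struct rcu_work rwork;
		};

		static void foo_reclaim_workfn(struct work_struct *work)
		{
			struct foo *fp = container_of(to_rcu_work(work),
						      struct foo, rwork);

			blocking_cleanup(fp);	/* Process context, so blocking is OK. */
			kfree(fp);
		}

		static void foo_remove(struct foo *fp)
		{
			list_del_rcu(&fp->list);	/* Caller holds the update-side lock. */
			INIT_RCU_WORK(&fp->rwork, foo_reclaim_workfn);
			queue_rcu_work(system_wq, &fp->rwork);
		}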
6. Since synchronize_rcu() can block, it cannot be called
from any sort of irq context. The same rule applies
- for synchronize_srcu(), synchronize_rcu_expedited(), and
- synchronize_srcu_expedited().
+ for synchronize_srcu(), synchronize_rcu_expedited(),
+ synchronize_srcu_expedited(), synchronize_rcu_tasks(),
+ synchronize_rcu_tasks_rude(), and synchronize_rcu_tasks_trace().
The expedited forms of these primitives have the same semantics
- as the non-expedited forms, but expediting is both expensive and
- (with the exception of synchronize_srcu_expedited()) unfriendly
- to real-time workloads. Use of the expedited primitives should
- be restricted to rare configuration-change operations that would
- not normally be undertaken while a real-time workload is running.
- However, real-time workloads can use rcupdate.rcu_normal kernel
- boot parameter to completely disable expedited grace periods,
- though this might have performance implications.
+ as the non-expedited forms, but expediting is more CPU intensive.
+ Use of the expedited primitives should be restricted to rare
+ configuration-change operations that would not normally be
+ undertaken while a real-time workload is running. Note that
+ IPI-sensitive real-time workloads can use the rcupdate.rcu_normal
+ kernel boot parameter to completely disable expedited grace
+ periods, though this might have performance implications.
In particular, if you find yourself invoking one of the expedited
primitives repeatedly in a loop, please do everyone a favor:
@@ -215,8 +223,9 @@ over a rather long period of time, but improvements are always welcome!
a single non-expedited primitive to cover the entire batch.
This will very likely be faster than the loop containing the
expedited primitive, and will be much much easier on the rest
- of the system, especially to real-time workloads running on
- the rest of the system.
+ of the system, especially to real-time workloads running on the
+	rest of the system.  Alternatively, instead of such a loop, use
+	asynchronous primitives such as call_rcu().
7. As of v4.20, a given kernel implements only one RCU flavor, which
is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
@@ -239,7 +248,8 @@ over a rather long period of time, but improvements are always welcome!
the corresponding readers must use rcu_read_lock_trace() and
rcu_read_unlock_trace(). If an updater uses call_rcu_tasks_rude()
or synchronize_rcu_tasks_rude(), then the corresponding readers
- must use anything that disables interrupts.
+ must use anything that disables preemption, for example,
+ preempt_disable() and preempt_enable().
Mixing things up will result in confusion and broken kernels, and
has even resulted in an exploitable security issue. Therefore,
@@ -253,15 +263,16 @@ over a rather long period of time, but improvements are always welcome!
that this usage is safe is that readers can use anything that
disables BH when updaters use call_rcu() or synchronize_rcu().
-8. Although synchronize_rcu() is slower than is call_rcu(), it
- usually results in simpler code. So, unless update performance is
- critically important, the updaters cannot block, or the latency of
- synchronize_rcu() is visible from userspace, synchronize_rcu()
- should be used in preference to call_rcu(). Furthermore,
- kfree_rcu() usually results in even simpler code than does
- synchronize_rcu() without synchronize_rcu()'s multi-millisecond
- latency. So please take advantage of kfree_rcu()'s "fire and
- forget" memory-freeing capabilities where it applies.
+8. Although synchronize_rcu() is slower than is call_rcu(),
+ it usually results in simpler code. So, unless update
+ performance is critically important, the updaters cannot block,
+ or the latency of synchronize_rcu() is visible from userspace,
+ synchronize_rcu() should be used in preference to call_rcu().
+ Furthermore, kfree_rcu() and kvfree_rcu() usually result
+ in even simpler code than does synchronize_rcu() without
+ synchronize_rcu()'s multi-millisecond latency. So please take
+ advantage of kfree_rcu()'s and kvfree_rcu()'s "fire and forget"
+ memory-freeing capabilities where it applies.
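	A minimal "fire and forget" sketch, assuming a hypothetical
	``struct foo`` that embeds a ``struct rcu_head`` named ``rh``::

		struct foo {
			struct list_head list;
			struct rcu_head rh;
			/* ... payload ... */
		};

		static void foo_del(struct foo *fp)
		{
			list_del_rcu(&fp->list);	/* Caller holds the update-side lock. */
			kfree_rcu(fp, rh);	/* Freed after a grace period, no callback to write. */
		}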
An especially important property of the synchronize_rcu()
primitive is that it automatically self-limits: if grace periods
@@ -271,8 +282,8 @@ over a rather long period of time, but improvements are always welcome!
cases where grace periods are delayed, as failing to do so can
result in excessive realtime latencies or even OOM conditions.
- Ways of gaining this self-limiting property when using call_rcu()
- include:
+ Ways of gaining this self-limiting property when using call_rcu(),
+ kfree_rcu(), or kvfree_rcu() include:
a. Keeping a count of the number of data-structure elements
used by the RCU-protected data structure, including
@@ -304,18 +315,21 @@ over a rather long period of time, but improvements are always welcome!
here is that superuser already has lots of ways to crash
the machine.
- d. Periodically invoke synchronize_rcu(), permitting a limited
- number of updates per grace period. Better yet, periodically
- invoke rcu_barrier() to wait for all outstanding callbacks.
+ d. Periodically invoke rcu_barrier(), permitting a limited
+ number of updates per grace period.
- The same cautions apply to call_srcu() and kfree_rcu().
+ The same cautions apply to call_srcu(), call_rcu_tasks(),
+ call_rcu_tasks_rude(), and call_rcu_tasks_trace(). This is
+ why there is an srcu_barrier(), rcu_barrier_tasks(),
+	rcu_barrier_tasks_rude(), and rcu_barrier_tasks_trace(),
+ respectively.
- Note that although these primitives do take action to avoid memory
- exhaustion when any given CPU has too many callbacks, a determined
- user could still exhaust memory. This is especially the case
- if a system with a large number of CPUs has been configured to
- offload all of its RCU callbacks onto a single CPU, or if the
- system has relatively little free memory.
+ Note that although these primitives do take action to avoid
+ memory exhaustion when any given CPU has too many callbacks,
+ a determined user or administrator can still exhaust memory.
+ This is especially the case if a system with a large number of
+ CPUs has been configured to offload all of its RCU callbacks onto
+ a single CPU, or if the system has relatively little free memory.
9. All RCU list-traversal primitives, which include
rcu_dereference(), list_for_each_entry_rcu(), and
@@ -344,14 +358,14 @@ over a rather long period of time, but improvements are always welcome!
and you don't hold the appropriate update-side lock, you *must*
use the "_rcu()" variants of the list macros. Failing to do so
will break Alpha, cause aggressive compilers to generate bad code,
- and confuse people trying to read your code.
+ and confuse people trying to understand your code.
11. Any lock acquired by an RCU callback must be acquired elsewhere
- with softirq disabled, e.g., via spin_lock_irqsave(),
- spin_lock_bh(), etc. Failing to disable softirq on a given
- acquisition of that lock will result in deadlock as soon as
- the RCU softirq handler happens to run your RCU callback while
- interrupting that acquisition's critical section.
+ with softirq disabled, e.g., via spin_lock_bh(). Failing to
+ disable softirq on a given acquisition of that lock will result
+ in deadlock as soon as the RCU softirq handler happens to run
+ your RCU callback while interrupting that acquisition's critical
+ section.
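	A minimal sketch of this rule, assuming a hypothetical ``foo_lock``
	shared between process context and an RCU callback::

		static DEFINE_SPINLOCK(foo_lock);

		static void foo_cb(struct rcu_head *rhp)
		{
			/* Softirq context, so a plain spin_lock() suffices here. */
			spin_lock(&foo_lock);
			/* ... update state shared with process context ... */
			spin_unlock(&foo_lock);
		}

		static void foo_update(void)
		{
			/*
			 * Process context must disable BH across this critical
			 * section, otherwise foo_cb() can deadlock on foo_lock.
			 */
			spin_lock_bh(&foo_lock);
			/* ... */
			spin_unlock_bh(&foo_lock);
		}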
12. RCU callbacks can be and are executed in parallel. In many cases,
the callback code simply wrappers around kfree(), so that this
@@ -372,7 +386,17 @@ over a rather long period of time, but improvements are always welcome!
for some real-time workloads, this is the whole point of using
the rcu_nocbs= kernel boot parameter.
-13. Unlike other forms of RCU, it *is* permissible to block in an
+ In addition, do not assume that callbacks queued in a given order
+ will be invoked in that order, even if they all are queued on the
+ same CPU. Furthermore, do not assume that same-CPU callbacks will
+ be invoked serially. For example, in recent kernels, CPUs can be
+ switched between offloaded and de-offloaded callback invocation,
+ and while a given CPU is undergoing such a switch, its callbacks
+ might be concurrently invoked by that CPU's softirq handler and
+ that CPU's rcuo kthread. At such times, that CPU's callbacks
+ might be executed both concurrently and out of order.
+
+13. Unlike most flavors of RCU, it *is* permissible to block in an
SRCU read-side critical section (demarked by srcu_read_lock()
and srcu_read_unlock()), hence the "SRCU": "sleepable RCU".
Please note that if you don't need to sleep in read-side critical
@@ -412,6 +436,12 @@ over a rather long period of time, but improvements are always welcome!
never sends IPIs to other CPUs, so it is easier on
real-time workloads than is synchronize_rcu_expedited().
+	It is also permissible to sleep in RCU Tasks Trace read-side
+	critical sections, which are delimited by rcu_read_lock_trace() and
+ rcu_read_unlock_trace(). However, this is a specialized flavor
+ of RCU, and you should not use it without first checking with
+ its current users. In most cases, you should instead use SRCU.
+
Note that rcu_assign_pointer() relates to SRCU just as it does to
other forms of RCU, but instead of rcu_dereference() you should
use srcu_dereference() in order to avoid lockdep splats.
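	A minimal SRCU sketch, assuming a hypothetical ``foo_srcu``
	protecting a pointer named ``foo_ptr``, with updates serialized by
	a hypothetical ``foo_mutex`` and a hypothetical sleepable helper::

		DEFINE_SRCU(foo_srcu);
		static struct foo __rcu *foo_ptr;

		static void foo_reader(void)
		{
			struct foo *fp;
			int idx;

			idx = srcu_read_lock(&foo_srcu);
			fp = srcu_dereference(foo_ptr, &foo_srcu);
			if (fp)
				foo_sleepable_work(fp);	/* Sleeping is legal under SRCU. */
			srcu_read_unlock(&foo_srcu, idx);
		}

		static void foo_replace(struct foo *newfp)
		{
			struct foo *oldfp;

			oldfp = rcu_dereference_protected(foo_ptr,
							  lockdep_is_held(&foo_mutex));
			rcu_assign_pointer(foo_ptr, newfp);
			synchronize_srcu(&foo_srcu);	/* Wait for pre-existing SRCU readers. */
			kfree(oldfp);
		}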
@@ -442,50 +472,62 @@ over a rather long period of time, but improvements are always welcome!
find problems as follows:
CONFIG_PROVE_LOCKING:
- check that accesses to RCU-protected data
- structures are carried out under the proper RCU
- read-side critical section, while holding the right
- combination of locks, or whatever other conditions
- are appropriate.
+ check that accesses to RCU-protected data structures
+ are carried out under the proper RCU read-side critical
+ section, while holding the right combination of locks,
+ or whatever other conditions are appropriate.
CONFIG_DEBUG_OBJECTS_RCU_HEAD:
- check that you don't pass the
- same object to call_rcu() (or friends) before an RCU
- grace period has elapsed since the last time that you
- passed that same object to call_rcu() (or friends).
+ check that you don't pass the same object to call_rcu()
+ (or friends) before an RCU grace period has elapsed
+ since the last time that you passed that same object to
+ call_rcu() (or friends).
__rcu sparse checks:
- tag the pointer to the RCU-protected data
- structure with __rcu, and sparse will warn you if you
- access that pointer without the services of one of the
- variants of rcu_dereference().
+ tag the pointer to the RCU-protected data structure
+ with __rcu, and sparse will warn you if you access that
+ pointer without the services of one of the variants
+ of rcu_dereference().
These debugging aids can help you find problems that are
otherwise extremely difficult to spot.
-17. If you register a callback using call_rcu() or call_srcu(), and
- pass in a function defined within a loadable module, then it in
- necessary to wait for all pending callbacks to be invoked after
- the last invocation and before unloading that module. Note that
- it is absolutely *not* sufficient to wait for a grace period!
- The current (say) synchronize_rcu() implementation is *not*
- guaranteed to wait for callbacks registered on other CPUs.
- Or even on the current CPU if that CPU recently went offline
- and came back online.
+17. If you pass a callback function defined within a module to one of
+ call_rcu(), call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(),
+ or call_rcu_tasks_trace(), then it is necessary to wait for all
+ pending callbacks to be invoked before unloading that module.
+ Note that it is absolutely *not* sufficient to wait for a grace
+	period!  For example, the synchronize_rcu() implementation is *not*
+ guaranteed to wait for callbacks registered on other CPUs via
+ call_rcu(). Or even on the current CPU if that CPU recently
+ went offline and came back online.
You instead need to use one of the barrier functions:
- call_rcu() -> rcu_barrier()
- call_srcu() -> srcu_barrier()
+ - call_rcu_tasks() -> rcu_barrier_tasks()
+ - call_rcu_tasks_rude() -> rcu_barrier_tasks_rude()
+ - call_rcu_tasks_trace() -> rcu_barrier_tasks_trace()
However, these barrier functions are absolutely *not* guaranteed
- to wait for a grace period. In fact, if there are no call_rcu()
- callbacks waiting anywhere in the system, rcu_barrier() is within
- its rights to return immediately.
-
- So if you need to wait for both an RCU grace period and for
- all pre-existing call_rcu() callbacks, you will need to execute
- both rcu_barrier() and synchronize_rcu(), if necessary, using
- something like workqueues to execute them concurrently.
+ to wait for a grace period. For example, if there are no
+ call_rcu() callbacks queued anywhere in the system, rcu_barrier()
+ can and will return immediately.
+
+ So if you need to wait for both a grace period and for all
+ pre-existing callbacks, you will need to invoke both functions,
+ with the pair depending on the flavor of RCU:
+
+ - Either synchronize_rcu() or synchronize_rcu_expedited(),
+ together with rcu_barrier()
+ - Either synchronize_srcu() or synchronize_srcu_expedited(),
+	  together with srcu_barrier()
+ - synchronize_rcu_tasks() and rcu_barrier_tasks()
+	- synchronize_rcu_tasks_rude() and rcu_barrier_tasks_rude()
+	- synchronize_rcu_tasks_trace() and rcu_barrier_tasks_trace()
+
+ If necessary, you can use something like workqueues to execute
+ the requisite pair of functions concurrently.
See rcubarrier.rst for more information.
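	A minimal module-exit sketch, assuming the module queues callbacks
	only via call_rcu() and that a hypothetical foo_stop_updates()
	prevents any further queuing::

		static void __exit foo_exit(void)
		{
			foo_stop_updates();	/* No new call_rcu() invocations after this point. */
			rcu_barrier();		/* Wait for already-queued callbacks to finish. */
		}
		module_exit(foo_exit);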
diff --git a/Documentation/RCU/index.rst b/Documentation/RCU/index.rst
index e703d3dbe60c..84a79903f6a8 100644
--- a/Documentation/RCU/index.rst
+++ b/Documentation/RCU/index.rst
@@ -9,7 +9,6 @@ RCU concepts
.. toctree::
:maxdepth: 3
- arrayRCU
checklist
lockdep
lockdep-splat
diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst
index 2a643e293fb4..bdc4bcc5289f 100644
--- a/Documentation/RCU/listRCU.rst
+++ b/Documentation/RCU/listRCU.rst
@@ -3,11 +3,10 @@
Using RCU to Protect Read-Mostly Linked Lists
=============================================
-One of the best applications of RCU is to protect read-mostly linked lists
-(``struct list_head`` in list.h). One big advantage of this approach
-is that all of the required memory barriers are included for you in
-the list macros. This document describes several applications of RCU,
-with the best fits first.
+One of the most common uses of RCU is protecting read-mostly linked lists
+(``struct list_head`` in list.h). One big advantage of this approach is
+that all of the required memory ordering is provided by the list macros.
+This document describes several list-based RCU use cases.
Example 1: Read-mostly list: Deferred Destruction
@@ -35,7 +34,8 @@ The code traversing the list of all processes typically looks like::
}
rcu_read_unlock();
-The simplified code for removing a process from a task list is::
+The simplified and heavily inlined code for removing a process from a
+task list is::
void release_task(struct task_struct *p)
{
@@ -45,39 +45,48 @@ The simplified code for removing a process from a task list is::
call_rcu(&p->rcu, delayed_put_task_struct);
}
-When a process exits, ``release_task()`` calls ``list_del_rcu(&p->tasks)`` under
-``tasklist_lock`` writer lock protection, to remove the task from the list of
-all tasks. The ``tasklist_lock`` prevents concurrent list additions/removals
-from corrupting the list. Readers using ``for_each_process()`` are not protected
-with the ``tasklist_lock``. To prevent readers from noticing changes in the list
-pointers, the ``task_struct`` object is freed only after one or more grace
-periods elapse (with the help of call_rcu()). This deferring of destruction
-ensures that any readers traversing the list will see valid ``p->tasks.next``
-pointers and deletion/freeing can happen in parallel with traversal of the list.
-This pattern is also called an **existence lock**, since RCU pins the object in
-memory until all existing readers finish.
+When a process exits, ``release_task()`` calls ``list_del_rcu(&p->tasks)``
+via __exit_signal() and __unhash_process() under ``tasklist_lock``
+writer lock protection. The list_del_rcu() invocation removes
+the task from the list of all tasks. The ``tasklist_lock``
+prevents concurrent list additions/removals from corrupting the
+list. Readers using ``for_each_process()`` are not protected with the
+``tasklist_lock``. To prevent readers from noticing changes in the list
+pointers, the ``task_struct`` object is freed only after one or more
+grace periods elapse, with the help of call_rcu(), which is invoked via
+put_task_struct_rcu_user(). This deferring of destruction ensures that
+any readers traversing the list will see valid ``p->tasks.next`` pointers
+and deletion/freeing can happen in parallel with traversal of the list.
+This pattern is also called an **existence lock**, since RCU refrains
+from invoking the delayed_put_task_struct() callback function until
+all existing readers finish, which guarantees that the ``task_struct``
+object in question will remain in existence until after the completion
+of all RCU readers that might possibly have a reference to that object.
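A minimal sketch of the same deferred-destruction pattern, assuming a
hypothetical ``struct foo`` that embeds both a ``list_head`` and an
``rcu_head``, with list updates serialized by a hypothetical ``foo_lock``::

	static void foo_free_rcu(struct rcu_head *rhp)
	{
		struct foo *fp = container_of(rhp, struct foo, rcu);

		kfree(fp);
	}

	static void foo_release(struct foo *fp)
	{
		spin_lock(&foo_lock);
		list_del_rcu(&fp->list);	/* Readers may still be walking the list... */
		spin_unlock(&foo_lock);
		call_rcu(&fp->rcu, foo_free_rcu);	/* ...so defer the kfree() until they finish. */
	}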
Example 2: Read-Side Action Taken Outside of Lock: No In-Place Updates
----------------------------------------------------------------------
-The best applications are cases where, if reader-writer locking were
-used, the read-side lock would be dropped before taking any action
-based on the results of the search. The most celebrated example is
-the routing table. Because the routing table is tracking the state of
-equipment outside of the computer, it will at times contain stale data.
-Therefore, once the route has been computed, there is no need to hold
-the routing table static during transmission of the packet. After all,
-you can hold the routing table static all you want, but that won't keep
-the external Internet from changing, and it is the state of the external
-Internet that really matters. In addition, routing entries are typically
-added or deleted, rather than being modified in place.
-
-A straightforward example of this use of RCU may be found in the
-system-call auditing support. For example, a reader-writer locked
+Some reader-writer locking use cases compute a value while holding
+the read-side lock, but continue to use that value after that lock is
+released. These use cases are often good candidates for conversion
+to RCU. One prominent example involves network packet routing.
+Because the packet-routing data tracks the state of equipment outside
+of the computer, it will at times contain stale data. Therefore, once
+the route has been computed, there is no need to hold the routing table
+static during transmission of the packet. After all, you can hold the
+routing table static all you want, but that won't keep the external
+Internet from changing, and it is the state of the external Internet
+that really matters. In addition, routing entries are typically added
+or deleted, rather than being modified in place. This is a rare example
+of the finite speed of light and the non-zero size of atoms actually
+helping make synchronization be lighter weight.
+
+A straightforward example of this type of RCU use case may be found in
+the system-call auditing support. For example, a reader-writer locked
implementation of ``audit_filter_task()`` might be as follows::
- static enum audit_state audit_filter_task(struct task_struct *tsk)
+ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
{
struct audit_entry *e;
enum audit_state state;
@@ -86,6 +95,8 @@ implementation of ``audit_filter_task()`` might be as follows::
/* Note: audit_filter_mutex held by caller. */
list_for_each_entry(e, &audit_tsklist, list) {
if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+ if (state == AUDIT_STATE_RECORD)
+ *key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
read_unlock(&auditsc_lock);
return state;
}
@@ -101,7 +112,7 @@ you are turning auditing off, it is OK to audit a few extra system calls.
This means that RCU can be easily applied to the read side, as follows::
- static enum audit_state audit_filter_task(struct task_struct *tsk)
+ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
{
struct audit_entry *e;
enum audit_state state;
@@ -110,6 +121,8 @@ This means that RCU can be easily applied to the read side, as follows::
/* Note: audit_filter_mutex held by caller. */
list_for_each_entry_rcu(e, &audit_tsklist, list) {
if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+ if (state == AUDIT_STATE_RECORD)
+ *key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
rcu_read_unlock();
return state;
}
@@ -118,13 +131,15 @@ This means that RCU can be easily applied to the read side, as follows::
return AUDIT_BUILD_CONTEXT;
}
-The ``read_lock()`` and ``read_unlock()`` calls have become rcu_read_lock()
-and rcu_read_unlock(), respectively, and the list_for_each_entry() has
-become list_for_each_entry_rcu(). The **_rcu()** list-traversal primitives
-insert the read-side memory barriers that are required on DEC Alpha CPUs.
+The read_lock() and read_unlock() calls have become rcu_read_lock()
+and rcu_read_unlock(), respectively, and the list_for_each_entry()
+has become list_for_each_entry_rcu(). The **_rcu()** list-traversal
+primitives add READ_ONCE() and diagnostic checks for incorrect use
+outside of an RCU read-side critical section.
The changes to the update side are also straightforward. A reader-writer lock
-might be used as follows for deletion and insertion::
+might be used as follows for deletion and insertion in these simplified
+versions of audit_del_rule() and audit_add_rule()::
static inline int audit_del_rule(struct audit_rule *rule,
struct list_head *list)
@@ -188,16 +203,16 @@ Following are the RCU equivalents for these two functions::
return 0;
}
-Normally, the ``write_lock()`` and ``write_unlock()`` would be replaced by a
+Normally, the write_lock() and write_unlock() would be replaced by a
spin_lock() and a spin_unlock(). But in this case, all callers hold
``audit_filter_mutex``, so no additional locking is required. The
-``auditsc_lock`` can therefore be eliminated, since use of RCU eliminates the
+auditsc_lock can therefore be eliminated, since use of RCU eliminates the
need for writers to exclude readers.
The list_del(), list_add(), and list_add_tail() primitives have been
replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu().
-The **_rcu()** list-manipulation primitives add memory barriers that are needed on
-weakly ordered CPUs (most of them!). The list_del_rcu() primitive omits the
+The **_rcu()** list-manipulation primitives add memory barriers that are
+needed on weakly ordered CPUs. The list_del_rcu() primitive omits the
pointer poisoning debug-assist code that would otherwise cause concurrent
readers to fail spectacularly.
@@ -238,7 +253,9 @@ need to be filled in)::
The RCU version creates a copy, updates the copy, then replaces the old
entry with the newly updated entry. This sequence of actions, allowing
concurrent reads while making a copy to perform an update, is what gives
-RCU (*read-copy update*) its name. The RCU code is as follows::
+RCU (*read-copy update*) its name.
+
+The RCU version of audit_upd_rule() is as follows::
static inline int audit_upd_rule(struct audit_rule *rule,
struct list_head *list,
@@ -267,6 +284,9 @@ RCU (*read-copy update*) its name. The RCU code is as follows::
Again, this assumes that the caller holds ``audit_filter_mutex``. Normally, the
writer lock would become a spinlock in this sort of code.
+The update_lsm_rule() function does something very similar, for those
+who would prefer to look at real Linux-kernel code.
+
Another use of this pattern can be found in the openswitch driver's *connection
tracking table* code in ``ct_limit_set()``. The table holds connection tracking
entries and has a limit on the maximum entries. There is one such table
@@ -281,9 +301,10 @@ Example 4: Eliminating Stale Data
---------------------------------
The auditing example above tolerates stale data, as do most algorithms
-that are tracking external state. Because there is a delay from the
-time the external state changes before Linux becomes aware of the change,
-additional RCU-induced staleness is generally not a problem.
+that are tracking external state. After all, there is a delay from the
+time the external state changes to the time Linux becomes aware of the
+change, so, as noted earlier, a small quantity of additional RCU-induced
+staleness is generally not a problem.
However, there are many examples where stale data cannot be tolerated.
One example in the Linux kernel is the System V IPC (see the shm_lock()
@@ -302,7 +323,7 @@ Quick Quiz:
If the system-call audit module were to ever need to reject stale data, one way
to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the
-audit_entry structure, and modify ``audit_filter_task()`` as follows::
+``audit_entry`` structure, and modify audit_filter_task() as follows::
static enum audit_state audit_filter_task(struct task_struct *tsk)
{
@@ -319,6 +340,8 @@ audit_entry structure, and modify ``audit_filter_task()`` as follows::
return AUDIT_BUILD_CONTEXT;
}
rcu_read_unlock();
+ if (state == AUDIT_STATE_RECORD)
+ *key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
return state;
}
}
@@ -326,12 +349,6 @@ audit_entry structure, and modify ``audit_filter_task()`` as follows::
return AUDIT_BUILD_CONTEXT;
}
-Note that this example assumes that entries are only added and deleted.
-Additional mechanism is required to deal correctly with the update-in-place
-performed by ``audit_upd_rule()``. For one thing, ``audit_upd_rule()`` would
-need additional memory barriers to ensure that the list_add_rcu() was really
-executed before the list_del_rcu().
-
The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the
spinlock as follows::
@@ -357,24 +374,32 @@ spinlock as follows::
This too assumes that the caller holds ``audit_filter_mutex``.
+Note that this example assumes that entries are only added and deleted.
+Additional mechanism is required to deal correctly with the update-in-place
+performed by audit_upd_rule(). For one thing, audit_upd_rule() would
+need to hold the locks of both the old ``audit_entry`` and its replacement
+while executing the list_replace_rcu().
+
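A hedged sketch of such a replacement, using the illustrative ``struct
my_entry`` with assumed ``lock``, ``deleted``, and ``rcu`` members rather
than the real audit code, might look like this::

    static void my_replace(struct my_entry *old, struct my_entry *new)
    {
            /* Caller holds the update-side mutex. */
            spin_lock(&old->lock);
            spin_lock_nested(&new->lock, SINGLE_DEPTH_NESTING);
            list_replace_rcu(&old->list, &new->list);
            old->deleted = 1;
            spin_unlock(&new->lock);
            spin_unlock(&old->lock);
            kfree_rcu(old, rcu);    /* free ``old`` after a grace period */
    }
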
Example 5: Skipping Stale Objects
---------------------------------
-For some usecases, reader performance can be improved by skipping stale objects
-during read-side list traversal if the object in concern is pending destruction
-after one or more grace periods. One such example can be found in the timerfd
-subsystem. When a ``CLOCK_REALTIME`` clock is reprogrammed - for example due to
-setting of the system time, then all programmed timerfds that depend on this
-clock get triggered and processes waiting on them to expire are woken up in
-advance of their scheduled expiry. To facilitate this, all such timers are added
-to an RCU-managed ``cancel_list`` when they are setup in
+For some use cases, reader performance can be improved by skipping
+stale objects during read-side list traversal, where stale objects
+are those that will be removed and destroyed after one or more grace
+periods. One such example can be found in the timerfd subsystem. When a
+``CLOCK_REALTIME`` clock is reprogrammed (for example due to setting
+of the system time), then all programmed ``timerfds`` that depend on
+this clock get triggered and processes waiting on them are awakened in
+advance of their scheduled expiry. To facilitate this, all such timers
+are added to an RCU-managed ``cancel_list`` when they are set up in
``timerfd_setup_cancel()``::
static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
{
spin_lock(&ctx->cancel_lock);
- if ((ctx->clockid == CLOCK_REALTIME &&
+ if ((ctx->clockid == CLOCK_REALTIME ||
+ ctx->clockid == CLOCK_REALTIME_ALARM) &&
(flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
if (!ctx->might_cancel) {
ctx->might_cancel = true;
@@ -382,13 +407,16 @@ to an RCU-managed ``cancel_list`` when they are setup in
list_add_rcu(&ctx->clist, &cancel_list);
spin_unlock(&cancel_lock);
}
+ } else {
+ __timerfd_remove_cancel(ctx);
}
spin_unlock(&ctx->cancel_lock);
}
-When a timerfd is freed (fd is closed), then the ``might_cancel`` flag of the
-timerfd object is cleared, the object removed from the ``cancel_list`` and
-destroyed::
+When a timerfd is freed (fd is closed), then the ``might_cancel``
+flag of the timerfd object is cleared, the object removed from the
+``cancel_list`` and destroyed, as shown in this simplified and inlined
+version of timerfd_release()::
int timerfd_release(struct inode *inode, struct file *file)
{
@@ -403,7 +431,10 @@ destroyed::
}
spin_unlock(&ctx->cancel_lock);
- hrtimer_cancel(&ctx->t.tmr);
+ if (isalarm(ctx))
+ alarm_cancel(&ctx->t.alarm);
+ else
+ hrtimer_cancel(&ctx->t.tmr);
kfree_rcu(ctx, rcu);
return 0;
}
@@ -416,6 +447,7 @@ objects::
void timerfd_clock_was_set(void)
{
+ ktime_t moffs = ktime_mono_to_real(0);
struct timerfd_ctx *ctx;
unsigned long flags;
@@ -424,7 +456,7 @@ objects::
if (!ctx->might_cancel)
continue;
spin_lock_irqsave(&ctx->wqh.lock, flags);
- if (ctx->moffs != ktime_mono_to_real(0)) {
+ if (ctx->moffs != moffs) {
ctx->moffs = KTIME_MAX;
ctx->ticks++;
wake_up_locked_poll(&ctx->wqh, EPOLLIN);
@@ -434,10 +466,10 @@ objects::
rcu_read_unlock();
}
-The key point here is, because RCU-traversal of the ``cancel_list`` happens
-while objects are being added and removed to the list, sometimes the traversal
-can step on an object that has been removed from the list. In this example, it
-is seen that it is better to skip such objects using a flag.
+The key point is that because RCU-protected traversal of the
+``cancel_list`` happens concurrently with object addition and removal,
+sometimes the traversal can access an object that has been removed from
+the list. In this example, a flag is used to skip such objects.
Summary
diff --git a/Documentation/RCU/lockdep.rst b/Documentation/RCU/lockdep.rst
index a94f55991a71..9308f1bdba05 100644
--- a/Documentation/RCU/lockdep.rst
+++ b/Documentation/RCU/lockdep.rst
@@ -17,7 +17,9 @@ state::
rcu_read_lock_held() for normal RCU.
rcu_read_lock_bh_held() for RCU-bh.
rcu_read_lock_sched_held() for RCU-sched.
+ rcu_read_lock_any_held() for any of normal RCU, RCU-bh, and RCU-sched.
srcu_read_lock_held() for SRCU.
+ rcu_read_lock_trace_held() for RCU Tasks Trace.
These functions are conservative, and will therefore return 1 if they
aren't certain (for example, if CONFIG_DEBUG_LOCK_ALLOC is not set).
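
For example, a lockdep-checked dereference that is legal either within
an RCU read-side critical section or with a given update-side lock held
might look like this sketch, where ``gp`` and ``my_lock`` are
illustrative names::

    struct foo *p;

    p = rcu_dereference_check(gp,
                              rcu_read_lock_held() ||
                              lockdep_is_held(&my_lock));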
@@ -53,6 +55,8 @@ checking of rcu_dereference() primitives:
is invoked by both SRCU readers and updaters.
rcu_dereference_raw(p):
Don't check. (Use sparingly, if at all.)
+ rcu_dereference_raw_check(p):
+ Don't do lockdep at all. (Use sparingly, if at all.)
rcu_dereference_protected(p, c):
Use explicit check expression "c", and omit all barriers
and compiler constraints. This is useful when the data
diff --git a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
index 2ab4642679c0..55c4f94a14d1 100644
--- a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml
@@ -148,7 +148,7 @@ allOf:
items:
- const: oscclk
- const: dout_clkcmu_fsys1_bus
- - const: dout_clkcmu_fsys1_mmc_card
+ - const: gout_clkcmu_fsys1_mmc_card
- const: dout_clkcmu_fsys1_usbdrd
- if:
diff --git a/Documentation/loongarch/booting.rst b/Documentation/loongarch/booting.rst
new file mode 100644
index 000000000000..91eccd410478
--- /dev/null
+++ b/Documentation/loongarch/booting.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+Booting Linux/LoongArch
+=======================
+
+:Author: Yanteng Si <[email protected]>
+:Date: 18 Nov 2022
+
+Information passed from BootLoader to kernel
+============================================
+
+LoongArch supports ACPI and FDT. The information that needs to be passed
+to the kernel includes the memmap, the initrd, the command line, optionally
+the ACPI/FDT tables, and so on.
+
+The kernel is passed the following arguments on `kernel_entry` :
+
+ - a0 = efi_boot: `efi_boot` is a flag indicating whether
+ this boot environment is fully UEFI-compliant.
+
+ - a1 = cmdline: `cmdline` is a pointer to the kernel command line.
+
+ - a2 = systemtable: `systemtable` points to the EFI system table.
+ All pointers involved at this stage are in physical addresses.
+
+Header of Linux/LoongArch kernel images
+=======================================
+
+Linux/LoongArch kernel images are EFI images. Being PE files, they have
+a 64-byte header structured like::
+
+ u32 MZ_MAGIC /* "MZ", MS-DOS header */
+ u32 res0 = 0 /* Reserved */
+ u64 kernel_entry /* Kernel entry point */
+ u64 _end - _text /* Kernel image effective size */
+ u64 load_offset /* Kernel image load offset from start of RAM */
+ u64 res1 = 0 /* Reserved */
+ u64 res2 = 0 /* Reserved */
+ u64 res3 = 0 /* Reserved */
+ u32 LINUX_PE_MAGIC /* Magic number */
+ u32 pe_header - _head /* Offset to the PE header */
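
Expressed as a C structure (an illustrative sketch only, not a
definition copied from the kernel sources), the 64-byte layout above
corresponds to::

    struct loongarch_image_header {
            u32 dos_sig;            /* MZ_MAGIC, "MZ" MS-DOS header */
            u32 res0;               /* reserved, 0 */
            u64 kernel_entry;       /* kernel entry point */
            u64 kernel_asize;       /* effective image size, _end - _text */
            u64 load_offset;        /* load offset from start of RAM */
            u64 res1;               /* reserved, 0 */
            u64 res2;               /* reserved, 0 */
            u64 res3;               /* reserved, 0 */
            u32 linux_pe_magic;     /* LINUX_PE_MAGIC */
            u32 pe_header_offset;   /* pe_header - _head */
    };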
diff --git a/Documentation/loongarch/index.rst b/Documentation/loongarch/index.rst
index aaba648db907..c779bfa00c05 100644
--- a/Documentation/loongarch/index.rst
+++ b/Documentation/loongarch/index.rst
@@ -9,6 +9,7 @@ LoongArch Architecture
:numbered:
introduction
+ booting
irq-chip-model
features
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 06f80e3785c5..cc621decd943 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1966,7 +1966,7 @@ There are some more advanced barrier functions:
(*) io_stop_wc();
For memory accesses with write-combining attributes (e.g. those returned
- by ioremap_wc(), the CPU may wait for prior accesses to be merged with
+ by ioremap_wc()), the CPU may wait for prior accesses to be merged with
subsequent ones. io_stop_wc() can be used to prevent the merging of
write-combining memory accesses before this macro with those after it when
such wait has performance implications.
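
A rough sketch of the intended usage, with dev_regs standing in for a
hypothetical write-combining mapping and the register offsets purely
illustrative, might be:

    void __iomem *dev_regs = ioremap_wc(phys_base, size);

    writel_relaxed(DATA0, dev_regs + REG0);
    writel_relaxed(DATA1, dev_regs + REG1);
    io_stop_wc();   /* keep the accesses above from merging with those below */
    writel_relaxed(DATA2, dev_regs + REG2);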
diff --git a/Documentation/mm/slub.rst b/Documentation/mm/slub.rst
index 4e1578186b4f..7f652216dabe 100644
--- a/Documentation/mm/slub.rst
+++ b/Documentation/mm/slub.rst
@@ -116,6 +116,8 @@ options from the ``slub_debug`` parameter translate to the following files::
T trace
A failslab
+The failslab file is writable, so writing 1 or 0 enables or disables
+the option at runtime. A write returns -EINVAL if the cache is an alias.
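
For instance, a userspace sketch of toggling it at runtime (with
``dentry`` standing in for whichever cache is being debugged) might be::

    int fd = open("/sys/kernel/slab/dentry/failslab", O_WRONLY);

    if (fd >= 0) {
            write(fd, "1", 1);      /* write "0" to disable again */
            close(fd);
    }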
Careful with tracing: It may spew out lots of information and never stop if
used on the wrong slab.
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 75aa5531cc7d..7165927a708e 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -80,7 +80,7 @@ Documentation/memory-barriers.txt
- 메모리 배리어의 종류.
- 메모리 배리어에 대해 가정해선 안될 것.
- - 데이터 의존성 배리어 (역사적).
+ - 주소 데이터 의존성 배리어 (역사적).
- 컨트롤 의존성.
- SMP 배리어 짝맞추기.
- 메모리 배리어 시퀀스의 예.
@@ -217,7 +217,7 @@ Documentation/memory-barriers.txt
P = &B D = *Q;
D 로 읽혀지는 값은 CPU 2 에서 P 로부터 읽혀진 주소값에 의존적이기 때문에 여기엔
-분명한 데이터 의존성이 있습니다. 하지만 이 이벤트들의 실행 결과로는 아래의
+분명한 주소 의존성이 있습니다. 하지만 이 이벤트들의 실행 결과로는 아래의
결과들이 모두 나타날 수 있습니다:
(Q == &A) and (D == 1)
@@ -416,19 +416,19 @@ CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니
하나씩 요청해 집어넣습니다. 쓰기 배리어 앞의 모든 스토어 오퍼레이션들은
쓰기 배리어 뒤의 모든 스토어 오퍼레이션들보다 _앞서_ 수행될 겁니다.
- [!] 쓰기 배리어들은 읽기 또는 데이터 의존성 배리어와 함께 짝을 맞춰
+ [!] 쓰기 배리어들은 읽기 또는 주소 의존성 배리어와 함께 짝을 맞춰
사용되어야만 함을 알아두세요; "SMP 배리어 짝맞추기" 서브섹션을 참고하세요.
- (2) 데이터 의존성 배리어.
+ (2) 주소 의존성 배리어 (역사적).
- 데이터 의존성 배리어는 읽기 배리어의 보다 완화된 형태입니다. 두개의 로드
+ 주소 의존성 배리어는 읽기 배리어의 보다 완화된 형태입니다. 두개의 로드
오퍼레이션이 있고 두번째 것이 첫번째 것의 결과에 의존하고 있을 때(예:
두번째 로드가 참조할 주소를 첫번째 로드가 읽는 경우), 두번째 로드가 읽어올
데이터는 첫번째 로드에 의해 그 주소가 얻어진 뒤에 업데이트 됨을 보장하기
- 위해서 데이터 의존성 배리어가 필요할 수 있습니다.
+ 위해서 주소 의존성 배리어가 필요할 수 있습니다.
- 데이터 의존성 배리어는 상호 의존적인 로드 오퍼레이션들 사이의 부분적 순서
+ 주소 의존성 배리어는 상호 의존적인 로드 오퍼레이션들 사이의 부분적 순서
세우기입니다; 스토어 오퍼레이션들이나 독립적인 로드들, 또는 중복되는
로드들에 대해서는 어떤 영향도 끼치지 않습니다.
@@ -436,37 +436,41 @@ CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니
오퍼레이션들을 던져 넣고 있으며, 거기에 관심이 있는 다른 CPU 는 그
오퍼레이션들을 메모리 시스템이 실행한 결과를 인지할 수 있습니다. 이처럼
다른 CPU 의 스토어 오퍼레이션의 결과에 관심을 두고 있는 CPU 가 수행 요청한
- 데이터 의존성 배리어는, 배리어 앞의 어떤 로드 오퍼레이션이 다른 CPU 에서
+ 주소 의존성 배리어는, 배리어 앞의 어떤 로드 오퍼레이션이 다른 CPU 에서
던져 넣은 스토어 오퍼레이션과 같은 영역을 향했다면, 그런 스토어
- 오퍼레이션들이 만들어내는 결과가 데이터 의존성 배리어 뒤의 로드
+ 오퍼레이션들이 만들어내는 결과가 주소 의존성 배리어 뒤의 로드
오퍼레이션들에게는 보일 것을 보장합니다.
이 순서 세우기 제약에 대한 그림을 보기 위해선 "메모리 배리어 시퀀스의 예"
서브섹션을 참고하시기 바랍니다.
- [!] 첫번째 로드는 반드시 _데이터_ 의존성을 가져야지 컨트롤 의존성을 가져야
+ [!] 첫번째 로드는 반드시 _주소_ 의존성을 가져야지 컨트롤 의존성을 가져야
하는게 아님을 알아두십시오. 만약 두번째 로드를 위한 주소가 첫번째 로드에
의존적이지만 그 의존성은 조건적이지 그 주소 자체를 가져오는게 아니라면,
그것은 _컨트롤_ 의존성이고, 이 경우에는 읽기 배리어나 그보다 강력한
무언가가 필요합니다. 더 자세한 내용을 위해서는 "컨트롤 의존성" 서브섹션을
참고하시기 바랍니다.
- [!] 데이터 의존성 배리어는 보통 쓰기 배리어들과 함께 짝을 맞춰 사용되어야
+ [!] 주소 의존성 배리어는 보통 쓰기 배리어들과 함께 짝을 맞춰 사용되어야
합니다; "SMP 배리어 짝맞추기" 서브섹션을 참고하세요.
+ [!] 커널 v5.9 릴리즈에서 명시적 주소 의존성 배리어를 위한 커널 API 들이
+ 삭제되었습니다. 오늘날에는 공유된 변수들의 로드를 표시하는 READ_ONCE() 나
+ rcu_dereference() 와 같은 API 들은 묵시적으로 주소 의존성 배리어를 제공합니다.
+
(3) 읽기 (또는 로드) 메모리 배리어.
- 읽기 배리어는 데이터 의존성 배리어 기능의 보장사항에 더해서 배리어보다
- 앞서 명시된 모든 LOAD 오퍼레이션들이 배리어 뒤에 명시되는 모든 LOAD
+ 읽기 배리어는 주소 의존성 배리어 기능의 보장사항에 더해서 배리어보다 앞서
+ 명시된 모든 LOAD 오퍼레이션들이 배리어 뒤에 명시되는 모든 LOAD
오퍼레이션들보다 먼저 행해진 것으로 시스템의 다른 컴포넌트들에 보여질 것을
보장합니다.
읽기 배리어는 로드 오퍼레이션에 행해지는 부분적 순서 세우기입니다; 스토어
오퍼레이션에 대해서는 어떤 영향도 끼치지 않습니다.
- 읽기 메모리 배리어는 데이터 의존성 배리어를 내장하므로 데이터 의존성
- 배리어를 대신할 수 있습니다.
+ 읽기 메모리 배리어는 주소 의존성 배리어를 내장하므로 주소 의존성 배리어를
+ 대신할 수 있습니다.
[!] 읽기 배리어는 일반적으로 쓰기 배리어들과 함께 짝을 맞춰 사용되어야
합니다; "SMP 배리어 짝맞추기" 서브섹션을 참고하세요.
@@ -571,16 +575,20 @@ ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE �
Documentation/core-api/dma-api.rst
-데이터 의존성 배리어 (역사적)
------------------------------
+주소 의존성 배리어 (역사적)
+---------------------------
리눅스 커널 v4.15 기준으로, smp_mb() 가 DEC Alpha 용 READ_ONCE() 코드에
추가되었는데, 이는 이 섹션에 주의를 기울여야 하는 사람들은 DEC Alpha 아키텍쳐
전용 코드를 만드는 사람들과 READ_ONCE() 자체를 만드는 사람들 뿐임을 의미합니다.
-그런 분들을 위해, 그리고 역사에 관심 있는 분들을 위해, 여기 데이터 의존성
+그런 분들을 위해, 그리고 역사에 관심 있는 분들을 위해, 여기 주소 의존성
배리어에 대한 이야기를 적습니다.
-데이터 의존성 배리어의 사용에 있어 지켜야 하는 사항들은 약간 미묘하고, 데이터
+[!] 주소 의존성은 로드에서 로드로와 로드에서 스토어로의 관계들 모두에서
+나타나지만, 주소 의존성 배리어는 로드에서 스토어로의 상황에서는 필요하지
+않습니다.
+
+주소 의존성 배리어의 사용에 있어 지켜야 하는 사항들은 약간 미묘하고, 데이터
의존성 배리어가 사용되어야 하는 상황도 항상 명백하지는 않습니다. 설명을 위해
다음의 이벤트 시퀀스를 생각해 봅시다:
@@ -590,10 +598,13 @@ ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE �
B = 4;
<쓰기 배리어>
WRITE_ONCE(P, &B)
- Q = READ_ONCE(P);
+ Q = READ_ONCE_OLD(P);
D = *Q;
-여기엔 분명한 데이터 의존성이 존재하므로, 이 시퀀스가 끝났을 때 Q 는 &A 또는 &B
+[!] READ_ONCE_OLD() 는 4.15 커널 전의 버전에서의, 주소 의존성 배리어를 내포하지
+않는 READ_ONCE() 에 해당합니다.
+
+여기엔 분명한 주소 의존성이 존재하므로, 이 시퀀스가 끝났을 때 Q 는 &A 또는 &B
일 것이고, 따라서:
(Q == &A) 는 (D == 1) 를,
@@ -608,8 +619,8 @@ ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE �
그렇지 않습니다, 그리고 이 현상은 (DEC Alpha 와 같은) 여러 CPU 에서 실제로
발견될 수 있습니다.
-이 문제 상황을 제대로 해결하기 위해, 데이터 의존성 배리어나 그보다 강화된
-무언가가 주소를 읽어올 때와 데이터를 읽어올 때 사이에 추가되어야만 합니다:
+이 문제 상황을 제대로 해결하기 위해, READ_ONCE() 는 커널 v4.15 릴리즈 부터
+묵시적 주소 의존성 배리어를 제공합니다:
CPU 1 CPU 2
=============== ===============
@@ -618,7 +629,7 @@ ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE �
<쓰기 배리어>
WRITE_ONCE(P, &B);
Q = READ_ONCE(P);
- <데이터 의존성 배리어>
+ <묵시적 주소 의존성 배리어>
D = *Q;
이 변경은 앞의 처음 두가지 결과 중 하나만이 발생할 수 있고, 세번째의 결과는
@@ -634,7 +645,7 @@ P 는 짝수 번호 캐시 라인에 저장되어 있고, 변수 B 는 홀수 �
중이라면 포인터 P (&B) 의 새로운 값과 변수 B 의 기존 값 (2) 를 볼 수 있습니다.
-의존적 쓰기들의 순서를 맞추는데에는 데이터 의존성 배리어가 필요치 않은데, 이는
+의존적 쓰기들의 순서를 맞추는데에는 주소 의존성 배리어가 필요치 않은데, 이는
리눅스 커널이 지원하는 CPU 들은 (1) 쓰기가 정말로 일어날지, (2) 쓰기가 어디에
이루어질지, 그리고 (3) 쓰여질 값을 확실히 알기 전까지는 쓰기를 수행하지 않기
때문입니다. 하지만 "컨트롤 의존성" 섹션과
@@ -647,12 +658,12 @@ Documentation/RCU/rcu_dereference.rst 파일을 주의 깊게 읽어 주시기 �
B = 4;
<쓰기 배리어>
WRITE_ONCE(P, &B);
- Q = READ_ONCE(P);
+ Q = READ_ONCE_OLD(P);
WRITE_ONCE(*Q, 5);
-따라서, Q 로의 읽기와 *Q 로의 쓰기 사이에는 데이터 종속성 배리어가 필요치
-않습니다. 달리 말하면, 데이터 종속성 배리어가 없더라도 다음 결과는 생기지
-않습니다:
+따라서, Q 로의 읽기와 *Q 로의 쓰기 사이에는 주소 의존성 배리어가 필요치
+않습니다. 달리 말하면, 오늘날의 READ_ONCE() 의 묵시적 주소 의존성 배리어가
+없더라도 다음 결과는 생기지 않습니다:
(Q == &B) && (B == 4)
@@ -663,16 +674,16 @@ Documentation/RCU/rcu_dereference.rst 파일을 주의 깊게 읽어 주시기 �
해줍니다.
-데이터 의존성에 의해 제공되는 이 순서규칙은 이를 포함하고 있는 CPU 에
+주소 의존성에 의해 제공되는 이 순서규칙은 이를 포함하고 있는 CPU 에
지역적임을 알아두시기 바랍니다. 더 많은 정보를 위해선 "Multicopy 원자성"
섹션을 참고하세요.
-데이터 의존성 배리어는 매우 중요한데, 예를 들어 RCU 시스템에서 그렇습니다.
+주소 의존성 배리어는 매우 중요한데, 예를 들어 RCU 시스템에서 그렇습니다.
include/linux/rcupdate.h 의 rcu_assign_pointer() 와 rcu_dereference() 를
-참고하세요. 여기서 데이터 의존성 배리어는 RCU 로 관리되는 포인터의 타겟을 현재
-타겟에서 수정된 새로운 타겟으로 바꾸는 작업에서 새로 수정된 타겟이 초기화가
-완료되지 않은 채로 보여지는 일이 일어나지 않게 해줍니다.
+참고하세요. 이것들은 RCU 로 관리되는 포인터의 타겟을 현재 타겟에서 수정된
+새로운 타겟으로 바꾸는 작업에서 새로 수정된 타겟이 초기화가 완료되지 않은 채로
+보여지는 일이 일어나지 않게 해줍니다.
더 많은 예를 위해선 "캐시 일관성" 서브섹션을 참고하세요.
@@ -684,16 +695,17 @@ include/linux/rcupdate.h 의 rcu_assign_pointer() 와 rcu_dereference() 를
약간 다루기 어려울 수 있습니다. 이 섹션의 목적은 여러분이 컴파일러의 무시로
인해 여러분의 코드가 망가지는 걸 막을 수 있도록 돕는겁니다.
-로드-로드 컨트롤 의존성은 데이터 의존성 배리어만으로는 정확히 동작할 수가
-없어서 읽기 메모리 배리어를 필요로 합니다. 아래의 코드를 봅시다:
+로드-로드 컨트롤 의존성은 (묵시적인) 주소 의존성 배리어만으로는 정확히 동작할
+수가 없어서 읽기 메모리 배리어를 필요로 합니다. 아래의 코드를 봅시다:
q = READ_ONCE(a);
+ <묵시적 주소 의존성 배리어>
if (q) {
- <데이터 의존성 배리어> /* BUG: No data dependency!!! */
+ /* BUG: No address dependency!!! */
p = READ_ONCE(b);
}
-이 코드는 원하는 대로의 효과를 내지 못할 수 있는데, 이 코드에는 데이터 의존성이
+이 코드는 원하는 대로의 효과를 내지 못할 수 있는데, 이 코드에는 주소 의존성이
아니라 컨트롤 의존성이 존재하기 때문으로, 이런 상황에서 CPU 는 실행 속도를 더
빠르게 하기 위해 분기 조건의 결과를 예측하고 코드를 재배치 할 수 있어서 다른
CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레이션보다 먼저 발생한
@@ -930,9 +942,9 @@ CPU 간 상호작용을 다룰 때에 일부 타입의 메모리 배리어는 �
범용 배리어들은 범용 배리어끼리도 짝을 맞추지만 multicopy 원자성이 없는
대부분의 다른 타입의 배리어들과도 짝을 맞춥니다. ACQUIRE 배리어는 RELEASE
배리어와 짝을 맞춥니다만, 둘 다 범용 배리어를 포함해 다른 배리어들과도 짝을
-맞출 수 있습니다. 쓰기 배리어는 데이터 의존성 배리어나 컨트롤 의존성, ACQUIRE
+맞출 수 있습니다. 쓰기 배리어는 주소 의존성 배리어나 컨트롤 의존성, ACQUIRE
배리어, RELEASE 배리어, 읽기 배리어, 또는 범용 배리어와 짝을 맞춥니다.
-비슷하게 읽기 배리어나 컨트롤 의존성, 또는 데이터 의존성 배리어는 쓰기 배리어나
+비슷하게 읽기 배리어나 컨트롤 의존성, 또는 주소 의존성 배리어는 쓰기 배리어나
ACQUIRE 배리어, RELEASE 배리어, 또는 범용 배리어와 짝을 맞추는데, 다음과
같습니다:
@@ -951,7 +963,7 @@ ACQUIRE 배리어, RELEASE 배리어, 또는 범용 배리어와 짝을 맞추�
a = 1;
<쓰기 배리어>
WRITE_ONCE(b, &a); x = READ_ONCE(b);
- <데이터 의존성 배리어>
+ <묵시적 주소 의존성 배리어>
y = *x;
또는:
@@ -970,8 +982,8 @@ ACQUIRE 배리어, RELEASE 배리어, 또는 범용 배리어와 짝을 맞추�
기본적으로, 여기서의 읽기 배리어는 "더 완화된" 타입일 순 있어도 항상 존재해야
합니다.
-[!] 쓰기 배리어 앞의 스토어 오퍼레이션은 일반적으로 읽기 배리어나 데이터
-의존성 배리어 뒤의 로드 오퍼레이션과 매치될 것이고, 반대도 마찬가지입니다:
+[!] 쓰기 배리어 앞의 스토어 오퍼레이션은 일반적으로 읽기 배리어나 주소 의존성
+배리어 뒤의 로드 오퍼레이션과 매치될 것이고, 반대도 마찬가지입니다:
CPU 1 CPU 2
=================== ===================
@@ -1023,7 +1035,7 @@ ACQUIRE 배리어, RELEASE 배리어, 또는 범용 배리어와 짝을 맞추�
V
-둘째, 데이터 의존성 배리어는 데이터 의존적 로드 오퍼레이션들의 부분적 순서
+둘째, 주소 의존성 배리어는 데이터 의존적 로드 오퍼레이션들의 부분적 순서
세우기로 동작합니다. 다음 일련의 이벤트들을 보세요:
CPU 1 CPU 2
@@ -1069,7 +1081,7 @@ ACQUIRE 배리어, RELEASE 배리어, 또는 범용 배리어와 짝을 맞추�
앞의 예에서, CPU 2 는 (B 의 값이 될) *C 의 값 읽기가 C 의 LOAD 뒤에 이어짐에도
B 가 7 이라는 결과를 얻습니다.
-하지만, 만약 데이터 의존성 배리어가 C 의 로드와 *C (즉, B) 의 로드 사이에
+하지만, 만약 주소 의존성 배리어가 C 의 로드와 *C (즉, B) 의 로드 사이에
있었다면:
CPU 1 CPU 2
@@ -1080,7 +1092,7 @@ B 가 7 이라는 결과를 얻습니다.
<쓰기 배리어>
STORE C = &B LOAD X
STORE D = 4 LOAD C (gets &B)
- <데이터 의존성 배리어>
+ <주소 의존성 배리어>
LOAD *C (reads B)
다음과 같이 됩니다:
@@ -1103,7 +1115,7 @@ B 가 7 이라는 결과를 얻습니다.
| +-------+ | |
| | X->9 |------>| |
| +-------+ | |
- C 로의 스토어 앞의 ---> \ ddddddddddddddddd | |
+ C 로의 스토어 앞의 ---> \ aaaaaaaaaaaaaaaaa | |
모든 이벤트 결과가 \ +-------+ | |
뒤의 로드에게 ----->| B->2 |------>| |
보이게 강제한다 +-------+ | |
@@ -1291,7 +1303,7 @@ A 의 로드 두개가 모두 B 의 로드 뒤에 있지만, 서로 다른 값�
즉각 완료한다 : : +-------+
-읽기 배리어나 데이터 의존성 배리어를 두번째 로드 직전에 놓는다면:
+읽기 배리어나 주소 의존성 배리어를 두번째 로드 직전에 놓는다면:
CPU 1 CPU 2
======================= =======================
@@ -1785,21 +1797,20 @@ READ_ONCE(jiffies) 라고 할 필요가 없습니다. READ_ONCE() 와 WRITE_ONC
CPU 메모리 배리어
-----------------
-리눅스 커널은 다음의 여덟개 기본 CPU 메모리 배리어를 가지고 있습니다:
+리눅스 커널은 다음의 일곱개 기본 CPU 메모리 배리어를 가지고 있습니다:
TYPE MANDATORY SMP CONDITIONAL
- =============== ======================= ===========================
+ =============== ======================= ===============
범용 mb() smp_mb()
쓰기 wmb() smp_wmb()
읽기 rmb() smp_rmb()
- 데이터 의존성 READ_ONCE()
+ 주소 의존성 READ_ONCE()
-데이터 의존성 배리어를 제외한 모든 메모리 배리어는 컴파일러 배리어를
-포함합니다. 데이터 의존성은 컴파일러에의 추가적인 순서 보장을 포함하지
-않습니다.
+주소 의존성 배리어를 제외한 모든 메모리 배리어는 컴파일러 배리어를 포함합니다.
+주소 의존성은 컴파일러에의 추가적인 순서 보장을 포함하지 않습니다.
-방백: 데이터 의존성이 있는 경우, 컴파일러는 해당 로드를 올바른 순서로 일으킬
+방백: 주소 의존성이 있는 경우, 컴파일러는 해당 로드를 올바른 순서로 일으킬
것으로 (예: `a[b]` 는 a[b] 를 로드 하기 전에 b 의 값을 먼저 로드한다)
기대되지만, C 언어 사양에는 컴파일러가 b 의 값을 추측 (예: 1 과 같음) 해서
b 로드 전에 a 로드를 하는 코드 (예: tmp = a[1]; if (b != 1) tmp = a[b]; ) 를
@@ -1863,6 +1874,7 @@ Mandatory 배리어들은 SMP 시스템에서도 UP 시스템에서도 SMP 효�
(*) dma_wmb();
(*) dma_rmb();
+ (*) dma_mb();
이것들은 CPU 와 DMA 가능한 디바이스에서 모두 액세스 가능한 공유 메모리의
읽기, 쓰기 작업들의 순서를 보장하기 위해 consistent memory 에서 사용하기
@@ -1893,12 +1905,13 @@ Mandatory 배리어들은 SMP 시스템에서도 UP 시스템에서도 SMP 효�
dma_rmb() 는 디스크립터로부터 데이터를 읽어오기 전에 디바이스가 소유권을
내려놓았을 것을 보장하고, dma_wmb() 는 디바이스가 자신이 소유권을 다시
- 가졌음을 보기 전에 디스크립터에 데이터가 쓰였을 것을 보장합니다. 참고로,
- writel() 을 사용하면 캐시 일관성이 있는 메모리 (cache coherent memory)
- 쓰기가 MMIO 영역에의 쓰기 전에 완료되었을 것을 보장하므로 writel() 앞에
- wmb() 를 실행할 필요가 없음을 알아두시기 바랍니다. writel() 보다 비용이
- 저렴한 writel_relaxed() 는 이런 보장을 제공하지 않으므로 여기선 사용되지
- 않아야 합니다.
+ 가졌음을 보기 전에 디스크립터에 데이터가 쓰였을 것을 보장합니다. dma_mb()
+ 는 dma_rmb() 와 dma_wmb() 를 모두 내포합니다. 참고로, writel() 을
+ 사용하면 캐시 일관성이 있는 메모리 (cache coherent memory) 쓰기가 MMIO
+ 영역에의 쓰기 전에 완료되었을 것을 보장하므로 writel() 앞에 wmb() 를
+ 실행할 필요가 없음을 알아두시기 바랍니다. writel() 보다 비용이 저렴한
+ writel_relaxed() 는 이런 보장을 제공하지 않으므로 여기선 사용되지 않아야
+ 합니다.
writel_relaxed() 와 같은 완화된 I/O 접근자들에 대한 자세한 내용을 위해서는
"커널 I/O 배리어의 효과" 섹션을, consistent memory 에 대한 자세한 내용을
@@ -1918,6 +1931,14 @@ Mandatory 배리어들은 SMP 시스템에서도 UP 시스템에서도 SMP 효�
Persistent memory 에서의 로드를 위해선 현재의 읽기 메모리 배리어로도 읽기
순서를 보장하는데 충분합니다.
+ (*) io_stop_wc();
+
+ 쓰기와 결합된 특성을 갖는 메모리 액세스의 경우 (예: ioremap_wc() 에 의해
+ 리턴되는 것들), CPU 는 앞의 액세스들이 뒤따르는 것들과 병합되게끔 기다릴
+ 수 있습니다. io_stop_wc() 는 그런 기다림이 성능에 영향을 끼칠 수 있을 때,
+ 이 매크로 앞의 쓰기-결합된 메모리 액세스들이 매크로 뒤의 것들과 병합되는
+ 것을 방지하기 위해 사용될 수 있습니다.
+
=========================
암묵적 커널 메모리 배리어
=========================
@@ -2827,9 +2848,9 @@ ld.acq 와 stl.rel 인스트럭션을 각각 만들어 내도록 합니다.
DEC Alpha CPU 는 가장 완화된 메모리 순서의 CPU 중 하나입니다. 뿐만 아니라,
Alpha CPU 의 일부 버전은 분할된 데이터 캐시를 가지고 있어서, 의미적으로
관계되어 있는 두개의 캐시 라인이 서로 다른 시간에 업데이트 되는게 가능합니다.
-이게 데이터 의존성 배리어가 정말 필요해지는 부분인데, 데이터 의존성 배리어는
-메모리 일관성 시스템과 함께 두개의 캐시를 동기화 시켜서, 포인터 변경과 새로운
-데이터의 발견을 올바른 순서로 일어나게 하기 때문입니다.
+이게 주소 의존성 배리어가 정말 필요해지는 부분인데, 주소 의존성 배리어는 메모리
+일관성 시스템과 함께 두개의 캐시를 동기화 시켜서, 포인터 변경과 새로운 데이터의
+발견을 올바른 순서로 일어나게 하기 때문입니다.
리눅스 커널의 메모리 배리어 모델은 Alpha 에 기초해서 정의되었습니다만, v4.15
부터는 Alpha 용 READ_ONCE() 코드 내에 smp_mb() 가 추가되어서 메모리 모델로의
diff --git a/Documentation/translations/zh_CN/loongarch/booting.rst b/Documentation/translations/zh_CN/loongarch/booting.rst
new file mode 100644
index 000000000000..fb6440c438f0
--- /dev/null
+++ b/Documentation/translations/zh_CN/loongarch/booting.rst
@@ -0,0 +1,48 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/loongarch/booting.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <[email protected]>
+
+====================
+启动 Linux/LoongArch
+====================
+
+:作者: 司延腾 <[email protected]>
+:日期: 2022年11月18日
+
+BootLoader传递给内核的信息
+==========================
+
+LoongArch支持ACPI和FDT启动,需要传递给内核的信息包括memmap、initrd、cmdline、可
+选的ACPI/FDT表等。
+
+内核在 `kernel_entry` 入口处被传递以下参数:
+
+ - a0 = efi_boot: `efi_boot` 是一个标志,表示这个启动环境是否完全符合UEFI
+ 的要求。
+
+ - a1 = cmdline: `cmdline` 是一个指向内核命令行的指针。
+
+ - a2 = systemtable: `systemtable` 指向EFI的系统表,在这个阶段涉及的所有
+ 指针都是物理地址。
+
+Linux/LoongArch内核镜像文件头
+=============================
+
+内核镜像是EFI镜像。作为PE文件,它们有一个64字节的头部结构体,如下所示::
+
+ u32 MZ_MAGIC /* "MZ", MS-DOS 头 */
+ u32 res0 = 0 /* 保留 */
+ u64 kernel_entry /* 内核入口点 */
+ u64 _end - _text /* 内核镜像有效大小 */
+ u64 load_offset /* 加载内核镜像相对内存起始地址的偏移量 */
+ u64 res1 = 0 /* 保留 */
+ u64 res2 = 0 /* 保留 */
+ u64 res3 = 0 /* 保留 */
+ u32 LINUX_PE_MAGIC /* 魔术数 */
+ u32 pe_header - _head /* 到PE头的偏移量 */
diff --git a/Documentation/translations/zh_CN/loongarch/index.rst b/Documentation/translations/zh_CN/loongarch/index.rst
index 7d23eb78379d..0273a08342f7 100644
--- a/Documentation/translations/zh_CN/loongarch/index.rst
+++ b/Documentation/translations/zh_CN/loongarch/index.rst
@@ -14,6 +14,7 @@ LoongArch体系结构
:numbered:
introduction
+ booting
irq-chip-model
features
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index eee9f857a986..896914e3a847 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7213,14 +7213,13 @@ veto the transition.
:Parameters: args[0] is the maximum poll time in nanoseconds
:Returns: 0 on success; -1 on error
-This capability overrides the kvm module parameter halt_poll_ns for the
-target VM.
-
-VCPU polling allows a VCPU to poll for wakeup events instead of immediately
-scheduling during guest halts. The maximum time a VCPU can spend polling is
-controlled by the kvm module parameter halt_poll_ns. This capability allows
-the maximum halt time to specified on a per-VM basis, effectively overriding
-the module parameter for the target VM.
+KVM_CAP_HALT_POLL overrides the kvm.halt_poll_ns module parameter to set the
+maximum halt-polling time for all vCPUs in the target VM. This capability can
+be invoked at any time and any number of times to dynamically change the
+maximum halt-polling time.
+
+See Documentation/virt/kvm/halt-polling.rst for more information on halt
+polling.
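
A userspace sketch of enabling this capability might look as follows,
where ``vm_fd`` is assumed to be an open VM file descriptor and 500000
nanoseconds is just a sample value::

    struct kvm_enable_cap cap = {
            .cap     = KVM_CAP_HALT_POLL,
            .args[0] = 500000,      /* maximum poll time, in nanoseconds */
    };

    if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
            perror("KVM_ENABLE_CAP(KVM_CAP_HALT_POLL)");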
7.21 KVM_CAP_X86_USER_SPACE_MSR
-------------------------------
diff --git a/Documentation/virt/kvm/x86/halt-polling.rst b/Documentation/virt/kvm/halt-polling.rst
index 4922e4a15f18..3fae39b1a5ba 100644
--- a/Documentation/virt/kvm/x86/halt-polling.rst
+++ b/Documentation/virt/kvm/halt-polling.rst
@@ -119,6 +119,19 @@ These module parameters can be set from the debugfs files in:
Note: that these module parameters are system wide values and are not able to
be tuned on a per vm basis.
+Any changes to these parameters will be picked up by new and existing vCPUs the
+next time they halt, with the notable exception of VMs using KVM_CAP_HALT_POLL
+(see next section).
+
+KVM_CAP_HALT_POLL
+=================
+
+KVM_CAP_HALT_POLL is a VM capability that allows userspace to override halt_poll_ns
+on a per-VM basis. VMs using KVM_CAP_HALT_POLL ignore halt_poll_ns completely (but
+still obey halt_poll_ns_grow, halt_poll_ns_grow_start, and halt_poll_ns_shrink).
+
+See Documentation/virt/kvm/api.rst for more information on this capability.
+
Further Notes
=============
diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst
index e0a2c74e1043..ad13ec55ddfe 100644
--- a/Documentation/virt/kvm/index.rst
+++ b/Documentation/virt/kvm/index.rst
@@ -17,4 +17,5 @@ KVM
locking
vcpu-requests
+ halt-polling
review-checklist
diff --git a/Documentation/virt/kvm/x86/index.rst b/Documentation/virt/kvm/x86/index.rst
index 7ff588826b9f..9ece6b8dc817 100644
--- a/Documentation/virt/kvm/x86/index.rst
+++ b/Documentation/virt/kvm/x86/index.rst
@@ -10,7 +10,6 @@ KVM for x86 systems
amd-memory-encryption
cpuid
errata
- halt-polling
hypercalls
mmu
msr
diff --git a/MAINTAINERS b/MAINTAINERS
index 69565ac0c224..93b5f89308a8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5299,7 +5299,7 @@ M: Johannes Weiner <[email protected]>
M: Michal Hocko <[email protected]>
M: Roman Gushchin <[email protected]>
M: Shakeel Butt <[email protected]>
-R: Muchun Song <[email protected]>
+R: Muchun Song <[email protected]>
S: Maintained
@@ -5585,8 +5585,6 @@ F: drivers/scsi/cxgbi/cxgb3i
CXGB4 CRYPTO DRIVER (chcr)
M: Ayush Sawal <[email protected]>
-M: Vinay Kumar Yadav <[email protected]>
-M: Rohit Maheshwari <[email protected]>
S: Supported
W: http://www.chelsio.com
@@ -5594,8 +5592,6 @@ F: drivers/crypto/chelsio
CXGB4 INLINE CRYPTO DRIVER
M: Ayush Sawal <[email protected]>
-M: Vinay Kumar Yadav <[email protected]>
-M: Rohit Maheshwari <[email protected]>
S: Supported
W: http://www.chelsio.com
@@ -9443,7 +9439,7 @@ F: drivers/net/ethernet/huawei/hinic/
HUGETLB SUBSYSTEM
M: Mike Kravetz <[email protected]>
-M: Muchun Song <[email protected]>
+M: Muchun Song <[email protected]>
S: Maintained
F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -16149,7 +16145,8 @@ F: include/linux/peci-cpu.h
F: include/linux/peci.h
PENSANDO ETHERNET DRIVERS
-M: Shannon Nelson <[email protected]>
+M: Shannon Nelson <[email protected]>
+M: Brett Creeley <[email protected]>
S: Supported
@@ -16664,10 +16661,10 @@ F: net/psample
PSTORE FILESYSTEM
M: Kees Cook <[email protected]>
-M: Anton Vorontsov <[email protected]>
-M: Colin Cross <[email protected]>
-M: Tony Luck <[email protected]>
-S: Maintained
+R: Tony Luck <[email protected]>
+R: Guilherme G. Piccoli <[email protected]>
+S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/pstore
F: Documentation/admin-guide/ramoops.rst
F: Documentation/admin-guide/pstore-blk.rst
@@ -17485,10 +17482,8 @@ S: Maintained
F: drivers/net/wireless/realtek/rtw89/
REDPINE WIRELESS DRIVER
-M: Amitkumar Karwar <[email protected]>
-M: Siva Rebbagondla <[email protected]>
-S: Maintained
+S: Orphan
F: drivers/net/wireless/rsi/
REGISTER MAP ABSTRACTION
diff --git a/Makefile b/Makefile
index 78525ebea876..5ad42c712182 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 1
SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
@@ -562,7 +562,7 @@ KBUILD_AFLAGS := -D__ASSEMBLY__ -fno-PIE
KBUILD_CFLAGS := -Wall -Wundef -Werror=strict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE \
-Werror=implicit-function-declaration -Werror=implicit-int \
- -Werror=return-type -Wno-format-security \
+ -Werror=return-type -Wno-format-security -funsigned-char \
-std=gnu11
KBUILD_CPPFLAGS := -D__KERNEL__
KBUILD_RUSTFLAGS := $(rust_common_flags) \
diff --git a/arch/Kconfig b/arch/Kconfig
index 8f138e580d1a..6b95244c3057 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -468,6 +468,9 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
+config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
+ bool
+
config HAVE_ALIGNED_STRUCT_PAGE
bool
help
diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi
index 7a113325abb9..6f9004ebf424 100644
--- a/arch/arm/boot/dts/at91rm9200.dtsi
+++ b/arch/arm/boot/dts/at91rm9200.dtsi
@@ -666,7 +666,7 @@
compatible = "atmel,at91rm9200-udc";
reg = <0xfffb0000 0x4000>;
interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>;
- clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>;
clock-names = "pclk", "hclk";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
index 03d2e8544a4e..0fc9e6b8b05d 100644
--- a/arch/arm/boot/dts/imx7s.dtsi
+++ b/arch/arm/boot/dts/imx7s.dtsi
@@ -1270,10 +1270,10 @@
clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>;
};
- gpmi: nand-controller@33002000 {
+ gpmi: nand-controller@33002000{
compatible = "fsl,imx7d-gpmi-nand";
#address-cells = <1>;
- #size-cells = <0>;
+ #size-cells = <1>;
reg = <0x33002000 0x2000>, <0x33004000 0x4000>;
reg-names = "gpmi-nand", "bch";
interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/configs/clps711x_defconfig b/arch/arm/configs/clps711x_defconfig
index 92481b2a88fa..adcee238822a 100644
--- a/arch/arm/configs/clps711x_defconfig
+++ b/arch/arm/configs/clps711x_defconfig
@@ -14,7 +14,8 @@ CONFIG_ARCH_EDB7211=y
CONFIG_ARCH_P720T=y
CONFIG_AEABI=y
# CONFIG_COREDUMP is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig
index 2a2d2cb3ce2e..69341c33e0cc 100644
--- a/arch/arm/configs/collie_defconfig
+++ b/arch/arm/configs/collie_defconfig
@@ -13,7 +13,8 @@ CONFIG_CMDLINE="noinitrd root=/dev/mtdblock2 rootfstype=jffs2 fbcon=rotate:1"
CONFIG_FPE_NWFPE=y
CONFIG_PM=y
# CONFIG_SWAP is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/arm/configs/multi_v4t_defconfig b/arch/arm/configs/multi_v4t_defconfig
index e2fd822f741a..b60000a89aff 100644
--- a/arch/arm/configs/multi_v4t_defconfig
+++ b/arch/arm/configs/multi_v4t_defconfig
@@ -25,7 +25,8 @@ CONFIG_ARM_CLPS711X_CPUIDLE=y
CONFIG_JUMP_LABEL=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_COREDUMP is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index 70511fe4b3ec..246f1bba7df5 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig
@@ -42,7 +42,8 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_BINFMT_MISC=y
# CONFIG_SWAP is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index d60cc9cc4c21..0a0f12df40b5 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -49,7 +49,8 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_LDM_PARTITION=y
CONFIG_CMDLINE_PARTITION=y
CONFIG_BINFMT_MISC=y
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
# CONFIG_COMPACTION is not set
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/arm/configs/tct_hammer_defconfig b/arch/arm/configs/tct_hammer_defconfig
index 3b29ae1fb750..6bd38b6f22c4 100644
--- a/arch/arm/configs/tct_hammer_defconfig
+++ b/arch/arm/configs/tct_hammer_defconfig
@@ -19,7 +19,8 @@ CONFIG_FPE_NWFPE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_SWAP is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/arm/configs/xcep_defconfig b/arch/arm/configs/xcep_defconfig
index ea59e4b6bfc5..6bd9f71b71fc 100644
--- a/arch/arm/configs/xcep_defconfig
+++ b/arch/arm/configs/xcep_defconfig
@@ -26,7 +26,8 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
# CONFIG_BLOCK is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_NET=y
diff --git a/arch/arm/mach-at91/sama5.c b/arch/arm/mach-at91/sama5.c
index 67ed68fbe3a5..bf2b5c6a18c6 100644
--- a/arch/arm/mach-at91/sama5.c
+++ b/arch/arm/mach-at91/sama5.c
@@ -26,7 +26,7 @@ static void sama5_l2c310_write_sec(unsigned long val, unsigned reg)
static void __init sama5_secure_cache_init(void)
{
sam_secure_init();
- if (sam_linux_is_optee_available())
+ if (IS_ENABLED(CONFIG_OUTER_CACHE) && sam_linux_is_optee_available())
outer_cache.write_sec = sama5_l2c310_write_sec;
}
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 46cccd6bf705..de988cba9a4b 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -105,6 +105,19 @@ static inline bool is_write_fault(unsigned int fsr)
return (fsr & FSR_WRITE) && !(fsr & FSR_CM);
}
+static inline bool is_translation_fault(unsigned int fsr)
+{
+ int fs = fsr_fs(fsr);
+#ifdef CONFIG_ARM_LPAE
+ if ((fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL)
+ return true;
+#else
+ if (fs == FS_L1_TRANS || fs == FS_L2_TRANS)
+ return true;
+#endif
+ return false;
+}
+
static void die_kernel_fault(const char *msg, struct mm_struct *mm,
unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
@@ -140,7 +153,8 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
if (addr < PAGE_SIZE) {
msg = "NULL pointer dereference";
} else {
- if (kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
+ if (is_translation_fault(fsr) &&
+ kfence_handle_page_fault(addr, is_write_fault(fsr), regs))
return;
msg = "paging request";
@@ -208,7 +222,7 @@ static inline bool is_permission_fault(unsigned int fsr)
{
int fs = fsr_fs(fsr);
#ifdef CONFIG_ARM_LPAE
- if ((fs & FS_PERM_NOLL_MASK) == FS_PERM_NOLL)
+ if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL)
return true;
#else
if (fs == FS_L1_PERM || fs == FS_L2_PERM)
diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h
index 83b5ab32d7a4..54927ba1fa6e 100644
--- a/arch/arm/mm/fault.h
+++ b/arch/arm/mm/fault.h
@@ -14,8 +14,9 @@
#ifdef CONFIG_ARM_LPAE
#define FSR_FS_AEA 17
+#define FS_TRANS_NOLL 0x4
#define FS_PERM_NOLL 0xC
-#define FS_PERM_NOLL_MASK 0x3C
+#define FS_MMU_NOLL_MASK 0x3C
static inline int fsr_fs(unsigned int fsr)
{
@@ -23,8 +24,10 @@ static inline int fsr_fs(unsigned int fsr)
}
#else
#define FSR_FS_AEA 22
-#define FS_L1_PERM 0xD
-#define FS_L2_PERM 0xF
+#define FS_L1_TRANS 0x5
+#define FS_L2_TRANS 0x7
+#define FS_L1_PERM 0xD
+#define FS_L2_PERM 0xF
static inline int fsr_fs(unsigned int fsr)
{
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 505c8a1ccbe0..099ee812f3f1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -31,6 +31,7 @@ config ARM64
select ARCH_HAS_KCOV
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_DEVMAP
select ARCH_HAS_PTE_SPECIAL
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index d6cf535d8352..439e2bc5d5d8 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -14,16 +14,8 @@
#ifdef CONFIG_EFI
extern void efi_init(void);
-
-bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg);
#else
#define efi_init()
-
-static inline
-bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
-{
- return false;
-}
#endif
int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 67babd5f04c2..75691a2641c1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -6,7 +6,7 @@
#include <linux/linkage.h>
SYM_FUNC_START(__efi_rt_asm_wrapper)
- stp x29, x30, [sp, #-112]!
+ stp x29, x30, [sp, #-32]!
mov x29, sp
/*
@@ -17,20 +17,6 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
stp x1, x18, [sp, #16]
/*
- * Preserve all callee saved registers and record the stack pointer
- * value in a per-CPU variable so we can recover from synchronous
- * exceptions occurring while running the firmware routines.
- */
- stp x19, x20, [sp, #32]
- stp x21, x22, [sp, #48]
- stp x23, x24, [sp, #64]
- stp x25, x26, [sp, #80]
- stp x27, x28, [sp, #96]
-
- adr_this_cpu x8, __efi_rt_asm_recover_sp, x9
- str x29, [x8]
-
- /*
* We are lucky enough that no EFI runtime services take more than
* 5 arguments, so all are passed in registers rather than via the
* stack.
@@ -45,7 +31,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
ldp x1, x2, [sp, #16]
cmp x2, x18
- ldp x29, x30, [sp], #112
+ ldp x29, x30, [sp], #32
b.ne 0f
ret
0:
@@ -59,18 +45,3 @@ SYM_FUNC_START(__efi_rt_asm_wrapper)
mov x18, x2
b efi_handle_corrupted_x18 // tail call
SYM_FUNC_END(__efi_rt_asm_wrapper)
-
-SYM_FUNC_START(__efi_rt_asm_recover)
- ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9
- mov sp, x8
-
- ldp x0, x18, [sp, #16]
- ldp x19, x20, [sp, #32]
- ldp x21, x22, [sp, #48]
- ldp x23, x24, [sp, #64]
- ldp x25, x26, [sp, #80]
- ldp x27, x28, [sp, #96]
- ldp x29, x30, [sp], #112
-
- b efi_handle_runtime_exception
-SYM_FUNC_END(__efi_rt_asm_recover)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index ee53f2a0aa03..a908a37f0367 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -9,7 +9,6 @@
#include <linux/efi.h>
#include <linux/init.h>
-#include <linux/percpu.h>
#include <asm/efi.h>
@@ -145,28 +144,3 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f);
return s;
}
-
-asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp);
-
-asmlinkage efi_status_t __efi_rt_asm_recover(void);
-
-asmlinkage efi_status_t efi_handle_runtime_exception(const char *f)
-{
- pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f);
- clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- return EFI_ABORTED;
-}
-
-bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
-{
- /* Check whether the exception occurred while running the firmware */
- if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64)
- return false;
-
- pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg);
- add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
- dump_stack();
-
- regs->pc = (u64)__efi_rt_asm_recover;
- return true;
-}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3cb101e8cb29..5240f6acad64 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -36,7 +36,22 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
{
unsigned long start = (unsigned long)page_address(page);
- dcache_clean_poc(start, start + size);
+ /*
+ * The architecture only requires a clean to the PoC here in order to
+ * meet the requirements of the DMA API. However, some vendors (i.e.
+ * Qualcomm) abuse the DMA API for transferring buffers from the
+ * non-secure to the secure world, resetting the system if a non-secure
+ * access shows up after the buffer has been transferred:
+ *
+ * https://lore.kernel.org/r/[email protected]
+ *
+ * Using clean+invalidate appears to make this issue less likely, but
+ * the drivers themselves still need fixing as the CPU could issue a
+ * speculative read from the buffer via the linear mapping irrespective
+ * of the cache maintenance we use. Once the drivers are fixed, we can
+ * relax this to a clean operation.
+ */
+ dcache_clean_inval_poc(start, start + size);
}
#ifdef CONFIG_IOMMU_DMA
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 3e9cf9826417..5b391490e045 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -30,7 +30,6 @@
#include <asm/bug.h>
#include <asm/cmpxchg.h>
#include <asm/cpufeature.h>
-#include <asm/efi.h>
#include <asm/exception.h>
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
@@ -392,9 +391,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr,
msg = "paging request";
}
- if (efi_runtime_fixup_exception(regs, msg))
- return;
-
die_kernel_fault(msg, addr, esr, regs);
}
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 903096bd87f8..386adde2feff 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -10,6 +10,7 @@ config LOONGARCH
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
+ select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_INLINE_READ_LOCK if !PREEMPTION
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index aa0e0e0d4ee5..79d5bfd913e0 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -490,6 +490,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return pmd;
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_ACCESSED);
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index 3dd172d9ffea..d82687390b4a 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -78,16 +78,6 @@ extern void calculate_cpu_foreign_map(void);
*/
extern void show_ipi_list(struct seq_file *p, int prec);
-/*
- * This function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-static inline void smp_send_reschedule(int cpu)
-{
- loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
-}
-
static inline void arch_send_call_function_single_ipi(int cpu)
{
loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION);
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 6ed72f7ff278..14508d429ffa 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -149,6 +149,17 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action)
ipi_write_action(cpu_logical_map(i), (u32)action);
}
+/*
+ * This function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+ loongson_send_ipi_single(cpu, SMP_RESCHEDULE);
+}
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
+
irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
{
unsigned int action;
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index d8ee8fbc8c67..58781c6e4191 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -10,6 +10,8 @@
#include <asm/regdef.h>
#include <asm/stackframe.h>
+#define INVTLB_ADDR_GFALSE_AND_ASID 5
+
#define PTRS_PER_PGD_BITS (PAGE_SHIFT - 3)
#define PTRS_PER_PUD_BITS (PAGE_SHIFT - 3)
#define PTRS_PER_PMD_BITS (PAGE_SHIFT - 3)
@@ -136,13 +138,10 @@ tlb_huge_update_load:
ori t0, ra, _PAGE_VALID
st.d t0, t1, 0
#endif
- tlbsrch
- addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16)
- addi.d ra, t1, 0
- csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
- tlbwr
-
- csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
+ csrrd ra, LOONGARCH_CSR_ASID
+ csrrd t1, LOONGARCH_CSR_BADV
+ andi ra, ra, CSR_ASID_ASID
+ invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1
/*
* A huge PTE describes an area the size of the
@@ -287,13 +286,11 @@ tlb_huge_update_store:
ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
st.d t0, t1, 0
#endif
- tlbsrch
- addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16)
- addi.d ra, t1, 0
- csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
- tlbwr
+ csrrd ra, LOONGARCH_CSR_ASID
+ csrrd t1, LOONGARCH_CSR_BADV
+ andi ra, ra, CSR_ASID_ASID
+ invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1
- csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
/*
* A huge PTE describes an area the size of the
* configured huge page size. This is twice the
@@ -436,6 +433,11 @@ tlb_huge_update_modify:
ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
st.d t0, t1, 0
#endif
+ csrrd ra, LOONGARCH_CSR_ASID
+ csrrd t1, LOONGARCH_CSR_BADV
+ andi ra, ra, CSR_ASID_ASID
+ invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1
+
/*
* A huge PTE describes an area the size of the
* configured huge page size. This is twice the
@@ -466,7 +468,7 @@ tlb_huge_update_modify:
addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
- tlbwr
+ tlbfill
/* Reset default page size */
addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c
index 557d60867f98..6fdc13610565 100644
--- a/arch/m68k/emu/nfcon.c
+++ b/arch/m68k/emu/nfcon.c
@@ -49,7 +49,7 @@ static void nfcon_write(struct console *con, const char *str,
static struct tty_driver *nfcon_device(struct console *con, int *index)
{
*index = 0;
- return (con->flags & CON_ENABLED) ? nfcon_tty_driver : NULL;
+ return console_is_registered(con) ? nfcon_tty_driver : NULL;
}
static struct console nf_console = {
@@ -107,6 +107,11 @@ static int __init nf_debug_setup(char *arg)
stderr_id = nf_get_id("NF_STDERR");
if (stderr_id) {
+ /*
+ * The console will be enabled when debug=nfcon is specified
+ * as a kernel parameter. Since this is a non-standard way
+ * of enabling consoles, it must be explicitly enabled.
+ */
nf_console.flags |= CON_ENABLED;
register_console(&nf_console);
}
@@ -151,7 +156,7 @@ static int __init nfcon_init(void)
nfcon_tty_driver = driver;
- if (!(nf_console.flags & CON_ENABLED))
+ if (!console_is_registered(&nf_console))
register_console(&nf_console);
return 0;
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 6caec386ad2f..4678627673df 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -622,6 +622,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return pmd;
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_ACCESSED);
diff --git a/arch/openrisc/configs/or1ksim_defconfig b/arch/openrisc/configs/or1ksim_defconfig
index 6e1e004047c7..0116e465238f 100644
--- a/arch/openrisc/configs/or1ksim_defconfig
+++ b/arch/openrisc/configs/or1ksim_defconfig
@@ -10,7 +10,8 @@ CONFIG_EXPERT=y
# CONFIG_AIO is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
CONFIG_MODULES=y
# CONFIG_BLOCK is not set
CONFIG_OPENRISC_BUILTIN_DTB="or1ksim"
diff --git a/arch/openrisc/configs/simple_smp_defconfig b/arch/openrisc/configs/simple_smp_defconfig
index ff49d868e040..b990cb6c9309 100644
--- a/arch/openrisc/configs/simple_smp_defconfig
+++ b/arch/openrisc/configs/simple_smp_defconfig
@@ -16,7 +16,8 @@ CONFIG_EXPERT=y
# CONFIG_AIO is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
CONFIG_MODULES=y
# CONFIG_BLOCK is not set
CONFIG_OPENRISC_BUILTIN_DTB="simple_smp"
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 4745bb9998bd..6d8492b6e2b8 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs)
/* kernel/traps.c */
DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
#ifdef CONFIG_PPC_BOOK3S_64
+DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot);
DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async);
#endif
DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 43f1c76d48ce..a379b0ce19ff 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
- /* First arg comes in as a 32 bits pointer. */
- EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
- EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
+ /* Initialize tail_call_cnt, to be skipped if we do tail calls. */
+ EMIT(PPC_RAW_LI(_R4, 0));
+
+#define BPF_TAILCALL_PROLOGUE_SIZE 4
+
EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
- /*
- * Initialize tail_call_cnt in stack frame if we do tail calls.
- * Otherwise, put in NOPs so that it can be skipped when we are
- * invoked through a tail call.
- */
if (ctx->seen & SEEN_TAILCALL)
- EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1,
- bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
- else
- EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
-#define BPF_TAILCALL_PROLOGUE_SIZE 16
+ /* First arg comes in as a 32 bits pointer. */
+ EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
+ EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
/*
* We need a stack frame, but we don't necessarily need to
@@ -170,24 +166,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
if (bpf_is_seen_register(ctx, i))
EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
-}
-
-void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
-{
- EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
-
- bpf_jit_emit_common_epilogue(image, ctx);
-
- /* Tear down our stack frame */
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+ /* Tear down our stack frame */
EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_MTLR(_R0));
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
+
+ bpf_jit_emit_common_epilogue(image, ctx);
+
EMIT(PPC_RAW_BLR());
}
@@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
- EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
/*
* if (prog == NULL)
@@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
/* goto *(prog->bpf_func + prologue_size); */
EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
-
- if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
-
EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
-
- if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_MTLR(_R0));
-
EMIT(PPC_RAW_MTCTR(_R3));
EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1)));
+ /* Put tail_call_cnt in r4 */
+ EMIT(PPC_RAW_MR(_R4, _R0));
+
/* tear restore NVRs, ... */
bpf_jit_emit_common_epilogue(image, ctx);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index fa78595a6089..593cf09264d8 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -317,9 +317,9 @@ config SMP
config NR_CPUS
int "Maximum number of CPUs (2-512)"
depends on SMP
- range 2 512 if !SBI_V01
- range 2 32 if SBI_V01 && 32BIT
- range 2 64 if SBI_V01 && 64BIT
+ range 2 512 if !RISCV_SBI_V01
+ range 2 32 if RISCV_SBI_V01 && 32BIT
+ range 2 64 if RISCV_SBI_V01 && 64BIT
default "32" if 32BIT
default "64" if 64BIT
diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig
index 96fe8def644c..79b3ccd58ff0 100644
--- a/arch/riscv/configs/nommu_k210_defconfig
+++ b/arch/riscv/configs/nommu_k210_defconfig
@@ -25,7 +25,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_NONPORTABLE=y
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
index 379740654373..6b80bb13b8ed 100644
--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -17,7 +17,8 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_NONPORTABLE=y
diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig
index 1a56eda5ce46..4cf0f297091e 100644
--- a/arch/riscv/configs/nommu_virt_defconfig
+++ b/arch/riscv/configs/nommu_virt_defconfig
@@ -22,7 +22,8 @@ CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
# CONFIG_MMU is not set
CONFIG_SOC_VIRT=y
CONFIG_NONPORTABLE=y
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 1b471ff73178..816e753de636 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -23,6 +23,7 @@
#define REG_L __REG_SEL(ld, lw)
#define REG_S __REG_SEL(sd, sw)
#define REG_SC __REG_SEL(sc.d, sc.w)
+#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq)
#define REG_ASM __REG_SEL(.dword, .word)
#define SZREG __REG_SEL(8, 4)
#define LGREG __REG_SEL(3, 2)
diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
index f74879a8f1ea..e229d7be4b66 100644
--- a/arch/riscv/include/asm/efi.h
+++ b/arch/riscv/include/asm/efi.h
@@ -10,6 +10,7 @@
#include <asm/mmu_context.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
#ifdef CONFIG_EFI
extern void efi_init(void);
@@ -20,7 +21,10 @@ extern void efi_init(void);
int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
-#define arch_efi_call_virt_setup() efi_virtmap_load()
+#define arch_efi_call_virt_setup() ({ \
+ sync_kernel_mappings(efi_mm.pgd); \
+ efi_virtmap_load(); \
+ })
#define arch_efi_call_virt_teardown() efi_virtmap_unload()
#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 947f23d7b6af..59dc12b5b7e8 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
#endif /* __PAGETABLE_PMD_FOLDED */
+static inline void sync_kernel_mappings(pgd_t *pgd)
+{
+ memcpy(pgd + USER_PTRS_PER_PGD,
+ init_mm.pgd + USER_PTRS_PER_PGD,
+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+}
+
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
@@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
if (likely(pgd != NULL)) {
memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
/* Copy kernel mappings */
- memcpy(pgd + USER_PTRS_PER_PGD,
- init_mm.pgd + USER_PTRS_PER_PGD,
- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+ sync_kernel_mappings(pgd);
}
return pgd;
}
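
The helper copies only the top-level kernel entries; because the kernel page tables below that level are shared, re-syncing the pgd right before an EFI runtime call is enough to pick up kernel mappings created after efi_mm was set up. A minimal user-space model of that idea (hypothetical types, not the kernel's):

#include <stdio.h>
#include <string.h>

#define PTRS_PER_PGD		8
#define USER_PTRS_PER_PGD	4	/* lower half: user, upper half: kernel */

typedef struct { void *table; } pgd_t;	/* top-level entry -> lower-level table */

static pgd_t init_pgd[PTRS_PER_PGD];	/* stands in for init_mm.pgd */

/* Same shape as the helper added to pgalloc.h: copy only the kernel half. */
static void sync_kernel_mappings(pgd_t *pgd)
{
	memcpy(pgd + USER_PTRS_PER_PGD, init_pgd + USER_PTRS_PER_PGD,
	       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
}

int main(void)
{
	static int kernel_table;		/* shared lower-level table */
	pgd_t efi_pgd[PTRS_PER_PGD] = { { 0 } };

	/* A kernel mapping created after efi_pgd was first populated... */
	init_pgd[USER_PTRS_PER_PGD].table = &kernel_table;
	/* ...is picked up by re-syncing before the EFI runtime call. */
	sync_kernel_mappings(efi_pgd);

	printf("shared table: %s\n",
	       efi_pgd[USER_PTRS_PER_PGD].table == &kernel_table ? "yes" : "no");
	return 0;
}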
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7ec936910a96..92ec2d9d7273 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -600,6 +600,7 @@ static inline int pmd_dirty(pmd_t pmd)
return pte_dirty(pmd_pte(pmd));
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return pte_young(pmd_pte(pmd));
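
The "#define pmd_young pmd_young" line may look redundant; it lets generic code detect at preprocessing time that the architecture supplies its own pmd_young() and only provide a fallback otherwise. A small stand-alone illustration of the idiom (the bit value is made up):

#include <stdio.h>

typedef struct { unsigned long val; } pmd_t;

/* "architecture" header: provide an override and advertise it */
#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
	return (pmd.val & 0x20) != 0;	/* pretend bit 5 is the accessed bit */
}

/* "generic" header: only supply the fallback when no override exists */
#ifndef pmd_young
static inline int pmd_young(pmd_t pmd)
{
	return 0;
}
#endif

int main(void)
{
	pmd_t pmd = { .val = 0x20 };

	printf("pmd_young = %d\n", pmd_young(pmd));	/* uses the override */
	return 0;
}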
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index d3443be7eedc..3831b638ecab 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops);
/* Clear IPI for current CPU */
void riscv_clear_ipi(void);
+/* Check whether the other CPUs have stopped or not */
+bool smp_crash_stop_failed(void);
+
/* Secondary hart entry */
asmlinkage void smp_callin(void);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index b9eda3fcbd6d..186abd146eaf 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -404,6 +404,19 @@ handle_syscall_trace_exit:
#ifdef CONFIG_VMAP_STACK
handle_kernel_stack_overflow:
+ /*
+ * Takes the pseudo-spinlock for the shadow stack, in case multiple
+ * harts are concurrently overflowing their kernel stacks. We could
+ * store any value here, but since we're overflowing the kernel stack
+ * already we only have SP to use as a scratch register. So we just
+ * swap in the address of the spinlock, as that's definitely non-zero.
+ *
+ * Pairs with a store_release in handle_bad_stack().
+ */
+1: la sp, spin_shadow_stack
+ REG_AMOSWAP_AQ sp, sp, (sp)
+ bnez sp, 1b
+
la sp, shadow_stack
addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index ee79e6839b86..2d139b724bc8 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -15,6 +15,8 @@
#include <linux/compiler.h> /* For unreachable() */
#include <linux/cpu.h> /* For cpu_down() */
#include <linux/reboot.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
/*
* kexec_image_info - Print received image details
@@ -138,20 +140,35 @@ void machine_shutdown(void)
#endif
}
-/* Override the weak function in kernel/panic.c */
-void crash_smp_send_stop(void)
+static void machine_kexec_mask_interrupts(void)
{
- static int cpus_stopped;
+ unsigned int i;
+ struct irq_desc *desc;
- /*
- * This function can be called twice in panic path, but obviously
- * we execute this only once.
- */
- if (cpus_stopped)
- return;
+ for_each_irq_desc(i, desc) {
+ struct irq_chip *chip;
+ int ret;
+
+ chip = irq_desc_get_chip(desc);
+ if (!chip)
+ continue;
+
+ /*
+ * First try to remove the active state. If this
+ * fails, try to EOI the interrupt.
+ */
+ ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
+
+ if (ret && irqd_irq_inprogress(&desc->irq_data) &&
+ chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
- smp_send_stop();
- cpus_stopped = 1;
+ if (chip->irq_mask)
+ chip->irq_mask(&desc->irq_data);
+
+ if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+ chip->irq_disable(&desc->irq_data);
+ }
}
/*
@@ -169,6 +186,8 @@ machine_crash_shutdown(struct pt_regs *regs)
crash_smp_send_stop();
crash_save_cpu(regs, smp_processor_id());
+ machine_kexec_mask_interrupts();
+
pr_info("Starting crashdump kernel...\n");
}
@@ -195,6 +214,11 @@ machine_kexec(struct kimage *image)
void *control_code_buffer = page_address(image->control_code_page);
riscv_kexec_method kexec_method = NULL;
+#ifdef CONFIG_SMP
+ WARN(smp_crash_stop_failed(),
+ "Some CPUs may be stale, kdump will be unreliable.\n");
+#endif
+
if (image->type != KEXEC_TYPE_CRASH)
kexec_method = control_code_buffer;
else
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 67ec1fadcfe2..86acd690d529 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -322,10 +322,11 @@ subsys_initcall(topology_init);
void free_initmem(void)
{
- if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
- set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end),
- IS_ENABLED(CONFIG_64BIT) ?
- set_memory_rw : set_memory_rw_nx);
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
+ set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx);
+ if (IS_ENABLED(CONFIG_64BIT))
+ set_kernel_memory(__init_begin, __init_end, set_memory_nx);
+ }
free_initmem_default(POISON_FREE_INITMEM);
}
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 760a64518c58..8c3b59f1f9b8 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -12,6 +12,7 @@
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/kexec.h>
#include <linux/profile.h>
#include <linux/smp.h>
#include <linux/sched.h>
@@ -22,11 +23,13 @@
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
enum ipi_message_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
+ IPI_CPU_CRASH_STOP,
IPI_IRQ_WORK,
IPI_TIMER,
IPI_MAX
@@ -71,6 +74,32 @@ static void ipi_stop(void)
wait_for_interrupt();
}
+#ifdef CONFIG_KEXEC_CORE
+static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
+
+static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+ crash_save_cpu(regs, cpu);
+
+ atomic_dec(&waiting_for_crash_ipi);
+
+ local_irq_disable();
+
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu_has_hotplug(cpu))
+ cpu_ops[cpu]->cpu_stop();
+#endif
+
+ for (;;)
+ wait_for_interrupt();
+}
+#else
+static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+ unreachable();
+}
+#endif
+
static const struct riscv_ipi_ops *ipi_ops __ro_after_init;
void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
@@ -124,8 +153,9 @@ void arch_irq_work_raise(void)
void handle_IPI(struct pt_regs *regs)
{
- unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
- unsigned long *stats = ipi_data[smp_processor_id()].stats;
+ unsigned int cpu = smp_processor_id();
+ unsigned long *pending_ipis = &ipi_data[cpu].bits;
+ unsigned long *stats = ipi_data[cpu].stats;
riscv_clear_ipi();
@@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs)
ipi_stop();
}
+ if (ops & (1 << IPI_CPU_CRASH_STOP)) {
+ ipi_cpu_crash_stop(cpu, get_irq_regs());
+ }
+
if (ops & (1 << IPI_IRQ_WORK)) {
stats[IPI_IRQ_WORK]++;
irq_work_run();
@@ -176,6 +210,7 @@ static const char * const ipi_names[] = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
+ [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
[IPI_TIMER] = "Timer broadcast interrupts",
};
@@ -235,6 +270,64 @@ void smp_send_stop(void)
cpumask_pr_args(cpu_online_mask));
}
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * The number of CPUs online, not counting this CPU (which may not be
+ * fully online and so not counted in num_online_cpus()).
+ */
+static inline unsigned int num_other_online_cpus(void)
+{
+ unsigned int this_cpu_online = cpu_online(smp_processor_id());
+
+ return num_online_cpus() - this_cpu_online;
+}
+
+void crash_smp_send_stop(void)
+{
+ static int cpus_stopped;
+ cpumask_t mask;
+ unsigned long timeout;
+
+ /*
+ * This function can be called twice in the panic path, but obviously
+ * we execute this only once.
+ */
+ if (cpus_stopped)
+ return;
+
+ cpus_stopped = 1;
+
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0)
+ return;
+
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
+
+ pr_crit("SMP: stopping secondary CPUs\n");
+ send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);
+
+ /* Wait up to one second for other CPUs to stop */
+ timeout = USEC_PER_SEC;
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
+ udelay(1);
+
+ if (atomic_read(&waiting_for_crash_ipi) > 0)
+ pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+}
+
+bool smp_crash_stop_failed(void)
+{
+ return (atomic_read(&waiting_for_crash_ipi) > 0);
+}
+#endif
+
void smp_send_reschedule(int cpu)
{
send_ipi_single(cpu, IPI_RESCHEDULE);
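
The crash-stop handshake added above is a countdown with a bounded wait: the panicking hart publishes how many peers it expects, each peer saves its registers and decrements the counter, and machine_kexec() later checks smp_crash_stop_failed(). A user-space model of the same pattern (pthreads stand in for the IPI; names are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NR_SECONDARIES 3

static atomic_int waiting_for_crash_ipi;

static void *secondary(void *arg)
{
	(void)arg;
	/* crash_save_cpu() would run here */
	atomic_fetch_sub(&waiting_for_crash_ipi, 1);
	for (;;)			/* wait_for_interrupt() loop */
		pause();
	return NULL;
}

static int crash_stop_failed(void)
{
	return atomic_load(&waiting_for_crash_ipi) > 0;
}

int main(void)
{
	pthread_t t[NR_SECONDARIES];
	long timeout = 1000000;		/* "up to one second", in microseconds */

	atomic_store(&waiting_for_crash_ipi, NR_SECONDARIES);
	for (int i = 0; i < NR_SECONDARIES; i++)
		pthread_create(&t[i], NULL, secondary, NULL);	/* the "IPI" */

	while (crash_stop_failed() && timeout--)
		usleep(1);

	printf("stop %s\n", crash_stop_failed() ? "failed" : "succeeded");
	return 0;
}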
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f3e96d60a2ff..7abd8e4c4df6 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -221,11 +221,29 @@ asmlinkage unsigned long get_overflow_stack(void)
OVERFLOW_STACK_SIZE;
}
+/*
+ * A pseudo spinlock to protect the shadow stack from being used by multiple
+ * harts concurrently. This isn't a real spinlock because the lock side must
+ * be taken without a valid stack and with only a single register; it's only
+ * taken while in the process of panicking anyway, so the performance and
+ * error checking a proper spinlock gives us doesn't matter.
+ */
+unsigned long spin_shadow_stack;
+
asmlinkage void handle_bad_stack(struct pt_regs *regs)
{
unsigned long tsk_stk = (unsigned long)current->stack;
unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
+ /*
+ * We're done with the shadow stack by this point, as we're on the
+ * overflow stack. Tell any other concurrent overflowing harts that
+ * they can proceed with panicking by releasing the pseudo-spinlock.
+ *
+ * This pairs with an amoswap.aq in handle_kernel_stack_overflow.
+ */
+ smp_store_release(&spin_shadow_stack, 0);
+
console_verbose();
pr_emerg("Insufficient stack space to handle exception!\n");
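
Taken together, the amoswap.aq loop in entry.S and the smp_store_release() here are an acquire/release handoff of the single shadow stack. The same pattern in portable C11 atomics, as a sketch only (the kernel cannot use a normal spinlock here because the overflowing hart has no usable stack):

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong spin_shadow_stack;

/* entry.S side: keep swapping a non-zero value in until the old value is 0 */
static void shadow_stack_lock(void)
{
	while (atomic_exchange_explicit(&spin_shadow_stack, 1,
					memory_order_acquire))
		;	/* another hart owns the shadow stack, spin */
}

/* handle_bad_stack() side: off the shadow stack now, let the next hart in */
static void shadow_stack_unlock(void)
{
	atomic_store_explicit(&spin_shadow_stack, 0, memory_order_release);
}

int main(void)
{
	shadow_stack_lock();
	/* ...switch to this CPU's overflow stack... */
	shadow_stack_unlock();
	puts("acquire/release pairing ok");
	return 0;
}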
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index db6548509bb3..06e6b27f3bcc 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -17,6 +17,7 @@ vdso-syms += flush_icache
obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
ccflags-y := -fno-stack-protector
+ccflags-y += -DDISABLE_BRANCH_PROFILING
ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index de575af02ffe..48a11deb502c 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -73,6 +73,7 @@ config S390
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
select ARCH_HAS_MEM_ENCRYPT
+ select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SCALED_CPUTIME
select ARCH_HAS_SET_MEMORY
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f1cb9391190d..11e901286414 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd)
return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 94138f8f0c1c..ace2541ababd 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -546,8 +546,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
scb_s->eca |= scb_o->eca & ECA_CEI;
/* Epoch Extension */
- if (test_kvm_facility(vcpu->kvm, 139))
+ if (test_kvm_facility(vcpu->kvm, 139)) {
scb_s->ecd |= scb_o->ecd & ECD_MEF;
+ scb_s->epdx = scb_o->epdx;
+ }
/* etoken */
if (test_kvm_facility(vcpu->kvm, 156))
diff --git a/arch/sh/configs/rsk7201_defconfig b/arch/sh/configs/rsk7201_defconfig
index 619c18699459..376e95fa77bc 100644
--- a/arch/sh/configs/rsk7201_defconfig
+++ b/arch/sh/configs/rsk7201_defconfig
@@ -10,7 +10,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_AIO is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig
index d00fafc021e1..1d5fd67a3949 100644
--- a/arch/sh/configs/rsk7203_defconfig
+++ b/arch/sh/configs/rsk7203_defconfig
@@ -11,7 +11,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig
index 122216123e63..78e0e7be57ee 100644
--- a/arch/sh/configs/se7206_defconfig
+++ b/arch/sh/configs/se7206_defconfig
@@ -21,7 +21,8 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_ELF_CORE is not set
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig
index c0b6f40d01cc..e078b193a78a 100644
--- a/arch/sh/configs/shmin_defconfig
+++ b/arch/sh/configs/shmin_defconfig
@@ -9,7 +9,8 @@ CONFIG_LOG_BUF_SHIFT=14
# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SHMEM is not set
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CPU_SUBTYPE_SH7706=y
CONFIG_MEMORY_START=0x0c000000
diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig
index 32ec6eb1eabc..aa353dff7f19 100644
--- a/arch/sh/configs/shx3_defconfig
+++ b/arch/sh/configs/shx3_defconfig
@@ -20,7 +20,8 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_KALLSYMS_ALL=y
-CONFIG_SLOB=y
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
CONFIG_PROFILING=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index a779418ceba9..3bc9736bddb1 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -693,6 +693,7 @@ static inline unsigned long pmd_dirty(pmd_t pmd)
return pte_dirty(pte);
}
+#define pmd_young pmd_young
static inline unsigned long pmd_young(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
index 0224fcb36e22..427dd5a61a38 100644
--- a/arch/um/kernel/kmsg_dump.c
+++ b/arch/um/kernel/kmsg_dump.c
@@ -16,20 +16,26 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
struct console *con;
unsigned long flags;
size_t len = 0;
+ int cookie;
- /* only dump kmsg when no console is available */
- if (!console_trylock())
- return;
+ /*
+ * If no consoles are available to output crash information, dump
+ * the kmsg buffer to stdout.
+ */
- for_each_console(con) {
- if(strcmp(con->name, "tty") == 0 &&
- (con->flags & (CON_ENABLED | CON_CONSDEV)) != 0) {
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
+ /*
+ * The ttynull console and disabled consoles are ignored
+ * since they cannot output. All other consoles are
+ * expected to output the crash information.
+ */
+ if (strcmp(con->name, "ttynull") != 0 &&
+ (console_srcu_read_flags(con) & CON_ENABLED)) {
break;
}
}
-
- console_unlock();
-
+ console_srcu_read_unlock(cookie);
if (con)
return;
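
With the console list now protected by SRCU rather than the console lock, a kmsg dumper can walk it at crash time without touching the big console lock. A hedged kernel-style sketch of the read-side pattern (the helper name is made up; the iteration APIs are the ones used above):

static bool any_usable_console(void)
{
	struct console *con;
	bool found = false;
	int cookie;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		/* skip consoles that cannot emit output */
		if (console_srcu_read_flags(con) & CON_ENABLED) {
			found = true;
			break;
		}
	}
	console_srcu_read_unlock(cookie);

	return found;
}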
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 67745ceab0db..7d11f718ff0c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -81,6 +81,7 @@ config X86
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_PTE_DEVMAP if X86_64
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index 03bbcc2fa2ff..35936188db01 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -24,7 +24,7 @@ struct p4_event_bind {
unsigned int escr_msr[2]; /* ESCR MSR for this event */
unsigned int escr_emask; /* valid ESCR EventMask bits */
unsigned int shared; /* event is shared across threads */
- char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
+ signed char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
};
struct p4_pebs_bind {
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c936ce9f0c47..dfdb103ae4f6 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -321,7 +321,7 @@ static inline void indirect_branch_prediction_barrier(void)
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
-extern void write_spec_ctrl_current(u64 val, bool force);
+extern void update_spec_ctrl_cond(u64 val);
extern u64 spec_ctrl_current(void);
/*
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5059799bebe3..286a71810f9e 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_DIRTY;
}
+#define pmd_young pmd_young
static inline int pmd_young(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_ACCESSED;
@@ -1438,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void)
return true;
}
+#ifdef CONFIG_XEN_PV
+#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
+static inline bool arch_has_hw_nonleaf_pmd_young(void)
+{
+ return !cpu_feature_enabled(X86_FEATURE_XENPV);
+}
+#endif
+
#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte)
{
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3e3230cccaa7..6daf84229548 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -60,11 +60,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
static DEFINE_MUTEX(spec_ctrl_mutex);
+/* Update SPEC_CTRL MSR and its cached copy unconditionally */
+static void update_spec_ctrl(u64 val)
+{
+ this_cpu_write(x86_spec_ctrl_current, val);
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
/*
* Keep track of the SPEC_CTRL MSR value for the current task, which may differ
* from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
*/
-void write_spec_ctrl_current(u64 val, bool force)
+void update_spec_ctrl_cond(u64 val)
{
if (this_cpu_read(x86_spec_ctrl_current) == val)
return;
@@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force)
* When KERNEL_IBRS this MSR is written on return-to-user, unless
* forced the update can be delayed until that time.
*/
- if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
+ if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
wrmsrl(MSR_IA32_SPEC_CTRL, val);
}
@@ -1328,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
if (ia32_cap & ARCH_CAP_RRSBA) {
x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
}
@@ -1450,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void)
if (spectre_v2_in_ibrs_mode(mode)) {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
switch (mode) {
@@ -1564,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void)
static void update_stibp_msr(void * __unused)
{
u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
- write_spec_ctrl_current(val, true);
+ update_spec_ctrl(val);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
@@ -1797,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
}
}
@@ -2048,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
- write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ update_spec_ctrl(x86_spec_ctrl_base);
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
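
The rename splits the two cases that the bool argument used to multiplex: boot-time and global changes always write the MSR (update_spec_ctrl()), while the context-switch path may skip the write because, with KERNEL_IBRS, the MSR is rewritten on return to user space anyway (update_spec_ctrl_cond()). A tiny user-space model of the deferral decision, with the feature flag reduced to a plain variable:

#include <stdbool.h>
#include <stdio.h>

static unsigned long long cached_spec_ctrl;	/* per-CPU cache stand-in */
static unsigned long long msr_spec_ctrl;	/* the "hardware" MSR */
static bool kernel_ibrs;			/* X86_FEATURE_KERNEL_IBRS stand-in */

static void update_spec_ctrl(unsigned long long val)
{
	cached_spec_ctrl = val;
	msr_spec_ctrl = val;			/* wrmsrl(): always */
}

static void update_spec_ctrl_cond(unsigned long long val)
{
	if (cached_spec_ctrl == val)
		return;
	cached_spec_ctrl = val;
	/* With KERNEL_IBRS the exit-to-user path rewrites the MSR anyway. */
	if (!kernel_ibrs)
		msr_spec_ctrl = val;
}

int main(void)
{
	kernel_ibrs = true;
	update_spec_ctrl_cond(0x2);		/* deferred: MSR untouched */
	printf("cached=%llx msr=%llx\n", cached_spec_ctrl, msr_spec_ctrl);
	update_spec_ctrl(0x2);			/* forced: MSR written */
	printf("cached=%llx msr=%llx\n", cached_spec_ctrl, msr_spec_ctrl);
	return 0;
}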
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c21b7347a26d..e436c9c1ef3b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
}
if (updmsr)
- write_spec_ctrl_current(msr, false);
+ update_spec_ctrl_cond(msr);
}
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2835bd796639..69227f77b201 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10574,8 +10574,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
vcpu->mmio_needed = 0;
r = 0;
+ goto out;
}
- goto out;
}
if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
/* Page is swapped out. Do synthetic halt */
diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 23f49a2f4d14..6cceca64a6bc 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -562,17 +562,26 @@ static int initiator_cmp(void *priv, const struct list_head *a,
{
struct memory_initiator *ia;
struct memory_initiator *ib;
- unsigned long *p_nodes = priv;
ia = list_entry(a, struct memory_initiator, node);
ib = list_entry(b, struct memory_initiator, node);
- set_bit(ia->processor_pxm, p_nodes);
- set_bit(ib->processor_pxm, p_nodes);
-
return ia->processor_pxm - ib->processor_pxm;
}
+static int initiators_to_nodemask(unsigned long *p_nodes)
+{
+ struct memory_initiator *initiator;
+
+ if (list_empty(&initiators))
+ return -ENXIO;
+
+ list_for_each_entry(initiator, &initiators, node)
+ set_bit(initiator->processor_pxm, p_nodes);
+
+ return 0;
+}
+
static void hmat_register_target_initiators(struct memory_target *target)
{
static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
@@ -609,7 +618,10 @@ static void hmat_register_target_initiators(struct memory_target *target)
* initiators.
*/
bitmap_zero(p_nodes, MAX_NUMNODES);
- list_sort(p_nodes, &initiators, initiator_cmp);
+ list_sort(NULL, &initiators, initiator_cmp);
+ if (initiators_to_nodemask(p_nodes) < 0)
+ return;
+
if (!access0done) {
for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
loc = localities_types[i];
@@ -643,8 +655,9 @@ static void hmat_register_target_initiators(struct memory_target *target)
/* Access 1 ignores Generic Initiators */
bitmap_zero(p_nodes, MAX_NUMNODES);
- list_sort(p_nodes, &initiators, initiator_cmp);
- best = 0;
+ if (initiators_to_nodemask(p_nodes) < 0)
+ return;
+
for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
loc = localities_types[i];
if (!loc)
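
The underlying bug is that list_sort() gives no guarantee the comparator sees every element; with a single initiator it is never called at all, so p_nodes stayed empty and the target's initiator was never registered. The fix above populates the nodemask by walking the list explicitly. A quick user-space reminder of why a sort comparator is the wrong place for side effects (qsort() behaves the same way on common libcs):

#include <stdio.h>
#include <stdlib.h>

static int cmp_calls;

static int cmp(const void *a, const void *b)
{
	cmp_calls++;		/* side effect, like set_bit() was */
	return *(const int *)a - *(const int *)b;
}

int main(void)
{
	int one_initiator[1] = { 42 };

	qsort(one_initiator, 1, sizeof(one_initiator[0]), cmp);
	/* With one element there is nothing to compare: typically 0 calls. */
	printf("comparator calls: %d\n", cmp_calls);
	return 0;
}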
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index ddf17e2d266c..b9e336bacf17 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -109,7 +109,7 @@ struct clk *ahci_platform_find_clk(struct ahci_host_priv *hpriv, const char *con
int i;
for (i = 0; i < hpriv->n_clks; i++) {
- if (!strcmp(hpriv->clks[i].id, con_id))
+ if (hpriv->clks[i].id && !strcmp(hpriv->clks[i].id, con_id))
return hpriv->clks[i].clk;
}
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 271963805a38..f05018988a17 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2056,6 +2056,11 @@ static int btusb_setup_csr(struct hci_dev *hdev)
rp = (struct hci_rp_read_local_version *)skb->data;
+ bt_dev_info(hdev, "CSR: Setting up dongle with HCI ver=%u rev=%04x; LMP ver=%u subver=%04x; manufacturer=%u",
+ le16_to_cpu(rp->hci_ver), le16_to_cpu(rp->hci_rev),
+ le16_to_cpu(rp->lmp_ver), le16_to_cpu(rp->lmp_subver),
+ le16_to_cpu(rp->manufacturer));
+
/* Detect a wide host of Chinese controllers that aren't CSR.
*
* Known fake bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891
@@ -2118,6 +2123,7 @@ static int btusb_setup_csr(struct hci_dev *hdev)
* without these the controller will lock up.
*/
set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks);
+ set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks);
set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks);
set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks);
diff --git a/drivers/char/tpm/eventlog/acpi.c b/drivers/char/tpm/eventlog/acpi.c
index 1b18ce5ebab1..0913d3eb8d51 100644
--- a/drivers/char/tpm/eventlog/acpi.c
+++ b/drivers/char/tpm/eventlog/acpi.c
@@ -90,16 +90,21 @@ int tpm_read_log_acpi(struct tpm_chip *chip)
return -ENODEV;
if (tbl->header.length <
- sizeof(*tbl) + sizeof(struct acpi_tpm2_phy))
+ sizeof(*tbl) + sizeof(struct acpi_tpm2_phy)) {
+ acpi_put_table((struct acpi_table_header *)tbl);
return -ENODEV;
+ }
tpm2_phy = (void *)tbl + sizeof(*tbl);
len = tpm2_phy->log_area_minimum_length;
start = tpm2_phy->log_area_start_address;
- if (!start || !len)
+ if (!start || !len) {
+ acpi_put_table((struct acpi_table_header *)tbl);
return -ENODEV;
+ }
+ acpi_put_table((struct acpi_table_header *)tbl);
format = EFI_TCG2_EVENT_LOG_FORMAT_TCG_2;
} else {
/* Find TCPA entry in RSDT (ACPI_LOGICAL_ADDRESSING) */
@@ -120,8 +125,10 @@ int tpm_read_log_acpi(struct tpm_chip *chip)
break;
}
+ acpi_put_table((struct acpi_table_header *)buff);
format = EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2;
}
+
if (!len) {
dev_warn(&chip->dev, "%s: TCPA log area empty\n", __func__);
return -EIO;
@@ -156,5 +163,4 @@ err:
kfree(log->bios_event_log);
log->bios_event_log = NULL;
return ret;
-
}
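
Several of these TPM fixes address the same leak: acpi_get_table() takes a reference on the mapped table and every exit path must drop it with acpi_put_table(). A hedged kernel-style sketch of the balanced shape the code converges on (function name and error codes are illustrative):

static int tpm2_table_example(void)
{
	struct acpi_table_tpm2 *tbl;
	acpi_status st;
	int ret = 0;

	st = acpi_get_table(ACPI_SIG_TPM2, 1,
			    (struct acpi_table_header **)&tbl);
	if (ACPI_FAILURE(st))
		return -ENODEV;

	if (tbl->header.length < sizeof(*tbl))
		ret = -EINVAL;		/* do not return yet: a reference is held */

	/* ...otherwise read what is needed out of tbl... */

	acpi_put_table((struct acpi_table_header *)tbl);	/* on every path */
	return ret;
}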
diff --git a/drivers/char/tpm/st33zp24/i2c.c b/drivers/char/tpm/st33zp24/i2c.c
index a3aa411389e7..8156bb2af78c 100644
--- a/drivers/char/tpm/st33zp24/i2c.c
+++ b/drivers/char/tpm/st33zp24/i2c.c
@@ -6,13 +6,9 @@
#include <linux/module.h>
#include <linux/i2c.h>
-#include <linux/gpio.h>
-#include <linux/gpio/consumer.h>
-#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
+#include <linux/of.h>
#include <linux/acpi.h>
#include <linux/tpm.h>
-#include <linux/platform_data/st33zp24.h>
#include "../tpm.h"
#include "st33zp24.h"
@@ -22,7 +18,6 @@
struct st33zp24_i2c_phy {
struct i2c_client *client;
u8 buf[ST33ZP24_BUFSIZE + 1];
- int io_lpcpd;
};
/*
@@ -99,115 +94,6 @@ static const struct st33zp24_phy_ops i2c_phy_ops = {
.recv = st33zp24_i2c_recv,
};
-static const struct acpi_gpio_params lpcpd_gpios = { 1, 0, false };
-
-static const struct acpi_gpio_mapping acpi_st33zp24_gpios[] = {
- { "lpcpd-gpios", &lpcpd_gpios, 1 },
- {},
-};
-
-static int st33zp24_i2c_acpi_request_resources(struct i2c_client *client)
-{
- struct tpm_chip *chip = i2c_get_clientdata(client);
- struct st33zp24_dev *tpm_dev = dev_get_drvdata(&chip->dev);
- struct st33zp24_i2c_phy *phy = tpm_dev->phy_id;
- struct gpio_desc *gpiod_lpcpd;
- struct device *dev = &client->dev;
- int ret;
-
- ret = devm_acpi_dev_add_driver_gpios(dev, acpi_st33zp24_gpios);
- if (ret)
- return ret;
-
- /* Get LPCPD GPIO from ACPI */
- gpiod_lpcpd = devm_gpiod_get(dev, "lpcpd", GPIOD_OUT_HIGH);
- if (IS_ERR(gpiod_lpcpd)) {
- dev_err(&client->dev,
- "Failed to retrieve lpcpd-gpios from acpi.\n");
- phy->io_lpcpd = -1;
- /*
- * lpcpd pin is not specified. This is not an issue as
- * power management can be also managed by TPM specific
- * commands. So leave with a success status code.
- */
- return 0;
- }
-
- phy->io_lpcpd = desc_to_gpio(gpiod_lpcpd);
-
- return 0;
-}
-
-static int st33zp24_i2c_of_request_resources(struct i2c_client *client)
-{
- struct tpm_chip *chip = i2c_get_clientdata(client);
- struct st33zp24_dev *tpm_dev = dev_get_drvdata(&chip->dev);
- struct st33zp24_i2c_phy *phy = tpm_dev->phy_id;
- struct device_node *pp;
- int gpio;
- int ret;
-
- pp = client->dev.of_node;
- if (!pp) {
- dev_err(&client->dev, "No platform data\n");
- return -ENODEV;
- }
-
- /* Get GPIO from device tree */
- gpio = of_get_named_gpio(pp, "lpcpd-gpios", 0);
- if (gpio < 0) {
- dev_err(&client->dev,
- "Failed to retrieve lpcpd-gpios from dts.\n");
- phy->io_lpcpd = -1;
- /*
- * lpcpd pin is not specified. This is not an issue as
- * power management can be also managed by TPM specific
- * commands. So leave with a success status code.
- */
- return 0;
- }
- /* GPIO request and configuration */
- ret = devm_gpio_request_one(&client->dev, gpio,
- GPIOF_OUT_INIT_HIGH, "TPM IO LPCPD");
- if (ret) {
- dev_err(&client->dev, "Failed to request lpcpd pin\n");
- return -ENODEV;
- }
- phy->io_lpcpd = gpio;
-
- return 0;
-}
-
-static int st33zp24_i2c_request_resources(struct i2c_client *client)
-{
- struct tpm_chip *chip = i2c_get_clientdata(client);
- struct st33zp24_dev *tpm_dev = dev_get_drvdata(&chip->dev);
- struct st33zp24_i2c_phy *phy = tpm_dev->phy_id;
- struct st33zp24_platform_data *pdata;
- int ret;
-
- pdata = client->dev.platform_data;
- if (!pdata) {
- dev_err(&client->dev, "No platform data\n");
- return -ENODEV;
- }
-
- /* store for late use */
- phy->io_lpcpd = pdata->io_lpcpd;
-
- if (gpio_is_valid(pdata->io_lpcpd)) {
- ret = devm_gpio_request_one(&client->dev,
- pdata->io_lpcpd, GPIOF_OUT_INIT_HIGH,
- "TPM IO_LPCPD");
- if (ret) {
- dev_err(&client->dev, "Failed to request lpcpd pin\n");
- return ret;
- }
- }
-
- return 0;
-}
-
/*
* st33zp24_i2c_probe initialize the TPM device
* @param: client, the i2c_client description (TPM I2C description).
@@ -218,16 +104,8 @@ static int st33zp24_i2c_request_resources(struct i2c_client *client)
static int st33zp24_i2c_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
- int ret;
- struct st33zp24_platform_data *pdata;
struct st33zp24_i2c_phy *phy;
- if (!client) {
- pr_info("%s: i2c client is NULL. Device not accessible.\n",
- __func__);
- return -ENODEV;
- }
-
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
dev_info(&client->dev, "client not i2c capable\n");
return -ENODEV;
@@ -240,23 +118,7 @@ static int st33zp24_i2c_probe(struct i2c_client *client,
phy->client = client;
- pdata = client->dev.platform_data;
- if (!pdata && client->dev.of_node) {
- ret = st33zp24_i2c_of_request_resources(client);
- if (ret)
- return ret;
- } else if (pdata) {
- ret = st33zp24_i2c_request_resources(client);
- if (ret)
- return ret;
- } else if (ACPI_HANDLE(&client->dev)) {
- ret = st33zp24_i2c_acpi_request_resources(client);
- if (ret)
- return ret;
- }
-
- return st33zp24_probe(phy, &i2c_phy_ops, &client->dev, client->irq,
- phy->io_lpcpd);
+ return st33zp24_probe(phy, &i2c_phy_ops, &client->dev, client->irq);
}
/*
diff --git a/drivers/char/tpm/st33zp24/spi.c b/drivers/char/tpm/st33zp24/spi.c
index 22d184884694..2154059f0235 100644
--- a/drivers/char/tpm/st33zp24/spi.c
+++ b/drivers/char/tpm/st33zp24/spi.c
@@ -6,13 +6,9 @@
#include <linux/module.h>
#include <linux/spi/spi.h>
-#include <linux/gpio.h>
-#include <linux/gpio/consumer.h>
-#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
+#include <linux/of.h>
#include <linux/acpi.h>
#include <linux/tpm.h>
-#include <linux/platform_data/st33zp24.h>
#include "../tpm.h"
#include "st33zp24.h"
@@ -61,7 +57,6 @@ struct st33zp24_spi_phy {
u8 tx_buf[ST33ZP24_SPI_BUFFER_SIZE];
u8 rx_buf[ST33ZP24_SPI_BUFFER_SIZE];
- int io_lpcpd;
int latency;
};
@@ -218,115 +213,6 @@ static const struct st33zp24_phy_ops spi_phy_ops = {
.recv = st33zp24_spi_recv,
};
-static const struct acpi_gpio_params lpcpd_gpios = { 1, 0, false };
-
-static const struct acpi_gpio_mapping acpi_st33zp24_gpios[] = {
- { "lpcpd-gpios", &lpcpd_gpios, 1 },
- {},
-};
-
-static int st33zp24_spi_acpi_request_resources(struct spi_device *spi_dev)
-{
- struct tpm_chip *chip = spi_get_drvdata(spi_dev);
- struct st33zp24_dev *tpm_dev = dev_get_drvdata(&chip->dev);
- struct st33zp24_spi_phy *phy = tpm_dev->phy_id;
- struct gpio_desc *gpiod_lpcpd;
- struct device *dev = &spi_dev->dev;
- int ret;
-
- ret = devm_acpi_dev_add_driver_gpios(dev, acpi_st33zp24_gpios);
- if (ret)
- return ret;
-
- /* Get LPCPD GPIO from ACPI */
- gpiod_lpcpd = devm_gpiod_get(dev, "lpcpd", GPIOD_OUT_HIGH);
- if (IS_ERR(gpiod_lpcpd)) {
- dev_err(dev, "Failed to retrieve lpcpd-gpios from acpi.\n");
- phy->io_lpcpd = -1;
- /*
- * lpcpd pin is not specified. This is not an issue as
- * power management can be also managed by TPM specific
- * commands. So leave with a success status code.
- */
- return 0;
- }
-
- phy->io_lpcpd = desc_to_gpio(gpiod_lpcpd);
-
- return 0;
-}
-
-static int st33zp24_spi_of_request_resources(struct spi_device *spi_dev)
-{
- struct tpm_chip *chip = spi_get_drvdata(spi_dev);
- struct st33zp24_dev *tpm_dev = dev_get_drvdata(&chip->dev);
- struct st33zp24_spi_phy *phy = tpm_dev->phy_id;
- struct device_node *pp;
- int gpio;
- int ret;
-
- pp = spi_dev->dev.of_node;
- if (!pp) {
- dev_err(&spi_dev->dev, "No platform data\n");
- return -ENODEV;
- }
-
- /* Get GPIO from device tree */
- gpio = of_get_named_gpio(pp, "lpcpd-gpios", 0);
- if (gpio < 0) {
- dev_err(&spi_dev->dev,
- "Failed to retrieve lpcpd-gpios from dts.\n");
- phy->io_lpcpd = -1;
- /*
- * lpcpd pin is not specified. This is not an issue as
- * power management can be also managed by TPM specific
- * commands. So leave with a success status code.
- */
- return 0;
- }
- /* GPIO request and configuration */
- ret = devm_gpio_request_one(&spi_dev->dev, gpio,
- GPIOF_OUT_INIT_HIGH, "TPM IO LPCPD");
- if (ret) {
- dev_err(&spi_dev->dev, "Failed to request lpcpd pin\n");
- return -ENODEV;
- }
- phy->io_lpcpd = gpio;
-
- return 0;
-}
-
-static int st33zp24_spi_request_resources(struct spi_device *dev)
-{
- struct tpm_chip *chip = spi_get_drvdata(dev);
- struct st33zp24_dev *tpm_dev = dev_get_drvdata(&chip->dev);
- struct st33zp24_spi_phy *phy = tpm_dev->phy_id;
- struct st33zp24_platform_data *pdata;
- int ret;
-
- pdata = dev->dev.platform_data;
- if (!pdata) {
- dev_err(&dev->dev, "No platform data\n");
- return -ENODEV;
- }
-
- /* store for late use */
- phy->io_lpcpd = pdata->io_lpcpd;
-
- if (gpio_is_valid(pdata->io_lpcpd)) {
- ret = devm_gpio_request_one(&dev->dev,
- pdata->io_lpcpd, GPIOF_OUT_INIT_HIGH,
- "TPM IO_LPCPD");
- if (ret) {
- dev_err(&dev->dev, "%s : reset gpio_request failed\n",
- __FILE__);
- return ret;
- }
- }
-
- return 0;
-}
-
/*
* st33zp24_spi_probe initialize the TPM device
* @param: dev, the spi_device description (TPM SPI description).
@@ -335,17 +221,8 @@ static int st33zp24_spi_request_resources(struct spi_device *dev)
*/
static int st33zp24_spi_probe(struct spi_device *dev)
{
- int ret;
- struct st33zp24_platform_data *pdata;
struct st33zp24_spi_phy *phy;
- /* Check SPI platform functionnalities */
- if (!dev) {
- pr_info("%s: dev is NULL. Device is not accessible.\n",
- __func__);
- return -ENODEV;
- }
-
phy = devm_kzalloc(&dev->dev, sizeof(struct st33zp24_spi_phy),
GFP_KERNEL);
if (!phy)
@@ -353,27 +230,11 @@ static int st33zp24_spi_probe(struct spi_device *dev)
phy->spi_device = dev;
- pdata = dev->dev.platform_data;
- if (!pdata && dev->dev.of_node) {
- ret = st33zp24_spi_of_request_resources(dev);
- if (ret)
- return ret;
- } else if (pdata) {
- ret = st33zp24_spi_request_resources(dev);
- if (ret)
- return ret;
- } else if (ACPI_HANDLE(&dev->dev)) {
- ret = st33zp24_spi_acpi_request_resources(dev);
- if (ret)
- return ret;
- }
-
phy->latency = st33zp24_spi_evaluate_latency(phy);
if (phy->latency <= 0)
return -ENODEV;
- return st33zp24_probe(phy, &spi_phy_ops, &dev->dev, dev->irq,
- phy->io_lpcpd);
+ return st33zp24_probe(phy, &spi_phy_ops, &dev->dev, dev->irq);
}
/*
@@ -411,7 +272,7 @@ static SIMPLE_DEV_PM_OPS(st33zp24_spi_ops, st33zp24_pm_suspend,
static struct spi_driver st33zp24_spi_driver = {
.driver = {
- .name = TPM_ST33_SPI,
+ .name = "st33zp24-spi",
.pm = &st33zp24_spi_ops,
.of_match_table = of_match_ptr(of_st33zp24_spi_match),
.acpi_match_table = ACPI_PTR(st33zp24_spi_acpi_match),
diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c
index 15b393e92c8e..a5b554cd4778 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.c
+++ b/drivers/char/tpm/st33zp24/st33zp24.c
@@ -4,6 +4,7 @@
* Copyright (C) 2009 - 2016 STMicroelectronics
*/
+#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/kernel.h>
@@ -12,7 +13,7 @@
#include <linux/freezer.h>
#include <linux/string.h>
#include <linux/interrupt.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/io.h>
@@ -432,11 +433,18 @@ static const struct tpm_class_ops st33zp24_tpm = {
.req_canceled = st33zp24_req_canceled,
};
+static const struct acpi_gpio_params lpcpd_gpios = { 1, 0, false };
+
+static const struct acpi_gpio_mapping acpi_st33zp24_gpios[] = {
+ { "lpcpd-gpios", &lpcpd_gpios, 1 },
+ { },
+};
+
/*
* initialize the TPM device
*/
int st33zp24_probe(void *phy_id, const struct st33zp24_phy_ops *ops,
- struct device *dev, int irq, int io_lpcpd)
+ struct device *dev, int irq)
{
int ret;
u8 intmask = 0;
@@ -463,6 +471,25 @@ int st33zp24_probe(void *phy_id, const struct st33zp24_phy_ops *ops,
tpm_dev->locality = LOCALITY0;
+ if (ACPI_COMPANION(dev)) {
+ ret = devm_acpi_dev_add_driver_gpios(dev, acpi_st33zp24_gpios);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Get the LPCPD GPIO. If the lpcpd pin is not specified, this is not
+ * an issue, as power management can also be managed by TPM-specific
+ * commands.
+ */
+ tpm_dev->io_lpcpd = devm_gpiod_get_optional(dev, "lpcpd",
+ GPIOD_OUT_HIGH);
+ ret = PTR_ERR_OR_ZERO(tpm_dev->io_lpcpd);
+ if (ret) {
+ dev_err(dev, "failed to request lpcpd gpio: %d\n", ret);
+ return ret;
+ }
+
if (irq) {
/* INTERRUPT Setup */
init_waitqueue_head(&tpm_dev->read_queue);
@@ -525,8 +552,8 @@ int st33zp24_pm_suspend(struct device *dev)
int ret = 0;
- if (gpio_is_valid(tpm_dev->io_lpcpd))
- gpio_set_value(tpm_dev->io_lpcpd, 0);
+ if (tpm_dev->io_lpcpd)
+ gpiod_set_value_cansleep(tpm_dev->io_lpcpd, 0);
else
ret = tpm_pm_suspend(dev);
@@ -540,8 +567,8 @@ int st33zp24_pm_resume(struct device *dev)
struct st33zp24_dev *tpm_dev = dev_get_drvdata(&chip->dev);
int ret = 0;
- if (gpio_is_valid(tpm_dev->io_lpcpd)) {
- gpio_set_value(tpm_dev->io_lpcpd, 1);
+ if (tpm_dev->io_lpcpd) {
+ gpiod_set_value_cansleep(tpm_dev->io_lpcpd, 1);
ret = wait_for_stat(chip,
TPM_STS_VALID, chip->timeout_b,
&tpm_dev->read_queue, false);
diff --git a/drivers/char/tpm/st33zp24/st33zp24.h b/drivers/char/tpm/st33zp24/st33zp24.h
index b387a476c555..5acc85f711e6 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.h
+++ b/drivers/char/tpm/st33zp24/st33zp24.h
@@ -7,6 +7,9 @@
#ifndef __LOCAL_ST33ZP24_H__
#define __LOCAL_ST33ZP24_H__
+#define TPM_ST33_I2C "st33zp24-i2c"
+#define TPM_ST33_SPI "st33zp24-spi"
+
#define TPM_WRITE_DIRECTION 0x80
#define ST33ZP24_BUFSIZE 2048
@@ -17,7 +20,7 @@ struct st33zp24_dev {
int locality;
int irq;
u32 intrs;
- int io_lpcpd;
+ struct gpio_desc *io_lpcpd;
wait_queue_head_t read_queue;
};
@@ -33,6 +36,6 @@ int st33zp24_pm_resume(struct device *dev);
#endif
int st33zp24_probe(void *phy_id, const struct st33zp24_phy_ops *ops,
- struct device *dev, int irq, int io_lpcpd);
+ struct device *dev, int irq);
void st33zp24_remove(struct tpm_chip *chip);
#endif /* __LOCAL_ST33ZP24_H__ */
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 783d65fc71f0..741d8f3e8fb3 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -373,6 +373,11 @@ out:
}
EXPORT_SYMBOL_GPL(tpm_chip_alloc);
+static void tpm_put_device(void *dev)
+{
+ put_device(dev);
+}
+
/**
* tpmm_chip_alloc() - allocate a new struct tpm_chip instance
* @pdev: parent device to which the chip is associated
@@ -391,7 +396,7 @@ struct tpm_chip *tpmm_chip_alloc(struct device *pdev,
return chip;
rc = devm_add_action_or_reset(pdev,
- (void (*)(void *)) put_device,
+ tpm_put_device,
&chip->dev);
if (rc)
return ERR_PTR(rc);
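
The cast removed here called put_device() through a pointer of a different function type, which is undefined behaviour and trips control-flow-integrity checking; devm actions want an exact void (*)(void *). A small user-space illustration of the wrapper pattern the patch adopts (types and the action runner are stand-ins):

#include <stdio.h>

struct device { const char *name; };

static void put_device(struct device *dev)
{
	printf("put %s\n", dev->name);
}

/* Correct: a wrapper with the exact void (*)(void *) type the API expects. */
static void tpm_put_device(void *data)
{
	put_device(data);
}

/* Stand-in for devm_add_action_or_reset() running the action later. */
static void run_devm_action(void (*action)(void *), void *data)
{
	action(data);
}

int main(void)
{
	struct device dev = { .name = "tpm0" };

	run_devm_action(tpm_put_device, &dev);
	return 0;
}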
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 1621ce818705..d69905233aff 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -401,13 +401,14 @@ int tpm_pm_suspend(struct device *dev)
!pm_suspend_via_firmware())
goto suspended;
- if (!tpm_chip_start(chip)) {
+ rc = tpm_try_get_ops(chip);
+ if (!rc) {
if (chip->flags & TPM_CHIP_FLAG_TPM2)
tpm2_shutdown(chip, TPM2_SU_STATE);
else
rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
- tpm_chip_stop(chip);
+ tpm_put_ops(chip);
}
suspended:
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index 18606651d1aa..16fc481d6095 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -252,7 +252,7 @@ static int __crb_relinquish_locality(struct device *dev,
iowrite32(CRB_LOC_CTRL_RELINQUISH, &priv->regs_h->loc_ctrl);
if (!crb_wait_for_reg_32(&priv->regs_h->loc_state, mask, value,
TPM2_TIMEOUT_C)) {
- dev_warn(dev, "TPM_LOC_STATE_x.requestAccess timed out\n");
+ dev_warn(dev, "TPM_LOC_STATE_x.Relinquish timed out\n");
return -ETIME;
}
@@ -676,12 +676,16 @@ static int crb_acpi_add(struct acpi_device *device)
/* Should the FIFO driver handle this? */
sm = buf->start_method;
- if (sm == ACPI_TPM2_MEMORY_MAPPED)
- return -ENODEV;
+ if (sm == ACPI_TPM2_MEMORY_MAPPED) {
+ rc = -ENODEV;
+ goto out;
+ }
priv = devm_kzalloc(dev, sizeof(struct crb_priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
+ if (!priv) {
+ rc = -ENOMEM;
+ goto out;
+ }
if (sm == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC) {
if (buf->header.length < (sizeof(*buf) + sizeof(*crb_smc))) {
@@ -689,7 +693,8 @@ static int crb_acpi_add(struct acpi_device *device)
FW_BUG "TPM2 ACPI table has wrong size %u for start method type %d\n",
buf->header.length,
ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
crb_smc = ACPI_ADD_PTR(struct tpm2_crb_smc, buf, sizeof(*buf));
priv->smc_func_id = crb_smc->smc_func_id;
@@ -700,17 +705,23 @@ static int crb_acpi_add(struct acpi_device *device)
rc = crb_map_io(device, priv, buf);
if (rc)
- return rc;
+ goto out;
chip = tpmm_chip_alloc(dev, &tpm_crb);
- if (IS_ERR(chip))
- return PTR_ERR(chip);
+ if (IS_ERR(chip)) {
+ rc = PTR_ERR(chip);
+ goto out;
+ }
dev_set_drvdata(&chip->dev, priv);
chip->acpi_dev_handle = device->handle;
chip->flags = TPM_CHIP_FLAG_TPM2;
- return tpm_chip_register(chip);
+ rc = tpm_chip_register(chip);
+
+out:
+ acpi_put_table((struct acpi_table_header *)buf);
+ return rc;
}
static int crb_acpi_remove(struct acpi_device *device)
diff --git a/drivers/char/tpm/tpm_ftpm_tee.c b/drivers/char/tpm/tpm_ftpm_tee.c
index 5c233423c56f..deff23bb54bf 100644
--- a/drivers/char/tpm/tpm_ftpm_tee.c
+++ b/drivers/char/tpm/tpm_ftpm_tee.c
@@ -397,7 +397,13 @@ static int __init ftpm_mod_init(void)
if (rc)
return rc;
- return driver_register(&ftpm_tee_driver.driver);
+ rc = driver_register(&ftpm_tee_driver.driver);
+ if (rc) {
+ platform_driver_unregister(&ftpm_tee_plat_driver);
+ return rc;
+ }
+
+ return 0;
}
static void __exit ftpm_mod_exit(void)
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index bcff6429e0b4..ed5dabd3c72d 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -125,6 +125,7 @@ static int check_acpi_tpm2(struct device *dev)
const struct acpi_device_id *aid = acpi_match_device(tpm_acpi_tbl, dev);
struct acpi_table_tpm2 *tbl;
acpi_status st;
+ int ret = 0;
if (!aid || aid->driver_data != DEVICE_IS_TPM2)
return 0;
@@ -132,8 +133,7 @@ static int check_acpi_tpm2(struct device *dev)
/* If the ACPI TPM2 signature is matched then a global ACPI_SIG_TPM2
* table is mandatory
*/
- st =
- acpi_get_table(ACPI_SIG_TPM2, 1, (struct acpi_table_header **)&tbl);
+ st = acpi_get_table(ACPI_SIG_TPM2, 1, (struct acpi_table_header **)&tbl);
if (ACPI_FAILURE(st) || tbl->header.length < sizeof(*tbl)) {
dev_err(dev, FW_BUG "failed to get TPM2 ACPI table\n");
return -EINVAL;
@@ -141,9 +141,10 @@ static int check_acpi_tpm2(struct device *dev)
/* The tpm2_crb driver handles this device */
if (tbl->start_method != ACPI_TPM2_MEMORY_MAPPED)
- return -ENODEV;
+ ret = -ENODEV;
- return 0;
+ acpi_put_table((struct acpi_table_header *)tbl);
+ return ret;
}
#else
static int check_acpi_tpm2(struct device *dev)
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 757623bacfd5..3f98e587b3e8 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -682,15 +682,19 @@ static bool tpm_tis_req_canceled(struct tpm_chip *chip, u8 status)
{
struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
- switch (priv->manufacturer_id) {
- case TPM_VID_WINBOND:
- return ((status == TPM_STS_VALID) ||
- (status == (TPM_STS_VALID | TPM_STS_COMMAND_READY)));
- case TPM_VID_STM:
- return (status == (TPM_STS_VALID | TPM_STS_COMMAND_READY));
- default:
- return (status == TPM_STS_COMMAND_READY);
+ if (!test_bit(TPM_TIS_DEFAULT_CANCELLATION, &priv->flags)) {
+ switch (priv->manufacturer_id) {
+ case TPM_VID_WINBOND:
+ return ((status == TPM_STS_VALID) ||
+ (status == (TPM_STS_VALID | TPM_STS_COMMAND_READY)));
+ case TPM_VID_STM:
+ return (status == (TPM_STS_VALID | TPM_STS_COMMAND_READY));
+ default:
+ break;
+ }
}
+
+ return status == TPM_STS_COMMAND_READY;
}
static irqreturn_t tis_int_handler(int dummy, void *dev_id)
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index 66a5a13cd1df..b68479e0de10 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -86,6 +86,7 @@ enum tis_defaults {
enum tpm_tis_flags {
TPM_TIS_ITPM_WORKAROUND = BIT(0),
TPM_TIS_INVALID_STATUS = BIT(1),
+ TPM_TIS_DEFAULT_CANCELLATION = BIT(2),
};
struct tpm_tis_data {
diff --git a/drivers/char/tpm/tpm_tis_i2c.c b/drivers/char/tpm/tpm_tis_i2c.c
index 0692510dfcab..f3a7251c8e38 100644
--- a/drivers/char/tpm/tpm_tis_i2c.c
+++ b/drivers/char/tpm/tpm_tis_i2c.c
@@ -49,7 +49,7 @@
/* Masks with bits that must be read zero */
#define TPM_ACCESS_READ_ZERO 0x48
-#define TPM_INT_ENABLE_ZERO 0x7FFFFF6
+#define TPM_INT_ENABLE_ZERO 0x7FFFFF60
#define TPM_STS_READ_ZERO 0x23
#define TPM_INTF_CAPABILITY_ZERO 0x0FFFF000
#define TPM_I2C_INTERFACE_CAPABILITY_ZERO 0x80000000
@@ -329,6 +329,7 @@ static int tpm_tis_i2c_probe(struct i2c_client *dev,
if (!phy->io_buf)
return -ENOMEM;
+ set_bit(TPM_TIS_DEFAULT_CANCELLATION, &phy->priv.flags);
phy->i2c_client = dev;
/* must precede all communication with the tpm */
diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c
index b174f727a8ef..16870943a13e 100644
--- a/drivers/clk/at91/at91rm9200.c
+++ b/drivers/clk/at91/at91rm9200.c
@@ -40,7 +40,7 @@ static const struct clk_pll_characteristics rm9200_pll_characteristics = {
};
static const struct sck at91rm9200_systemck[] = {
- { .n = "udpck", .p = "usbck", .id = 2 },
+ { .n = "udpck", .p = "usbck", .id = 1 },
{ .n = "uhpck", .p = "usbck", .id = 4 },
{ .n = "pck0", .p = "prog0", .id = 8 },
{ .n = "pck1", .p = "prog1", .id = 9 },
diff --git a/drivers/clk/qcom/gcc-sc8280xp.c b/drivers/clk/qcom/gcc-sc8280xp.c
index a18ed88f3b82..b3198784e1c3 100644
--- a/drivers/clk/qcom/gcc-sc8280xp.c
+++ b/drivers/clk/qcom/gcc-sc8280xp.c
@@ -5364,6 +5364,8 @@ static struct clk_branch gcc_ufs_1_card_clkref_clk = {
.enable_mask = BIT(0),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_ufs_1_card_clkref_clk",
+ .parent_data = &gcc_parent_data_tcxo,
+ .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -5432,6 +5434,8 @@ static struct clk_branch gcc_ufs_card_clkref_clk = {
.enable_mask = BIT(0),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_ufs_card_clkref_clk",
+ .parent_data = &gcc_parent_data_tcxo,
+ .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
@@ -5848,6 +5852,8 @@ static struct clk_branch gcc_ufs_ref_clkref_clk = {
.enable_mask = BIT(0),
.hw.init = &(const struct clk_init_data) {
.name = "gcc_ufs_ref_clkref_clk",
+ .parent_data = &gcc_parent_data_tcxo,
+ .num_parents = 1,
.ops = &clk_branch2_ops,
},
},
diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
index 7cf5e130e92f..0f21a8a767ac 100644
--- a/drivers/clk/qcom/gdsc.c
+++ b/drivers/clk/qcom/gdsc.c
@@ -11,7 +11,6 @@
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/pm_domain.h>
-#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/reset-controller.h>
@@ -56,22 +55,6 @@ enum gdsc_status {
GDSC_ON
};
-static int gdsc_pm_runtime_get(struct gdsc *sc)
-{
- if (!sc->dev)
- return 0;
-
- return pm_runtime_resume_and_get(sc->dev);
-}
-
-static int gdsc_pm_runtime_put(struct gdsc *sc)
-{
- if (!sc->dev)
- return 0;
-
- return pm_runtime_put_sync(sc->dev);
-}
-
/* Returns 1 if GDSC status is status, 0 if not, and < 0 on error */
static int gdsc_check_status(struct gdsc *sc, enum gdsc_status status)
{
@@ -271,8 +254,9 @@ static void gdsc_retain_ff_on(struct gdsc *sc)
regmap_update_bits(sc->regmap, sc->gdscr, mask, mask);
}
-static int _gdsc_enable(struct gdsc *sc)
+static int gdsc_enable(struct generic_pm_domain *domain)
{
+ struct gdsc *sc = domain_to_gdsc(domain);
int ret;
if (sc->pwrsts == PWRSTS_ON)
@@ -328,22 +312,11 @@ static int _gdsc_enable(struct gdsc *sc)
return 0;
}
-static int gdsc_enable(struct generic_pm_domain *domain)
+static int gdsc_disable(struct generic_pm_domain *domain)
{
struct gdsc *sc = domain_to_gdsc(domain);
int ret;
- ret = gdsc_pm_runtime_get(sc);
- if (ret)
- return ret;
-
- return _gdsc_enable(sc);
-}
-
-static int _gdsc_disable(struct gdsc *sc)
-{
- int ret;
-
if (sc->pwrsts == PWRSTS_ON)
return gdsc_assert_reset(sc);
@@ -388,18 +361,6 @@ static int _gdsc_disable(struct gdsc *sc)
return 0;
}
-static int gdsc_disable(struct generic_pm_domain *domain)
-{
- struct gdsc *sc = domain_to_gdsc(domain);
- int ret;
-
- ret = _gdsc_disable(sc);
-
- gdsc_pm_runtime_put(sc);
-
- return ret;
-}
-
static int gdsc_init(struct gdsc *sc)
{
u32 mask, val;
@@ -447,11 +408,6 @@ static int gdsc_init(struct gdsc *sc)
return ret;
}
- /* ...and the power-domain */
- ret = gdsc_pm_runtime_get(sc);
- if (ret)
- goto err_disable_supply;
-
/*
* Votable GDSCs can be ON due to Vote from other masters.
* If a Votable GDSC is ON, make sure we have a Vote.
@@ -459,14 +415,14 @@ static int gdsc_init(struct gdsc *sc)
if (sc->flags & VOTABLE) {
ret = gdsc_update_collapse_bit(sc, false);
if (ret)
- goto err_put_rpm;
+ goto err_disable_supply;
}
/* Turn on HW trigger mode if supported */
if (sc->flags & HW_CTRL) {
ret = gdsc_hwctrl(sc, true);
if (ret < 0)
- goto err_put_rpm;
+ goto err_disable_supply;
}
/*
@@ -496,13 +452,10 @@ static int gdsc_init(struct gdsc *sc)
ret = pm_genpd_init(&sc->pd, NULL, !on);
if (ret)
- goto err_put_rpm;
+ goto err_disable_supply;
return 0;
-err_put_rpm:
- if (on)
- gdsc_pm_runtime_put(sc);
err_disable_supply:
if (on && sc->rsupply)
regulator_disable(sc->rsupply);
@@ -541,8 +494,6 @@ int gdsc_register(struct gdsc_desc *desc,
for (i = 0; i < num; i++) {
if (!scs[i])
continue;
- if (pm_runtime_enabled(dev))
- scs[i]->dev = dev;
scs[i]->regmap = regmap;
scs[i]->rcdev = rcdev;
ret = gdsc_init(scs[i]);
diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h
index 981a12c8502d..803512688336 100644
--- a/drivers/clk/qcom/gdsc.h
+++ b/drivers/clk/qcom/gdsc.h
@@ -30,7 +30,6 @@ struct reset_controller_dev;
* @resets: ids of resets associated with this gdsc
* @reset_count: number of @resets
* @rcdev: reset controller
- * @dev: the device holding the GDSC, used for pm_runtime calls
*/
struct gdsc {
struct generic_pm_domain pd;
@@ -74,7 +73,6 @@ struct gdsc {
const char *supply;
struct regulator *rsupply;
- struct device *dev;
};
struct gdsc_desc {
diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c
index 273f77d54dab..e6d6cbf8c4e6 100644
--- a/drivers/clk/samsung/clk-exynos-clkout.c
+++ b/drivers/clk/samsung/clk-exynos-clkout.c
@@ -81,17 +81,19 @@ MODULE_DEVICE_TABLE(of, exynos_clkout_ids);
static int exynos_clkout_match_parent_dev(struct device *dev, u32 *mux_mask)
{
const struct exynos_clkout_variant *variant;
+ const struct of_device_id *match;
if (!dev->parent) {
dev_err(dev, "not instantiated from MFD\n");
return -EINVAL;
}
- variant = of_device_get_match_data(dev->parent);
- if (!variant) {
+ match = of_match_device(exynos_clkout_ids, dev->parent);
+ if (!match) {
dev_err(dev, "cannot match parent device\n");
return -EINVAL;
}
+ variant = match->data;
*mux_mask = variant->mux_mask;
diff --git a/drivers/clk/samsung/clk-exynos7885.c b/drivers/clk/samsung/clk-exynos7885.c
index 62ce6814f141..0d2a950ed184 100644
--- a/drivers/clk/samsung/clk-exynos7885.c
+++ b/drivers/clk/samsung/clk-exynos7885.c
@@ -231,7 +231,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = {
CLK_CON_DIV_PLL_SHARED0_DIV2, 0, 1),
DIV(CLK_DOUT_SHARED0_DIV3, "dout_shared0_div3", "fout_shared0_pll",
CLK_CON_DIV_PLL_SHARED0_DIV3, 0, 2),
- DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "fout_shared0_pll",
+ DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "dout_shared0_div2",
CLK_CON_DIV_PLL_SHARED0_DIV4, 0, 1),
DIV(CLK_DOUT_SHARED0_DIV5, "dout_shared0_div5", "fout_shared0_pll",
CLK_CON_DIV_PLL_SHARED0_DIV5, 0, 3),
@@ -239,7 +239,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = {
CLK_CON_DIV_PLL_SHARED1_DIV2, 0, 1),
DIV(CLK_DOUT_SHARED1_DIV3, "dout_shared1_div3", "fout_shared1_pll",
CLK_CON_DIV_PLL_SHARED1_DIV3, 0, 2),
- DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "fout_shared1_pll",
+ DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "dout_shared1_div2",
CLK_CON_DIV_PLL_SHARED1_DIV4, 0, 1),
/* CORE */
diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index 969a552da8d2..a0d66fabf073 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -51,7 +51,7 @@ static int riscv_clock_next_event(unsigned long delta,
static unsigned int riscv_clock_event_irq;
static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = {
.name = "riscv_timer_clockevent",
- .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP,
+ .features = CLOCK_EVT_FEAT_ONESHOT,
.rating = 100,
.set_next_event = riscv_clock_next_event,
};
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index 97086fab698e..903325aac991 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -8,6 +8,13 @@
static bool nohmem;
module_param_named(disable, nohmem, bool, 0444);
+static struct resource hmem_active = {
+ .name = "HMEM devices",
+ .start = 0,
+ .end = -1,
+ .flags = IORESOURCE_MEM,
+};
+
void hmem_register_device(int target_nid, struct resource *r)
{
/* define a clean / non-busy resource for the platform device */
@@ -41,6 +48,12 @@ void hmem_register_device(int target_nid, struct resource *r)
goto out_pdev;
}
+ if (!__request_region(&hmem_active, res.start, resource_size(&res),
+ dev_name(&pdev->dev), 0)) {
+ dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res);
+ goto out_active;
+ }
+
pdev->dev.numa_node = numa_map_to_online_node(target_nid);
info = (struct memregion_info) {
.target_node = target_nid,
@@ -66,6 +79,8 @@ void hmem_register_device(int target_nid, struct resource *r)
return;
out_resource:
+ __release_region(&hmem_active, res.start, resource_size(&res));
+out_active:
platform_device_put(pdev);
out_pdev:
memregion_free(id);
@@ -73,15 +88,6 @@ out_pdev:
static __init int hmem_register_one(struct resource *res, void *data)
{
- /*
- * If the resource is not a top-level resource it was already
- * assigned to a device by the HMAT parsing.
- */
- if (res->parent != &iomem_resource) {
- pr_info("HMEM: skip %pr, already claimed\n", res);
- return 0;
- }
-
hmem_register_device(phys_to_target_node(res->start), res);
return 0;
diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c
index a52236e11e5f..4d6c5327471a 100644
--- a/drivers/firmware/efi/earlycon.c
+++ b/drivers/firmware/efi/earlycon.c
@@ -29,8 +29,8 @@ static void *efi_fb;
*/
static int __init efi_earlycon_remap_fb(void)
{
- /* bail if there is no bootconsole or it has been disabled already */
- if (!earlycon_console || !(earlycon_console->flags & CON_ENABLED))
+ /* bail if there is no bootconsole or it was unregistered already */
+ if (!earlycon_console || !console_is_registered(earlycon_console))
return 0;
efi_fb = memremap(fb_base, screen_info.lfb_size,
@@ -42,8 +42,8 @@ early_initcall(efi_earlycon_remap_fb);
static int __init efi_earlycon_unmap_fb(void)
{
- /* unmap the bootconsole fb unless keep_bootcon has left it enabled */
- if (efi_fb && !(earlycon_console->flags & CON_ENABLED))
+ /* unmap the bootconsole fb unless keep_bootcon left it registered */
+ if (efi_fb && !console_is_registered(earlycon_console))
memunmap(efi_fb);
return 0;
}
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 3bddc152fcd4..97a9e84840a0 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -207,7 +207,7 @@ static int efi_pstore_erase(struct pstore_record *record)
static struct pstore_info efi_pstore_info = {
.owner = THIS_MODULE,
- .name = "efi",
+ .name = KBUILD_MODNAME,
.flags = PSTORE_FLAGS_DMESG,
.open = efi_pstore_open,
.close = efi_pstore_close,
diff --git a/drivers/gpio/gpio-amd8111.c b/drivers/gpio/gpio-amd8111.c
index 14e6b3e64add..6f3ded619c8b 100644
--- a/drivers/gpio/gpio-amd8111.c
+++ b/drivers/gpio/gpio-amd8111.c
@@ -226,7 +226,10 @@ found:
ioport_unmap(gp.pm);
goto out;
}
+ return 0;
+
out:
+ pci_dev_put(pdev);
return err;
}
@@ -234,6 +237,7 @@ static void __exit amd_gpio_exit(void)
{
gpiochip_remove(&gp.chip);
ioport_unmap(gp.pm);
+ pci_dev_put(gp.pdev);
}
module_init(amd_gpio_init);
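Both pci_dev_put() calls added above balance the reference that pci_get_device() takes in the probe path, once on the error path and once at module exit. A reduced sketch of that get/put pairing, with hypothetical names:

#include <linux/pci.h>

static struct pci_dev *example_pdev;

static int example_bind(unsigned int vendor, unsigned int device)
{
	struct pci_dev *pdev;

	/* pci_get_device() returns a referenced device, or NULL. */
	pdev = pci_get_device(vendor, device, NULL);
	if (!pdev)
		return -ENODEV;

	example_pdev = pdev;	/* keep the reference while bound */
	return 0;
}

static void example_unbind(void)
{
	pci_dev_put(example_pdev);	/* drop it on exit or on any error path */
	example_pdev = NULL;
}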
diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c
index 870910bb9dd3..200e43a6f4b4 100644
--- a/drivers/gpio/gpio-rockchip.c
+++ b/drivers/gpio/gpio-rockchip.c
@@ -610,6 +610,7 @@ static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank)
return -ENODATA;
pctldev = of_pinctrl_get(pctlnp);
+ of_node_put(pctlnp);
if (!pctldev)
return -ENODEV;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 4756ea08894f..a70522aef355 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -526,12 +526,13 @@ static int gpiochip_setup_dev(struct gpio_device *gdev)
if (ret)
return ret;
+ /* From this point, the .release() function cleans up gpio_device */
+ gdev->dev.release = gpiodevice_release;
+
ret = gpiochip_sysfs_register(gdev);
if (ret)
goto err_remove_device;
- /* From this point, the .release() function cleans up gpio_device */
- gdev->dev.release = gpiodevice_release;
dev_dbg(&gdev->dev, "registered GPIOs %d to %d on %s\n", gdev->base,
gdev->base + gdev->ngpio - 1, gdev->chip->label ? : "generic");
@@ -597,10 +598,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
struct fwnode_handle *fwnode = NULL;
struct gpio_device *gdev;
unsigned long flags;
- int base = gc->base;
unsigned int i;
+ u32 ngpios = 0;
+ int base = 0;
int ret = 0;
- u32 ngpios;
if (gc->fwnode)
fwnode = gc->fwnode;
@@ -647,17 +648,12 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
else
gdev->owner = THIS_MODULE;
- gdev->descs = kcalloc(gc->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL);
- if (!gdev->descs) {
- ret = -ENOMEM;
- goto err_free_dev_name;
- }
-
/*
* Try the device properties if the driver didn't supply the number
* of GPIO lines.
*/
- if (gc->ngpio == 0) {
+ ngpios = gc->ngpio;
+ if (ngpios == 0) {
ret = device_property_read_u32(&gdev->dev, "ngpios", &ngpios);
if (ret == -ENODATA)
/*
@@ -668,7 +664,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
*/
ngpios = 0;
else if (ret)
- goto err_free_descs;
+ goto err_free_dev_name;
gc->ngpio = ngpios;
}
@@ -676,13 +672,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
if (gc->ngpio == 0) {
chip_err(gc, "tried to insert a GPIO chip with zero lines\n");
ret = -EINVAL;
- goto err_free_descs;
+ goto err_free_dev_name;
}
if (gc->ngpio > FASTPATH_NGPIO)
chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n",
gc->ngpio, FASTPATH_NGPIO);
+ gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL);
+ if (!gdev->descs) {
+ ret = -ENOMEM;
+ goto err_free_dev_name;
+ }
+
gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL);
if (!gdev->label) {
ret = -ENOMEM;
@@ -701,11 +703,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
* it may be a pipe dream. It will not happen before we get rid
* of the sysfs interface anyways.
*/
+ base = gc->base;
if (base < 0) {
base = gpiochip_find_base(gc->ngpio);
if (base < 0) {
- ret = base;
spin_unlock_irqrestore(&gpio_lock, flags);
+ ret = base;
+ base = 0;
goto err_free_label;
}
/*
@@ -816,6 +820,11 @@ err_remove_of_chip:
err_free_gpiochip_mask:
gpiochip_remove_pin_ranges(gc);
gpiochip_free_valid_mask(gc);
+ if (gdev->dev.release) {
+ /* release() has been registered by gpiochip_setup_dev() */
+ put_device(&gdev->dev);
+ goto err_print_message;
+ }
err_remove_from_list:
spin_lock_irqsave(&gpio_lock, flags);
list_del(&gdev->list);
@@ -829,13 +838,14 @@ err_free_dev_name:
err_free_ida:
ida_free(&gpio_ida, gdev->id);
err_free_gdev:
+ kfree(gdev);
+err_print_message:
/* failures here can mean systems won't boot... */
if (ret != -EPROBE_DEFER) {
pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__,
- gdev->base, gdev->base + gdev->ngpio - 1,
+ base, base + (int)ngpios - 1,
gc->label ? : "generic", ret);
}
- kfree(gdev);
return ret;
}
EXPORT_SYMBOL_GPL(gpiochip_add_data_with_key);
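The reordering above makes sure gc->ngpio is known, either from the driver or from the firmware "ngpios" property, before the descriptor array is sized. A hedged sketch of that lookup; example_ngpios() is a hypothetical helper, device_property_read_u32() is the real call:

#include <linux/property.h>

/* Return the line count, preferring the driver-supplied value. */
static int example_ngpios(struct device *dev, u32 driver_ngpio)
{
	u32 ngpios = driver_ngpio;
	int ret;

	if (ngpios == 0) {
		ret = device_property_read_u32(dev, "ngpios", &ngpios);
		if (ret == -ENODATA)
			ngpios = 0;	/* property absent; let the caller decide */
		else if (ret)
			return ret;
	}

	if (ngpios == 0)
		return -EINVAL;	/* neither driver nor firmware supplied a count */

	return ngpios;
}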
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 0b52af415b28..ce64ca1c6e66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
break;
case IP_VERSION(3, 0, 2):
fw_name = FIRMWARE_VANGOGH;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
break;
case IP_VERSION(3, 0, 16):
fw_name = FIRMWARE_DIMGREY_CAVEFISH;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 1122bd4eae98..4d780e4430e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -907,13 +907,13 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
/**
- * sdma_v4_0_gfx_stop - stop the gfx async dma engines
+ * sdma_v4_0_gfx_enable - enable the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
- * Stop the gfx async dma ring buffers (VEGA10).
+ * @enable: enable SDMA RB/IB
+ * Control the gfx async dma ring buffers (VEGA10).
*/
-static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
{
u32 rb_cntl, ib_cntl;
int i;
@@ -922,10 +922,10 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0);
WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
}
}
@@ -1044,7 +1044,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
int i;
if (!enable) {
- sdma_v4_0_gfx_stop(adev);
+ sdma_v4_0_gfx_enable(adev, enable);
sdma_v4_0_rlc_stop(adev);
if (adev->sdma.has_page_queue)
sdma_v4_0_page_stop(adev);
@@ -1960,8 +1960,10 @@ static int sdma_v4_0_suspend(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SMU saves SDMA state for us */
- if (adev->in_s0ix)
+ if (adev->in_s0ix) {
+ sdma_v4_0_gfx_enable(adev, false);
return 0;
+ }
return sdma_v4_0_hw_fini(adev);
}
@@ -1971,8 +1973,12 @@ static int sdma_v4_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SMU restores SDMA state for us */
- if (adev->in_s0ix)
+ if (adev->in_s0ix) {
+ sdma_v4_0_enable(adev, true);
+ sdma_v4_0_gfx_enable(adev, true);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
return 0;
+ }
return sdma_v4_0_hw_init(adev);
}
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 6925e0280dbe..f4f3d2665a6b 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -5,6 +5,7 @@ menu "Display Engine Configuration"
config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
+ depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64
select SND_HDA_COMPONENT if SND_HDA_CORE
select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128)
help
@@ -12,6 +13,12 @@ config DRM_AMD_DC
support for AMDGPU. This adds required support for Vega and
Raven ASICs.
+ calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64)
+ architectures built with Clang (all released versions), whereby the stack
+ frame gets blown up to well over 5k. This would cause an immediate kernel
+ panic on most architectures. We'll revert this when the following bug report
+ has been resolved: https://github.com/llvm/llvm-project/issues/41896.
+
config DRM_AMD_DC_DCN
def_bool n
help
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 630f3395e90a..a0207a8f8756 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -1153,7 +1153,7 @@ struct vba_vars_st {
double UrgBurstFactorLumaPre[DC__NUM_DPP__MAX];
double UrgBurstFactorChromaPre[DC__NUM_DPP__MAX];
bool NotUrgentLatencyHidingPre[DC__NUM_DPP__MAX];
- bool LinkCapacitySupport[DC__NUM_DPP__MAX];
+ bool LinkCapacitySupport[DC__VOLTAGE_STATES];
bool VREADY_AT_OR_AFTER_VSYNC[DC__NUM_DPP__MAX];
unsigned int MIN_DST_Y_NEXT_START[DC__NUM_DPP__MAX];
unsigned int VFrontPorch[DC__NUM_DPP__MAX];
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index 40d8ca37f5bc..aa51c61a78c7 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -2720,6 +2720,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge,
* if supported. In any case the default RGB888 format is added
*/
+ /* Default 8bit RGB fallback */
+ output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24;
+
if (max_bpc >= 16 && info->bpc == 16) {
if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444)
output_fmts[i++] = MEDIA_BUS_FMT_YUV16_1X48;
@@ -2753,9 +2756,6 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge,
if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444)
output_fmts[i++] = MEDIA_BUS_FMT_YUV8_1X24;
- /* Default 8bit RGB fallback */
- output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24;
-
*num_output_fmts = i;
return output_fmts;
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 3c3561942eb6..eb24322df721 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -931,9 +931,9 @@ static void ti_sn_bridge_set_video_timings(struct ti_sn65dsi86 *pdata)
&pdata->bridge.encoder->crtc->state->adjusted_mode;
u8 hsync_polarity = 0, vsync_polarity = 0;
- if (mode->flags & DRM_MODE_FLAG_PHSYNC)
+ if (mode->flags & DRM_MODE_FLAG_NHSYNC)
hsync_polarity = CHA_HSYNC_POLARITY;
- if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+ if (mode->flags & DRM_MODE_FLAG_NVSYNC)
vsync_polarity = CHA_VSYNC_POLARITY;
ti_sn65dsi86_write_u16(pdata, SN_CHA_ACTIVE_LINE_LENGTH_LOW_REG,
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 35138f8a375c..b602cd72a120 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -571,12 +571,20 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
{
struct drm_gem_object *obj = vma->vm_private_data;
struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
- int ret;
WARN_ON(shmem->base.import_attach);
- ret = drm_gem_shmem_get_pages(shmem);
- WARN_ON_ONCE(ret != 0);
+ mutex_lock(&shmem->pages_lock);
+
+ /*
+ * We should have already pinned the pages when the buffer was first
+ * mmap'd, vm_open() just grabs an additional reference for the new
+ * mm the vma is getting copied into (ie. on fork()).
+ */
+ if (!WARN_ON_ONCE(!shmem->pages_use_count))
+ shmem->pages_use_count++;
+
+ mutex_unlock(&shmem->pages_lock);
drm_gem_vm_open(vma);
}
@@ -622,10 +630,8 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct
}
ret = drm_gem_shmem_get_pages(shmem);
- if (ret) {
- drm_gem_vm_close(vma);
+ if (ret)
return ret;
- }
vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
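The vm_open change above replaces a full drm_gem_shmem_get_pages() call with a plain use-count bump, since mmap() already pinned the pages and vm_open() only needs another reference for the copied VMA. A stripped-down sketch of the same idea, with a hypothetical object type:

#include <linux/bug.h>
#include <linux/mutex.h>

struct example_obj {
	struct mutex lock;
	unsigned int pages_use_count;
};

/* Called when a VMA on the object is duplicated, e.g. across fork(). */
static void example_vm_open(struct example_obj *obj)
{
	mutex_lock(&obj->lock);
	/* mmap() must already hold a reference; only bump it here. */
	if (!WARN_ON_ONCE(!obj->pages_use_count))
		obj->pages_use_count++;
	mutex_unlock(&obj->lock);
}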
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 461c62c88413..de77054195c6 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -3723,12 +3723,16 @@ out:
static u8 bigjoiner_pipes(struct drm_i915_private *i915)
{
+ u8 pipes;
+
if (DISPLAY_VER(i915) >= 12)
- return BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D);
+ pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D);
else if (DISPLAY_VER(i915) >= 11)
- return BIT(PIPE_B) | BIT(PIPE_C);
+ pipes = BIT(PIPE_B) | BIT(PIPE_C);
else
- return 0;
+ pipes = 0;
+
+ return pipes & RUNTIME_INFO(i915)->pipe_mask;
}
static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d0b03a928b9a..7caa3412a244 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -625,8 +625,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
return -EINTR;
}
- return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc,
- remaining_timeout);
+ if (timeout)
+ return timeout;
+
+ if (remaining_timeout < 0)
+ remaining_timeout = 0;
+
+ return intel_uc_wait_for_idle(&gt->uc, remaining_timeout);
}
int intel_gt_init(struct intel_gt *gt)
@@ -1017,6 +1022,11 @@ static void mmio_invalidate_full(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
+ if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
+ engine->class == VIDEO_ENHANCEMENT_CLASS ||
+ engine->class == COMPUTE_CLASS))
+ rb.bit = _MASKED_BIT_ENABLE(rb.bit);
+
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
awake |= engine->mask;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index edb881d75630..1dfd01668c79 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock);
if (remaining_timeout)
*remaining_timeout = timeout;
- return active_count ? timeout : 0;
+ return active_count ? timeout ?: -ETIME : 0;
}
static void retire_work_handler(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c
index 2403ccd52c74..bba8cb6e8ae4 100644
--- a/drivers/gpu/drm/i915/intel_dram.c
+++ b/drivers/gpu/drm/i915/intel_dram.c
@@ -471,8 +471,7 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915)
u32 val = intel_uncore_read(&i915->uncore, MTL_MEM_SS_INFO_GLOBAL);
struct dram_info *dram_info = &i915->dram_info;
- val = REG_FIELD_GET(MTL_DDR_TYPE_MASK, val);
- switch (val) {
+ switch (REG_FIELD_GET(MTL_DDR_TYPE_MASK, val)) {
case 0:
dram_info->type = INTEL_DRAM_DDR4;
break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index 089046fa21be..50fa3df0bc0c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -1085,21 +1085,21 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data,
reset_ppn_array(pdesc->strsPPNs, ARRAY_SIZE(pdesc->strsPPNs));
/* Pin mksGuestStat user pages and store those in the instance descriptor */
- nr_pinned_stat = pin_user_pages(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat, NULL);
+ nr_pinned_stat = pin_user_pages_fast(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat);
if (num_pages_stat != nr_pinned_stat)
goto err_pin_stat;
for (i = 0; i < num_pages_stat; ++i)
pdesc->statPPNs[i] = page_to_pfn(pages_stat[i]);
- nr_pinned_info = pin_user_pages(arg->info, num_pages_info, FOLL_LONGTERM, pages_info, NULL);
+ nr_pinned_info = pin_user_pages_fast(arg->info, num_pages_info, FOLL_LONGTERM, pages_info);
if (num_pages_info != nr_pinned_info)
goto err_pin_info;
for (i = 0; i < num_pages_info; ++i)
pdesc->infoPPNs[i] = page_to_pfn(pages_info[i]);
- nr_pinned_strs = pin_user_pages(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs, NULL);
+ nr_pinned_strs = pin_user_pages_fast(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs);
if (num_pages_strs != nr_pinned_strs)
goto err_pin_strs;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index ecd3c2fc978b..9c79873f62f0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -949,6 +949,10 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv)
struct drm_device *dev = &dev_priv->drm;
int i, ret;
+ /* Screen objects won't work if GMRs aren't available */
+ if (!dev_priv->has_gmr)
+ return -ENOSYS;
+
if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) {
return -ENOSYS;
}
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 9c1d31f63f85..bd47628da6be 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1315,6 +1315,9 @@ static s32 snto32(__u32 value, unsigned n)
if (!value || !n)
return 0;
+ if (n > 32)
+ n = 32;
+
switch (n) {
case 8: return ((__s8)value);
case 16: return ((__s16)value);
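snto32() turns an n-bit two's-complement field into an s32, and the added clamp keeps n within 32 for malformed report descriptors. The switch above could equally be expressed with the generic helper; this is only a sketch, not the driver's code:

#include <linux/bitops.h>

/* Sign-extend an n-bit field; mirrors the clamped snto32() above. */
static s32 snto32_sketch(u32 value, unsigned int n)
{
	if (!value || !n)
		return 0;
	if (n > 32)
		n = 32;
	return sign_extend32(value, n - 1);
}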
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index dad953f66996..8f58c3c1bec3 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -274,6 +274,7 @@
#define USB_DEVICE_ID_CH_AXIS_295 0x001c
#define USB_VENDOR_ID_CHERRY 0x046a
+#define USB_DEVICE_ID_CHERRY_MOUSE_000C 0x000c
#define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023
#define USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR 0x0027
@@ -917,6 +918,7 @@
#define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd
#define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb
#define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0
+#define USB_DEVICE_ID_MS_MOUSE_0783 0x0783
#define USB_VENDOR_ID_MOJO 0x8282
#define USB_DEVICE_ID_RETRO_ADAPTER 0x3201
@@ -1215,6 +1217,7 @@
#define USB_DEVICE_ID_SYNAPTICS_DELL_K15A 0x6e21
#define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002 0x73f4
#define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003 0x73f5
+#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017 0x73f6
#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7
#define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047
@@ -1381,6 +1384,7 @@
#define USB_VENDOR_ID_PRIMAX 0x0461
#define USB_DEVICE_ID_PRIMAX_MOUSE_4D22 0x4d22
+#define USB_DEVICE_ID_PRIMAX_MOUSE_4E2A 0x4e2a
#define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05
#define USB_DEVICE_ID_PRIMAX_REZEL 0x4e72
#define USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F 0x4d0f
diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
index 430fa4f52ed3..75ebfcf31889 100644
--- a/drivers/hid/hid-ite.c
+++ b/drivers/hid/hid-ite.c
@@ -121,6 +121,11 @@ static const struct hid_device_id ite_devices[] = {
USB_VENDOR_ID_SYNAPTICS,
USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003),
.driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT },
+ /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_SYNAPTICS,
+ USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017),
+ .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT },
{ }
};
MODULE_DEVICE_TABLE(hid, ite_devices);
diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c
index 5e6a0cef2a06..e3fcf1353fb3 100644
--- a/drivers/hid/hid-lg4ff.c
+++ b/drivers/hid/hid-lg4ff.c
@@ -872,6 +872,12 @@ static ssize_t lg4ff_alternate_modes_store(struct device *dev, struct device_att
return -ENOMEM;
i = strlen(lbuf);
+
+ if (i == 0) {
+ kfree(lbuf);
+ return -EINVAL;
+ }
+
if (lbuf[i-1] == '\n') {
if (i == 1) {
kfree(lbuf);
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 71a9c258a20b..8a2aac18dcc5 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -4269,21 +4269,6 @@ static void hidpp_remove(struct hid_device *hdev)
mutex_destroy(&hidpp->send_mutex);
}
-static const struct hid_device_id unhandled_hidpp_devices[] = {
- /* Logitech Harmony Adapter for PS3, handled in hid-sony */
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) },
- /* Handled in hid-generic */
- { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD) },
- {}
-};
-
-static bool hidpp_match(struct hid_device *hdev,
- bool ignore_special_driver)
-{
- /* Refuse to handle devices handled by other HID drivers */
- return !hid_match_id(hdev, unhandled_hidpp_devices);
-}
-
#define LDJ_DEVICE(product) \
HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE, \
USB_VENDOR_ID_LOGITECH, (product))
@@ -4367,9 +4352,15 @@ static const struct hid_device_id hidpp_devices[] = {
{ /* MX5500 keyboard over Bluetooth */
HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb30b),
.driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
-
- { /* And try to enable HID++ for all the Logitech Bluetooth devices */
- HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_ANY, USB_VENDOR_ID_LOGITECH, HID_ANY_ID) },
+ { /* M-RCQ142 V470 Cordless Laser Mouse over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) },
+ { /* MX Master mouse over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012) },
+ { /* MX Ergo trackball over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) },
+ { /* MX Master 3 mouse over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb023) },
{}
};
@@ -4383,7 +4374,6 @@ static const struct hid_usage_id hidpp_usages[] = {
static struct hid_driver hidpp_driver = {
.name = "logitech-hidpp-device",
.id_table = hidpp_devices,
- .match = hidpp_match,
.report_fixup = hidpp_report_fixup,
.probe = hidpp_probe,
.remove = hidpp_remove,
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 50e1c717fc0a..0e9702c7f7d6 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -54,6 +54,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_MOUSE_000C), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS },
@@ -122,6 +123,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_MOUSE_0783), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS },
@@ -146,6 +148,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN), HID_QUIRK_NO_INIT_REPORTS },
{ HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4E2A), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D65), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4E22), HID_QUIRK_ALWAYS_POLL },
diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c
index 0fbc408c2607..7fa6fe04f1b2 100644
--- a/drivers/hid/hid-uclogic-core.c
+++ b/drivers/hid/hid-uclogic-core.c
@@ -192,6 +192,7 @@ static int uclogic_probe(struct hid_device *hdev,
* than the pen, so use QUIRK_MULTI_INPUT for all tablets.
*/
hdev->quirks |= HID_QUIRK_MULTI_INPUT;
+ hdev->quirks |= HID_QUIRK_HIDINPUT_FORCE;
/* Allocate and assign driver data */
drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL);
diff --git a/drivers/hid/hid-uclogic-rdesc.c b/drivers/hid/hid-uclogic-rdesc.c
index 4bd54c4fb5b0..6b73eb0df6bd 100644
--- a/drivers/hid/hid-uclogic-rdesc.c
+++ b/drivers/hid/hid-uclogic-rdesc.c
@@ -1193,7 +1193,7 @@ __u8 *uclogic_rdesc_template_apply(const __u8 *template_ptr,
p[sizeof(btn_head)] < param_num) {
v = param_list[p[sizeof(btn_head)]];
put_unaligned((__u8)0x2A, p); /* Usage Maximum */
- put_unaligned_le16((__force u16)cpu_to_le16(v), p + 1);
+ put_unaligned((__force u16)cpu_to_le16(v), (s16 *)(p + 1));
p += sizeof(btn_head) + 1;
} else {
p++;
diff --git a/drivers/hid/i2c-hid/Kconfig b/drivers/hid/i2c-hid/Kconfig
index 5273ee2bb134..d65abe65ce73 100644
--- a/drivers/hid/i2c-hid/Kconfig
+++ b/drivers/hid/i2c-hid/Kconfig
@@ -66,6 +66,6 @@ endmenu
config I2C_HID_CORE
tristate
- default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_GOODIX=y
- default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_GOODIX=m
+ default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_ELAN=y || I2C_HID_OF_GOODIX=y
+ default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_ELAN=m || I2C_HID_OF_GOODIX=m
select HID
diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c
index 81e688975c6a..a901e4e33d81 100644
--- a/drivers/hwmon/asus-ec-sensors.c
+++ b/drivers/hwmon/asus-ec-sensors.c
@@ -938,6 +938,8 @@ static int asus_ec_probe(struct platform_device *pdev)
ec_data->nr_sensors = hweight_long(ec_data->board_info->sensors);
ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors,
sizeof(struct ec_sensor), GFP_KERNEL);
+ if (!ec_data->sensors)
+ return -ENOMEM;
status = setup_lock_data(dev);
if (status) {
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 8bf32c6c85d9..9bee4d33fbdf 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -242,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev)
*/
if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) {
for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) {
- if (host_bridge->device == tjmax_pci_table[i].device)
+ if (host_bridge->device == tjmax_pci_table[i].device) {
+ pci_dev_put(host_bridge);
return tjmax_pci_table[i].tjmax;
+ }
}
}
+ pci_dev_put(host_bridge);
for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) {
if (strstr(c->x86_model_id, tjmax_table[i].id))
@@ -533,6 +536,10 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx)
{
struct temp_data *tdata = pdata->core_data[indx];
+ /* if we errored on add then this is already gone */
+ if (!tdata)
+ return;
+
/* Remove the sysfs attributes */
sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group);
diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c
index 05f68e9c9477..23b9f94fe0a9 100644
--- a/drivers/hwmon/i5500_temp.c
+++ b/drivers/hwmon/i5500_temp.c
@@ -117,7 +117,7 @@ static int i5500_temp_probe(struct pci_dev *pdev,
u32 tstimer;
s8 tsfsc;
- err = pci_enable_device(pdev);
+ err = pcim_enable_device(pdev);
if (err) {
dev_err(&pdev->dev, "Failed to enable device\n");
return err;
diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c
index f6ec165c0fa8..1837cccd993c 100644
--- a/drivers/hwmon/ibmpex.c
+++ b/drivers/hwmon/ibmpex.c
@@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev)
return;
out_register:
+ list_del(&data->list);
hwmon_device_unregister(data->hwmon_dev);
out_user:
ipmi_destroy_user(data->user);
diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c
index 2a57f4b60c29..e06186986444 100644
--- a/drivers/hwmon/ina3221.c
+++ b/drivers/hwmon/ina3221.c
@@ -228,7 +228,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg,
* Shunt Voltage Sum register has 14-bit value with 1-bit shift
* Other Shunt Voltage registers have 12 bits with 3-bit shift
*/
- if (reg == INA3221_SHUNT_SUM)
+ if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM)
*val = sign_extend32(regval >> 1, 14);
else
*val = sign_extend32(regval >> 3, 12);
@@ -465,7 +465,7 @@ static int ina3221_write_curr(struct device *dev, u32 attr,
* SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV
* SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV
*/
- if (reg == INA3221_SHUNT_SUM)
+ if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM)
regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe;
else
regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8;
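Applying the 40 uV LSB from the comments: writing an 8000 uV limit to a sum register gives DIV_ROUND_CLOSEST(8000, 20) = 400, i.e. the 200-count value shifted left by one, while the same limit on a single channel gives DIV_ROUND_CLOSEST(8000, 5) = 1600, the 200-count value shifted left by three. The fix simply makes the critical-sum register use the sum scaling on both the read and write paths.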
diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c
index 7404e974762f..2dbbbac9de09 100644
--- a/drivers/hwmon/ltc2947-core.c
+++ b/drivers/hwmon/ltc2947-core.c
@@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val,
return ret;
/* in milidegrees celcius, temp is given by: */
- *val = (__val * 204) + 550;
+ *val = (__val * 204) + 5500;
return 0;
}
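Assuming the 0.204 °C/LSB step and 5.5 °C offset implied by the fix, and with *val reported in millidegrees, a raw reading of 100 yields 100 * 204 + 5500 = 25900, i.e. 25.9 °C; the old +550 offset would have reported the same reading as 20.95 °C, off by almost five degrees.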
diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index fe0cd205502d..f58943cb1341 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -852,7 +852,8 @@ static int cdns_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
CDNS_I2C_POLL_US, CDNS_I2C_TIMEOUT_US);
if (ret) {
ret = -EAGAIN;
- i2c_recover_bus(adap);
+ if (id->adap.bus_recovery_info)
+ i2c_recover_bus(adap);
goto out;
}
@@ -1263,8 +1264,13 @@ static int cdns_i2c_probe(struct platform_device *pdev)
id->rinfo.pinctrl = devm_pinctrl_get(&pdev->dev);
if (IS_ERR(id->rinfo.pinctrl)) {
+ int err = PTR_ERR(id->rinfo.pinctrl);
+
dev_info(&pdev->dev, "can't get pinctrl, bus recovery not supported\n");
- return PTR_ERR(id->rinfo.pinctrl);
+ if (err != -ENODEV)
+ return err;
+ } else {
+ id->adap.bus_recovery_info = &id->rinfo;
}
id->membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r_mem);
@@ -1283,7 +1289,6 @@ static int cdns_i2c_probe(struct platform_device *pdev)
id->adap.retries = 3; /* Default retry value. */
id->adap.algo_data = id;
id->adap.dev.parent = &pdev->dev;
- id->adap.bus_recovery_info = &id->rinfo;
init_completion(&id->xfer_done);
snprintf(id->adap.name, sizeof(id->adap.name),
"Cadence I2C at %08lx", (unsigned long)r_mem->start);
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 3082183bd66a..fc70920c4dda 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -1132,7 +1132,8 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs,
int i, result;
unsigned int temp;
int block_data = msgs->flags & I2C_M_RECV_LEN;
- int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data;
+ int use_dma = i2c_imx->dma && msgs->flags & I2C_M_DMA_SAFE &&
+ msgs->len >= DMA_THRESHOLD && !block_data;
dev_dbg(&i2c_imx->adapter.dev,
"<%s> write slave address: addr=0x%x\n",
@@ -1298,7 +1299,8 @@ static int i2c_imx_xfer_common(struct i2c_adapter *adapter,
result = i2c_imx_read(i2c_imx, &msgs[i], is_lastmsg, atomic);
} else {
if (!atomic &&
- i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD)
+ i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD &&
+ msgs[i].flags & I2C_M_DMA_SAFE)
result = i2c_imx_dma_write(i2c_imx, &msgs[i]);
else
result = i2c_imx_write(i2c_imx, &msgs[i], atomic);
diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c
index 0c365b57d957..83457359ec45 100644
--- a/drivers/i2c/busses/i2c-npcm7xx.c
+++ b/drivers/i2c/busses/i2c-npcm7xx.c
@@ -2393,8 +2393,17 @@ static struct platform_driver npcm_i2c_bus_driver = {
static int __init npcm_i2c_init(void)
{
+ int ret;
+
npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL);
- return platform_driver_register(&npcm_i2c_bus_driver);
+
+ ret = platform_driver_register(&npcm_i2c_bus_driver);
+ if (ret) {
+ debugfs_remove_recursive(npcm_i2c_debugfs_dir);
+ return ret;
+ }
+
+ return 0;
}
module_init(npcm_i2c_init);
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 84a77512614d..8fce98bb77ff 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -626,7 +626,6 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i
dev_err(gi2c->se.dev, "I2C timeout gpi flags:%d addr:0x%x\n",
gi2c->cur->flags, gi2c->cur->addr);
gi2c->err = -ETIMEDOUT;
- goto err;
}
if (gi2c->err) {
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index b4edf10e8fd0..7539b0740351 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -467,6 +467,7 @@ static int i2c_device_probe(struct device *dev)
{
struct i2c_client *client = i2c_verify_client(dev);
struct i2c_driver *driver;
+ bool do_power_on;
int status;
if (!client)
@@ -545,8 +546,8 @@ static int i2c_device_probe(struct device *dev)
if (status < 0)
goto err_clear_wakeup_irq;
- status = dev_pm_domain_attach(&client->dev,
- !i2c_acpi_waive_d0_probe(dev));
+ do_power_on = !i2c_acpi_waive_d0_probe(dev);
+ status = dev_pm_domain_attach(&client->dev, do_power_on);
if (status)
goto err_clear_wakeup_irq;
@@ -585,7 +586,7 @@ static int i2c_device_probe(struct device *dev)
err_release_driver_resources:
devres_release_group(&client->dev, client->devres_group_id);
err_detach_pm_domain:
- dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev));
+ dev_pm_domain_detach(&client->dev, do_power_on);
err_clear_wakeup_irq:
dev_pm_clear_wake_irq(&client->dev);
device_init_wakeup(&client->dev, false);
@@ -610,7 +611,7 @@ static void i2c_device_remove(struct device *dev)
devres_release_group(&client->dev, client->devres_group_id);
- dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev));
+ dev_pm_domain_detach(&client->dev, true);
dev_pm_clear_wake_irq(&client->dev);
device_init_wakeup(&client->dev, false);
diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c
index 3a4952935366..3d9c5758d8a4 100644
--- a/drivers/input/touchscreen/raydium_i2c_ts.c
+++ b/drivers/input/touchscreen/raydium_i2c_ts.c
@@ -211,12 +211,14 @@ static int raydium_i2c_send(struct i2c_client *client,
error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer));
if (likely(!error))
- return 0;
+ goto out;
msleep(RM_RETRY_DELAY_MS);
} while (++tries < RM_MAX_RETRIES);
dev_err(&client->dev, "%s failed: %d\n", __func__, error);
+out:
+ kfree(tx_buf);
return error;
}
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 5a8f780e7ffd..bc94059a5b87 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void)
info = dmar_alloc_pci_notify_info(dev,
BUS_NOTIFY_ADD_DEVICE);
if (!info) {
+ pci_dev_put(dev);
return dmar_dev_scope_status;
} else {
dmar_pci_bus_add_dev(info);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 996a8b5ee5ee..644ca49e8cf8 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1396,6 +1396,24 @@ static void domain_update_iotlb(struct dmar_domain *domain)
spin_unlock_irqrestore(&domain->lock, flags);
}
+/*
+ * The extra devTLB flush quirk impacts those QAT devices with PCI device
+ * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
+ * check because it applies only to the built-in QAT devices and it doesn't
+ * grant additional privileges.
+ */
+#define BUGGY_QAT_DEVID_MASK 0x4940
+static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
+{
+ if (pdev->vendor != PCI_VENDOR_ID_INTEL)
+ return false;
+
+ if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK)
+ return false;
+
+ return true;
+}
+
static void iommu_enable_pci_caps(struct device_domain_info *info)
{
struct pci_dev *pdev;
@@ -1478,6 +1496,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
qdep = info->ats_qdep;
qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
qdep, addr, mask);
+ quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep);
}
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
@@ -3854,8 +3873,10 @@ static inline bool has_external_pci(void)
struct pci_dev *pdev = NULL;
for_each_pci_dev(pdev)
- if (pdev->external_facing)
+ if (pdev->external_facing) {
+ pci_dev_put(pdev);
return true;
+ }
return false;
}
@@ -4490,9 +4511,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
if (dev_is_pci(dev)) {
if (ecap_dev_iotlb_support(iommu->ecap) &&
pci_ats_supported(pdev) &&
- dmar_ats_supported(pdev, iommu))
+ dmar_ats_supported(pdev, iommu)) {
info->ats_supported = 1;
-
+ info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev);
+ }
if (sm_supported(iommu)) {
if (pasid_supported(iommu)) {
int features = pci_pasid_features(pdev);
@@ -4931,3 +4953,48 @@ static void __init check_tylersburg_isoch(void)
pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
vtisochctrl);
}
+
+/*
+ * Here we deal with a device TLB defect where device may inadvertently issue ATS
+ * invalidation completion before posted writes initiated with translated address
+ * that utilized translations matching the invalidation address range, violating
+ * the invalidation completion ordering.
+ * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is
+ * vulnerable to this defect. In other words, any dTLB invalidation initiated not
+ * under the control of the trusted/privileged host device driver must use this
+ * quirk.
+ * Device TLBs are invalidated under the following six conditions:
+ * 1. Device driver does DMA API unmap IOVA
+ * 2. Device driver unbind a PASID from a process, sva_unbind_device()
+ * 3. PASID is torn down, after PASID cache is flushed. e.g. process
+ * exit_mmap() due to crash
+ * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where
+ * VM has to free pages that were unmapped
+ * 5. Userspace driver unmaps a DMA buffer
+ * 6. Cache invalidation in vSVA usage (upcoming)
+ *
+ * For #1 and #2, device drivers are responsible for stopping DMA traffic
+ * before unmap/unbind. For #3, iommu driver gets mmu_notifier to
+ * invalidate TLB the same way as normal user unmap which will use this quirk.
+ * The dTLB invalidation after PASID cache flush does not need this quirk.
+ *
+ * As a reminder, #6 will *NEED* this quirk as we enable nested translation.
+ */
+void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
+ unsigned long address, unsigned long mask,
+ u32 pasid, u16 qdep)
+{
+ u16 sid;
+
+ if (likely(!info->dtlb_extra_inval))
+ return;
+
+ sid = PCI_DEVID(info->bus, info->devfn);
+ if (pasid == PASID_RID2PASID) {
+ qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+ qdep, address, mask);
+ } else {
+ qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid,
+ pasid, qdep, address, mask);
+ }
+}
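The 0xfffc mask in dev_needs_extra_dtlb_flush() covers exactly the four device IDs named in the comment: 0x4940, 0x4941, 0x4942 and 0x4943 all reduce to 0x4940 once the low two bits are cleared, while the next ID, 0x4944, stays 0x4944 and is excluded.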
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 92023dff9513..db9df7c3790c 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -623,6 +623,7 @@ struct device_domain_info {
u8 pri_enabled:1;
u8 ats_supported:1;
u8 ats_enabled:1;
+ u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */
u8 ats_qdep;
struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
struct intel_iommu *iommu; /* IOMMU used by this device */
@@ -728,6 +729,9 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u32 pasid, u16 qdep, u64 addr,
unsigned int size_order);
+void quirk_extra_dev_tlb_flush(struct device_domain_info *info,
+ unsigned long address, unsigned long pages,
+ u32 pasid, u16 qdep);
void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
u32 pasid);
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 7d08eb034f2d..03b25358946c 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -184,10 +184,13 @@ static void __flush_svm_range_dev(struct intel_svm *svm,
return;
qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
- if (info->ats_enabled)
+ if (info->ats_enabled) {
qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
svm->pasid, sdev->qdep, address,
order_base_2(pages));
+ quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
+ svm->pasid, sdev->qdep);
+ }
}
static void intel_flush_svm_range_dev(struct intel_svm *svm,
@@ -745,12 +748,16 @@ bad_req:
* If prq is to be handled outside iommu driver via receiver of
* the fault notifiers, we skip the page response here.
*/
- if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req))
- handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+ if (!pdev)
+ goto bad_req;
- trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
- req->priv_data[0], req->priv_data[1],
- iommu->prq_seq_number++);
+ if (intel_svm_prq_report(iommu, &pdev->dev, req))
+ handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
+ else
+ trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
+ req->priv_data[0], req->priv_data[1],
+ iommu->prq_seq_number++);
+ pci_dev_put(pdev);
prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c
index 542dde9d2609..144027035892 100644
--- a/drivers/media/common/videobuf2/frame_vector.c
+++ b/drivers/media/common/videobuf2/frame_vector.c
@@ -35,11 +35,7 @@
int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
struct frame_vector *vec)
{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- int ret_pin_user_pages_fast = 0;
- int ret = 0;
- int err;
+ int ret;
if (nr_frames == 0)
return 0;
@@ -52,57 +48,17 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
ret = pin_user_pages_fast(start, nr_frames,
FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
(struct page **)(vec->ptrs));
- if (ret > 0) {
- vec->got_ref = true;
- vec->is_pfns = false;
- goto out_unlocked;
- }
- ret_pin_user_pages_fast = ret;
-
- mmap_read_lock(mm);
- vec->got_ref = false;
- vec->is_pfns = true;
- ret = 0;
- do {
- unsigned long *nums = frame_vector_pfns(vec);
-
- vma = vma_lookup(mm, start);
- if (!vma)
- break;
-
- while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) {
- err = follow_pfn(vma, start, &nums[ret]);
- if (err) {
- if (ret)
- goto out;
- // If follow_pfn() returns -EINVAL, then this
- // is not an IO mapping or a raw PFN mapping.
- // In that case, return the original error from
- // pin_user_pages_fast(). Otherwise this
- // function would return -EINVAL when
- // pin_user_pages_fast() returned -ENOMEM,
- // which makes debugging hard.
- if (err == -EINVAL && ret_pin_user_pages_fast)
- ret = ret_pin_user_pages_fast;
- else
- ret = err;
- goto out;
- }
- start += PAGE_SIZE;
- ret++;
- }
- /* Bail out if VMA doesn't completely cover the tail page. */
- if (start < vma->vm_end)
- break;
- } while (ret < nr_frames);
-out:
- mmap_read_unlock(mm);
-out_unlocked:
- if (!ret)
- ret = -EFAULT;
- if (ret > 0)
- vec->nr_frames = ret;
- return ret;
+ vec->got_ref = true;
+ vec->is_pfns = false;
+ vec->nr_frames = ret;
+
+ if (likely(ret > 0))
+ return ret;
+
+ /* This used to (racily) return non-refcounted pfns. Let people know */
+ WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping");
+ vec->nr_frames = 0;
+ return ret ? ret : -EFAULT;
}
EXPORT_SYMBOL(get_vaddr_frames);
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index ab9697f3b5f1..92efc4676df6 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -813,7 +813,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
num_buffers = max_t(unsigned int, *count, q->min_buffers_needed);
num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME);
memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
+ /*
+ * Set this now to ensure that drivers see the correct q->memory value
+ * in the queue_setup op.
+ */
+ mutex_lock(&q->mmap_lock);
q->memory = memory;
+ mutex_unlock(&q->mmap_lock);
set_queue_coherency(q, non_coherent_mem);
/*
@@ -823,22 +829,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes,
plane_sizes, q->alloc_devs);
if (ret)
- return ret;
+ goto error;
/* Check that driver has set sane values */
- if (WARN_ON(!num_planes))
- return -EINVAL;
+ if (WARN_ON(!num_planes)) {
+ ret = -EINVAL;
+ goto error;
+ }
for (i = 0; i < num_planes; i++)
- if (WARN_ON(!plane_sizes[i]))
- return -EINVAL;
+ if (WARN_ON(!plane_sizes[i])) {
+ ret = -EINVAL;
+ goto error;
+ }
/* Finally, allocate buffers and video memory */
allocated_buffers =
__vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes);
if (allocated_buffers == 0) {
dprintk(q, 1, "memory allocation failed\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto error;
}
/*
@@ -879,7 +890,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
if (ret < 0) {
/*
* Note: __vb2_queue_free() will subtract 'allocated_buffers'
- * from q->num_buffers.
+ * from q->num_buffers and it will reset q->memory to
+ * VB2_MEMORY_UNKNOWN.
*/
__vb2_queue_free(q, allocated_buffers);
mutex_unlock(&q->mmap_lock);
@@ -895,6 +907,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
q->waiting_for_buffers = !q->is_output;
return 0;
+
+error:
+ mutex_lock(&q->mmap_lock);
+ q->memory = VB2_MEMORY_UNKNOWN;
+ mutex_unlock(&q->mmap_lock);
+ return ret;
}
EXPORT_SYMBOL_GPL(vb2_core_reqbufs);
@@ -906,6 +924,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
unsigned int num_planes = 0, num_buffers, allocated_buffers;
unsigned plane_sizes[VB2_MAX_PLANES] = { };
bool non_coherent_mem = flags & V4L2_MEMORY_FLAG_NON_COHERENT;
+ bool no_previous_buffers = !q->num_buffers;
int ret;
if (q->num_buffers == VB2_MAX_FRAME) {
@@ -913,13 +932,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
return -ENOBUFS;
}
- if (!q->num_buffers) {
+ if (no_previous_buffers) {
if (q->waiting_in_dqbuf && *count) {
dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n");
return -EBUSY;
}
memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
+ /*
+ * Set this now to ensure that drivers see the correct q->memory
+ * value in the queue_setup op.
+ */
+ mutex_lock(&q->mmap_lock);
q->memory = memory;
+ mutex_unlock(&q->mmap_lock);
q->waiting_for_buffers = !q->is_output;
set_queue_coherency(q, non_coherent_mem);
} else {
@@ -945,14 +970,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
ret = call_qop(q, queue_setup, q, &num_buffers,
&num_planes, plane_sizes, q->alloc_devs);
if (ret)
- return ret;
+ goto error;
/* Finally, allocate buffers and video memory */
allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers,
num_planes, plane_sizes);
if (allocated_buffers == 0) {
dprintk(q, 1, "memory allocation failed\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto error;
}
/*
@@ -983,7 +1009,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
if (ret < 0) {
/*
* Note: __vb2_queue_free() will subtract 'allocated_buffers'
- * from q->num_buffers.
+ * from q->num_buffers and it will reset q->memory to
+ * VB2_MEMORY_UNKNOWN.
*/
__vb2_queue_free(q, allocated_buffers);
mutex_unlock(&q->mmap_lock);
@@ -998,6 +1025,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
*count = allocated_buffers;
return 0;
+
+error:
+ if (no_previous_buffers) {
+ mutex_lock(&q->mmap_lock);
+ q->memory = VB2_MEMORY_UNKNOWN;
+ mutex_unlock(&q->mmap_lock);
+ }
+ return ret;
}
EXPORT_SYMBOL_GPL(vb2_core_create_bufs);
@@ -2165,6 +2200,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off,
unsigned int buffer, plane;
/*
+ * Sanity checks to ensure the lock is held, MEMORY_MMAP is
+ * used and fileio isn't active.
+ */
+ lockdep_assert_held(&q->mmap_lock);
+
+ if (q->memory != VB2_MEMORY_MMAP) {
+ dprintk(q, 1, "queue is not currently set up for mmap\n");
+ return -EINVAL;
+ }
+
+ if (vb2_fileio_is_active(q)) {
+ dprintk(q, 1, "file io in progress\n");
+ return -EBUSY;
+ }
+
+ /*
* Go over all buffers and their planes, comparing the given offset
* with an offset assigned to each plane. If a match is found,
* return its buffer and plane numbers.
@@ -2265,11 +2316,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma)
int ret;
unsigned long length;
- if (q->memory != VB2_MEMORY_MMAP) {
- dprintk(q, 1, "queue is not currently set up for mmap\n");
- return -EINVAL;
- }
-
/*
* Check memory area access mode.
*/
@@ -2291,14 +2337,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma)
mutex_lock(&q->mmap_lock);
- if (vb2_fileio_is_active(q)) {
- dprintk(q, 1, "mmap: file io in progress\n");
- ret = -EBUSY;
- goto unlock;
- }
-
/*
- * Find the plane corresponding to the offset passed by userspace.
+ * Find the plane corresponding to the offset passed by userspace. This
+ * will return an error if not MEMORY_MMAP or file I/O is in progress.
*/
ret = __find_plane_by_offset(q, off, &buffer, &plane);
if (ret)
@@ -2351,22 +2392,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q,
void *vaddr;
int ret;
- if (q->memory != VB2_MEMORY_MMAP) {
- dprintk(q, 1, "queue is not currently set up for mmap\n");
- return -EINVAL;
- }
+ mutex_lock(&q->mmap_lock);
/*
- * Find the plane corresponding to the offset passed by userspace.
+ * Find the plane corresponding to the offset passed by userspace. This
+ * will return an error if not MEMORY_MMAP or file I/O is in progress.
*/
ret = __find_plane_by_offset(q, off, &buffer, &plane);
if (ret)
- return ret;
+ goto unlock;
vb = q->bufs[buffer];
vaddr = vb2_plane_vaddr(vb, plane);
+ mutex_unlock(&q->mmap_lock);
return vaddr ? (unsigned long)vaddr : -EINVAL;
+
+unlock:
+ mutex_unlock(&q->mmap_lock);
+ return ret;
}
EXPORT_SYMBOL_GPL(vb2_get_unmapped_area);
#endif
diff --git a/drivers/media/dvb-frontends/stv0288.c b/drivers/media/dvb-frontends/stv0288.c
index 3d54a0ec86af..3ae1f3a2f142 100644
--- a/drivers/media/dvb-frontends/stv0288.c
+++ b/drivers/media/dvb-frontends/stv0288.c
@@ -440,9 +440,8 @@ static int stv0288_set_frontend(struct dvb_frontend *fe)
struct stv0288_state *state = fe->demodulator_priv;
struct dtv_frontend_properties *c = &fe->dtv_property_cache;
- char tm;
- unsigned char tda[3];
- u8 reg, time_out = 0;
+ u8 tda[3], reg, time_out = 0;
+ s8 tm;
dprintk("%s : FE_SET_FRONTEND\n", __func__);
diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c
index 003c32fed3f7..942d0005c55e 100644
--- a/drivers/media/v4l2-core/v4l2-dv-timings.c
+++ b/drivers/media/v4l2-core/v4l2-dv-timings.c
@@ -145,6 +145,8 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t,
const struct v4l2_bt_timings *bt = &t->bt;
const struct v4l2_bt_timings_cap *cap = &dvcap->bt;
u32 caps = cap->capabilities;
+ const u32 max_vert = 10240;
+ u32 max_hor = 3 * bt->width;
if (t->type != V4L2_DV_BT_656_1120)
return false;
@@ -166,14 +168,20 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t,
if (!bt->interlaced &&
(bt->il_vbackporch || bt->il_vsync || bt->il_vfrontporch))
return false;
- if (bt->hfrontporch > 2 * bt->width ||
- bt->hsync > 1024 || bt->hbackporch > 1024)
+ /*
+ * Some video receivers cannot properly separate the frontporch,
+ * backporch and sync values, and instead they only have the total
+ * blanking. That can be assigned to any of these three fields.
+ * So just check that none of these are way out of range.
+ */
+ if (bt->hfrontporch > max_hor ||
+ bt->hsync > max_hor || bt->hbackporch > max_hor)
return false;
- if (bt->vfrontporch > 4096 ||
- bt->vsync > 128 || bt->vbackporch > 4096)
+ if (bt->vfrontporch > max_vert ||
+ bt->vsync > max_vert || bt->vbackporch > max_vert)
return false;
- if (bt->interlaced && (bt->il_vfrontporch > 4096 ||
- bt->il_vsync > 128 || bt->il_vbackporch > 4096))
+ if (bt->interlaced && (bt->il_vfrontporch > max_vert ||
+ bt->il_vsync > max_vert || bt->il_vbackporch > max_vert))
return false;
return fnc == NULL || fnc(t, fnc_handle);
}
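With the relaxed bounds, a 1920-pixel-wide timing may report up to 3 * 1920 = 5760 pixels in any single one of hfrontporch, hsync or hbackporch, which accommodates receivers that fold the entire horizontal blanking into one field; each vertical field is capped at 10240 lines.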
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index c5de202f530a..de1cc9e1ae57 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1484,6 +1484,11 @@ void mmc_init_erase(struct mmc_card *card)
card->pref_erase = 0;
}
+static bool is_trim_arg(unsigned int arg)
+{
+ return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG;
+}
+
static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card,
unsigned int arg, unsigned int qty)
{
@@ -1766,7 +1771,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
!(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN))
return -EOPNOTSUPP;
- if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) &&
+ if (mmc_card_mmc(card) && is_trim_arg(arg) &&
!(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN))
return -EOPNOTSUPP;
@@ -1796,7 +1801,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
* identified by the card->eg_boundary flag.
*/
rem = card->erase_size - (from % card->erase_size);
- if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) {
+ if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) {
err = mmc_do_erase(card, from, from + rem - 1, arg);
from += rem;
if ((err) || (to <= from))
diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c
index 8d9bceeff986..155ce2bdfe62 100644
--- a/drivers/mmc/core/mmc_test.c
+++ b/drivers/mmc/core/mmc_test.c
@@ -3179,7 +3179,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card,
struct mmc_test_dbgfs_file *df;
if (card->debugfs_root)
- debugfs_create_file(name, mode, card->debugfs_root, card, fops);
+ file = debugfs_create_file(name, mode, card->debugfs_root,
+ card, fops);
df = kmalloc(sizeof(*df), GFP_KERNEL);
if (!df) {
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index df941438aef5..26bc59b5a7cc 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -2588,13 +2588,11 @@ static int msdc_of_clock_parse(struct platform_device *pdev,
return PTR_ERR(host->src_clk_cg);
}
- host->sys_clk_cg = devm_clk_get_optional(&pdev->dev, "sys_cg");
+ /* If present, always enable for this clock gate */
+ host->sys_clk_cg = devm_clk_get_optional_enabled(&pdev->dev, "sys_cg");
if (IS_ERR(host->sys_clk_cg))
host->sys_clk_cg = NULL;
- /* If present, always enable for this clock gate */
- clk_prepare_enable(host->sys_clk_cg);
-
host->bulk_clks[0].id = "pclk_cg";
host->bulk_clks[1].id = "axi_cg";
host->bulk_clks[2].id = "ahb_cg";
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 31ea0a2fce35..ffeb5759830f 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -1512,7 +1512,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc)
* system resume back.
*/
cqhci_writel(cq_host, 0, CQHCI_CTL);
- if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT)
+ if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT)
dev_err(mmc_dev(host->mmc),
"failed to exit halt state when enable CQE\n");
diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
index b92a408f138d..bec3f9e3cd3f 100644
--- a/drivers/mmc/host/sdhci-sprd.c
+++ b/drivers/mmc/host/sdhci-sprd.c
@@ -470,7 +470,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
}
if (IS_ERR(sprd_host->pinctrl))
- return 0;
+ goto reset;
switch (ios->signal_voltage) {
case MMC_SIGNAL_VOLTAGE_180:
@@ -498,6 +498,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios)
/* Wait for 300 ~ 500 us for pin state stable */
usleep_range(300, 500);
+
+reset:
sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
return 0;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index fef03de85b99..c7ad32a75b57 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -373,6 +373,7 @@ static void sdhci_init(struct sdhci_host *host, int soft)
if (soft) {
/* force clock reconfiguration */
host->clock = 0;
+ host->reinit_uhs = true;
mmc->ops->set_ios(mmc, &mmc->ios);
}
}
@@ -2293,11 +2294,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
}
EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling);
+static bool sdhci_timing_has_preset(unsigned char timing)
+{
+ switch (timing) {
+ case MMC_TIMING_UHS_SDR12:
+ case MMC_TIMING_UHS_SDR25:
+ case MMC_TIMING_UHS_SDR50:
+ case MMC_TIMING_UHS_SDR104:
+ case MMC_TIMING_UHS_DDR50:
+ case MMC_TIMING_MMC_DDR52:
+ return true;
+ };
+ return false;
+}
+
+static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing)
+{
+ return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
+ sdhci_timing_has_preset(timing);
+}
+
+static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios)
+{
+ /*
+ * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK
+ * Frequency. Check if preset values need to be enabled, or the Driver
+ * Strength needs updating. Note, clock changes are handled separately.
+ */
+ return !host->preset_enabled &&
+ (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type);
+}
+
void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
{
struct sdhci_host *host = mmc_priv(mmc);
+ bool reinit_uhs = host->reinit_uhs;
+ bool turning_on_clk = false;
u8 ctrl;
+ host->reinit_uhs = false;
+
if (ios->power_mode == MMC_POWER_UNDEFINED)
return;
@@ -2323,6 +2359,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
sdhci_enable_preset_value(host, false);
if (!ios->clock || ios->clock != host->clock) {
+ turning_on_clk = ios->clock && !host->clock;
+
host->ops->set_clock(host, ios->clock);
host->clock = ios->clock;
@@ -2349,6 +2387,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
host->ops->set_bus_width(host, ios->bus_width);
+ /*
+ * Special case to avoid multiple clock changes during voltage
+ * switching.
+ */
+ if (!reinit_uhs &&
+ turning_on_clk &&
+ host->timing == ios->timing &&
+ host->version >= SDHCI_SPEC_300 &&
+ !sdhci_presetable_values_change(host, ios))
+ return;
+
ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) {
@@ -2392,6 +2441,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
}
sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+ host->drv_type = ios->drv_type;
} else {
/*
* According to SDHC Spec v3.00, if the Preset Value
@@ -2419,19 +2469,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
host->ops->set_uhs_signaling(host, ios->timing);
host->timing = ios->timing;
- if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
- ((ios->timing == MMC_TIMING_UHS_SDR12) ||
- (ios->timing == MMC_TIMING_UHS_SDR25) ||
- (ios->timing == MMC_TIMING_UHS_SDR50) ||
- (ios->timing == MMC_TIMING_UHS_SDR104) ||
- (ios->timing == MMC_TIMING_UHS_DDR50) ||
- (ios->timing == MMC_TIMING_MMC_DDR52))) {
+ if (sdhci_preset_needed(host, ios->timing)) {
u16 preset;
sdhci_enable_preset_value(host, true);
preset = sdhci_get_preset_value(host);
ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK,
preset);
+ host->drv_type = ios->drv_type;
}
/* Re-enable SD Clock */
@@ -3768,6 +3813,7 @@ int sdhci_resume_host(struct sdhci_host *host)
sdhci_init(host, 0);
host->pwr = 0;
host->clock = 0;
+ host->reinit_uhs = true;
mmc->ops->set_ios(mmc, &mmc->ios);
} else {
sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER));
@@ -3830,6 +3876,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset)
/* Force clock and power re-program */
host->pwr = 0;
host->clock = 0;
+ host->reinit_uhs = true;
mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios);
mmc->ops->set_ios(mmc, &mmc->ios);
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index d750c464bd1e..87a3aaa07438 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -524,6 +524,8 @@ struct sdhci_host {
unsigned int clock; /* Current clock (MHz) */
u8 pwr; /* Current voltage */
+ u8 drv_type; /* Current UHS-I driver type */
+ bool reinit_uhs; /* Force UHS-related re-initialization */
bool runtime_suspended; /* Host is runtime suspended */
bool bus_on; /* Bus power prevents runtime suspend */
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f298b9b3eb77..b9a882f182d2 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3247,7 +3247,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
goto out;
saddr = &combined->ip6.saddr;
- daddr = &combined->ip6.saddr;
+ daddr = &combined->ip6.daddr;
slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n",
__func__, slave->dev->name, bond_slave_state(slave),
diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c
index 094197780776..dc7192ecb001 100644
--- a/drivers/net/can/can327.c
+++ b/drivers/net/can/can327.c
@@ -263,8 +263,10 @@ static void can327_feed_frame_to_netdev(struct can327 *elm, struct sk_buff *skb)
{
lockdep_assert_held(&elm->lock);
- if (!netif_running(elm->dev))
+ if (!netif_running(elm->dev)) {
+ kfree_skb(skb);
return;
+ }
/* Queue for NAPI pickup.
* rx-offload will update stats and LEDs for us.
@@ -794,9 +796,9 @@ static int can327_netdev_close(struct net_device *dev)
netif_stop_queue(dev);
- /* Give UART one final chance to flush. */
- clear_bit(TTY_DO_WRITE_WAKEUP, &elm->tty->flags);
- flush_work(&elm->tx_work);
+ /* We don't flush the UART TX queue here, as we want final stop
+ * commands (like the above dummy char) to be flushed out.
+ */
can_rx_offload_disable(&elm->offload);
elm->can.state = CAN_STATE_STOPPED;
@@ -1067,12 +1069,15 @@ static void can327_ldisc_close(struct tty_struct *tty)
{
struct can327 *elm = (struct can327 *)tty->disc_data;
- /* unregister_netdev() calls .ndo_stop() so we don't have to.
- * Our .ndo_stop() also flushes the TTY write wakeup handler,
- * so we can safely set elm->tty = NULL after this.
- */
+ /* unregister_netdev() calls .ndo_stop() so we don't have to. */
unregister_candev(elm->dev);
+ /* Give UART one final chance to flush.
+ * No need to clear TTY_DO_WRITE_WAKEUP since .write_wakeup() is
+ * serialised against .close() and will not be called once we return.
+ */
+ flush_work(&elm->tx_work);
+
/* Mark channel as dead */
spin_lock_bh(&elm->lock);
tty->disc_data = NULL;
diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c
index 194c86e0f340..8f6dccd5a587 100644
--- a/drivers/net/can/cc770/cc770_isa.c
+++ b/drivers/net/can/cc770/cc770_isa.c
@@ -264,22 +264,24 @@ static int cc770_isa_probe(struct platform_device *pdev)
if (err) {
dev_err(&pdev->dev,
"couldn't register device (err=%d)\n", err);
- goto exit_unmap;
+ goto exit_free;
}
dev_info(&pdev->dev, "device registered (reg_base=0x%p, irq=%d)\n",
priv->reg_base, dev->irq);
return 0;
- exit_unmap:
+exit_free:
+ free_cc770dev(dev);
+exit_unmap:
if (mem[idx])
iounmap(base);
- exit_release:
+exit_release:
if (mem[idx])
release_mem_region(mem[idx], iosize);
else
release_region(port[idx], iosize);
- exit:
+exit:
return err;
}
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 00d11e95fd98..e5575d2755e4 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1909,7 +1909,7 @@ int m_can_class_get_clocks(struct m_can_classdev *cdev)
cdev->hclk = devm_clk_get(cdev->dev, "hclk");
cdev->cclk = devm_clk_get(cdev->dev, "cclk");
- if (IS_ERR(cdev->cclk)) {
+ if (IS_ERR(cdev->hclk) || IS_ERR(cdev->cclk)) {
dev_err(cdev->dev, "no clock found\n");
ret = -ENODEV;
}
diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c
index 8f184a852a0a..f2219aa2824b 100644
--- a/drivers/net/can/m_can/m_can_pci.c
+++ b/drivers/net/can/m_can/m_can_pci.c
@@ -120,7 +120,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
ret = pci_alloc_irq_vectors(pci, 1, 1, PCI_IRQ_ALL_TYPES);
if (ret < 0)
- return ret;
+ goto err_free_dev;
mcan_class->dev = &pci->dev;
mcan_class->net->irq = pci_irq_vector(pci, 0);
@@ -132,7 +132,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
ret = m_can_class_register(mcan_class);
if (ret)
- goto err;
+ goto err_free_irq;
/* Enable interrupt control at CAN wrapper IP */
writel(0x1, base + CTL_CSR_INT_CTL_OFFSET);
@@ -144,8 +144,10 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
return 0;
-err:
+err_free_irq:
pci_free_irq_vectors(pci);
+err_free_dev:
+ m_can_class_free_dev(mcan_class->net);
return ret;
}
@@ -161,6 +163,7 @@ static void m_can_pci_remove(struct pci_dev *pci)
writel(0x0, priv->base + CTL_CSR_INT_CTL_OFFSET);
m_can_class_unregister(mcan_class);
+ m_can_class_free_dev(mcan_class->net);
pci_free_irq_vectors(pci);
}
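Aside, not part of the patch: the m_can_pci hunks above add the missing steps to the probe error unwind. The usual shape of that pattern, as a self-contained sketch with hypothetical helpers (grab/drop/register_device are illustrative only, not kernel APIs):

	#include <stdio.h>
	#include <stdlib.h>

	static void *grab(const char *name) { printf("alloc %s\n", name); return malloc(1); }
	static void drop(void *r, const char *name) { printf("free  %s\n", name); free(r); }
	static int register_device(void) { return -1; /* simulate a late failure */ }

	static int probe(void)
	{
		void *dev = grab("dev");
		void *irq;

		if (!dev)
			return -1;

		irq = grab("irq");
		if (!irq)
			goto err_free_dev;

		if (register_device())
			goto err_free_irq;	/* unwind strictly in reverse order */

		return 0;

	err_free_irq:
		drop(irq, "irq");
	err_free_dev:
		drop(dev, "dev");
		return -1;
	}

	int main(void)
	{
		return probe() ? 1 : 0;
	}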
diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c
index d513fac50718..db3e767d5320 100644
--- a/drivers/net/can/sja1000/sja1000_isa.c
+++ b/drivers/net/can/sja1000/sja1000_isa.c
@@ -202,22 +202,24 @@ static int sja1000_isa_probe(struct platform_device *pdev)
if (err) {
dev_err(&pdev->dev, "registering %s failed (err=%d)\n",
DRV_NAME, err);
- goto exit_unmap;
+ goto exit_free;
}
dev_info(&pdev->dev, "%s device registered (reg_base=0x%p, irq=%d)\n",
DRV_NAME, priv->reg_base, dev->irq);
return 0;
- exit_unmap:
+exit_free:
+ free_sja1000dev(dev);
+exit_unmap:
if (mem[idx])
iounmap(base);
- exit_release:
+exit_release:
if (mem[idx])
release_mem_region(mem[idx], iosize);
else
release_region(port[idx], iosize);
- exit:
+exit:
return err;
}
diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c
index fbb34139daa1..f4db77007c13 100644
--- a/drivers/net/can/slcan/slcan-core.c
+++ b/drivers/net/can/slcan/slcan-core.c
@@ -864,12 +864,14 @@ static void slcan_close(struct tty_struct *tty)
{
struct slcan *sl = (struct slcan *)tty->disc_data;
- /* unregister_netdev() calls .ndo_stop() so we don't have to.
- * Our .ndo_stop() also flushes the TTY write wakeup handler,
- * so we can safely set sl->tty = NULL after this.
- */
unregister_candev(sl->dev);
+ /*
+ * The netdev needn't be UP (so .ndo_stop() is not called). Hence make
+ * sure this is not running before freeing it up.
+ */
+ flush_work(&sl->tx_work);
+
/* Mark channel as dead */
spin_lock_bh(&sl->lock);
tty->disc_data = NULL;
diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c
index 81b88e9e5bdc..42323f5e6f3a 100644
--- a/drivers/net/can/usb/esd_usb.c
+++ b/drivers/net/can/usb/esd_usb.c
@@ -234,6 +234,10 @@ static void esd_usb_rx_event(struct esd_usb_net_priv *priv,
u8 rxerr = msg->msg.rx.data[2];
u8 txerr = msg->msg.rx.data[3];
+ netdev_dbg(priv->netdev,
+ "CAN_ERR_EV_EXT: dlc=%#02x state=%02x ecc=%02x rec=%02x tec=%02x\n",
+ msg->msg.rx.dlc, state, ecc, rxerr, txerr);
+
skb = alloc_can_err_skb(priv->netdev, &cf);
if (skb == NULL) {
stats->rx_dropped++;
@@ -260,6 +264,8 @@ static void esd_usb_rx_event(struct esd_usb_net_priv *priv,
break;
default:
priv->can.state = CAN_STATE_ERROR_ACTIVE;
+ txerr = 0;
+ rxerr = 0;
break;
}
} else {
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
index 25f863b4f5f0..ddb7c5735c9a 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -2091,8 +2091,11 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx)
netdev->dev_port = channel_idx;
ret = register_candev(netdev);
- if (ret)
+ if (ret) {
+ es58x_dev->netdev[channel_idx] = NULL;
+ free_candev(netdev);
return ret;
+ }
netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0),
es58x_dev->param->dql_min_limit);
diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index 218b098b261d..47619e9cb005 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -47,6 +47,10 @@
#define MCBA_VER_REQ_USB 1
#define MCBA_VER_REQ_CAN 2
+/* Drive the CAN_RES signal LOW "0" to activate R24 and R25 */
+#define MCBA_VER_TERMINATION_ON 0
+#define MCBA_VER_TERMINATION_OFF 1
+
#define MCBA_SIDL_EXID_MASK 0x8
#define MCBA_DLC_MASK 0xf
#define MCBA_DLC_RTR_MASK 0x40
@@ -463,7 +467,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv,
priv->usb_ka_first_pass = false;
}
- if (msg->termination_state)
+ if (msg->termination_state == MCBA_VER_TERMINATION_ON)
priv->can.termination = MCBA_TERMINATION_ENABLED;
else
priv->can.termination = MCBA_TERMINATION_DISABLED;
@@ -785,9 +789,9 @@ static int mcba_set_termination(struct net_device *netdev, u16 term)
};
if (term == MCBA_TERMINATION_ENABLED)
- usb_msg.termination = 1;
+ usb_msg.termination = MCBA_VER_TERMINATION_ON;
else
- usb_msg.termination = 0;
+ usb_msg.termination = MCBA_VER_TERMINATION_OFF;
mcba_usb_xmit_cmd(priv, (struct mcba_usb_msg *)&usb_msg);
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 438e46af03e9..80f07bd20593 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -961,7 +961,7 @@ static const struct lan9303_mib_desc lan9303_mib[] = {
{ .offset = LAN9303_MAC_TX_BRDCST_CNT_0, .name = "TxBroad", },
{ .offset = LAN9303_MAC_TX_PAUSE_CNT_0, .name = "TxPause", },
{ .offset = LAN9303_MAC_TX_MULCST_CNT_0, .name = "TxMulti", },
- { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "TxUnderRun", },
+ { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "RxShort", },
{ .offset = LAN9303_MAC_TX_64_CNT_0, .name = "Tx64Byte", },
{ .offset = LAN9303_MAC_TX_127_CNT_0, .name = "Tx128Byte", },
{ .offset = LAN9303_MAC_TX_255_CNT_0, .name = "Tx256Byte", },
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 2479be3a1e35..937cb22cb3d4 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -833,10 +833,13 @@ static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port,
chip->info->ops->phylink_get_caps(chip, port, config);
- /* Internal ports need GMII for PHYLIB */
- if (mv88e6xxx_phy_is_internal(ds, port))
+ if (mv88e6xxx_phy_is_internal(ds, port)) {
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ config->supported_interfaces);
+ /* Internal ports with no phy-mode need GMII for PHYLIB */
__set_bit(PHY_INTERFACE_MODE_GMII,
config->supported_interfaces);
+ }
}
static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c
index 10c6fea1227f..bdbbff2a7909 100644
--- a/drivers/net/dsa/sja1105/sja1105_devlink.c
+++ b/drivers/net/dsa/sja1105/sja1105_devlink.c
@@ -95,6 +95,8 @@ static int sja1105_setup_devlink_regions(struct dsa_switch *ds)
if (IS_ERR(region)) {
while (--i >= 0)
dsa_devlink_region_destroy(priv->regions[i]);
+
+ kfree(priv->regions);
return PTR_ERR(region);
}
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 412666111b0c..b70dcf32a26d 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1038,7 +1038,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
policing[bcast].sharindx = port;
/* Only SJA1110 has multicast policers */
- if (mcast <= table->ops->max_entry_count)
+ if (mcast < table->ops->max_entry_count)
policing[mcast].sharindx = port;
}
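Aside, not part of the patch: the sja1105 change above turns "<=" into "<" for the multicast policer index, since an index equal to the entry count is already one past the end. A standalone sketch of the off-by-one, with a hypothetical MAX_ENTRY_COUNT:

	#include <stdio.h>

	#define MAX_ENTRY_COUNT 8	/* hypothetical table size for illustration */

	int main(void)
	{
		int table[MAX_ENTRY_COUNT] = { 0 };
		int idx = MAX_ENTRY_COUNT;	/* one past the last valid index */

		if (idx <= MAX_ENTRY_COUNT)	/* original test: lets the bad index through */
			printf("'<=' would touch table[%d], past the end\n", idx);

		if (idx < MAX_ENTRY_COUNT)	/* fixed test: valid indices are 0..MAX-1 */
			table[idx] = 1;
		else
			printf("'<' rejects index %d\n", idx);

		return 0;
	}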
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index e104fb02817d..aa0d2f3aaeaa 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -258,6 +258,7 @@ static int greth_init_rings(struct greth_private *greth)
if (dma_mapping_error(greth->dev, dma_addr)) {
if (netif_msg_ifup(greth))
dev_err(greth->dev, "Could not create initial DMA mapping\n");
+ dev_kfree_skb(skb);
goto cleanup;
}
greth->rx_skbuff[i] = skb;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index a08f221e30d4..ac4ea93bd8dd 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -13,6 +13,7 @@
#include "aq_ptp.h"
#include "aq_filters.h"
#include "aq_macsec.h"
+#include "aq_main.h"
#include <linux/ptp_clock_kernel.h>
@@ -858,7 +859,7 @@ static int aq_set_ringparam(struct net_device *ndev,
if (netif_running(ndev)) {
ndev_running = true;
- dev_close(ndev);
+ aq_ndev_close(ndev);
}
cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min);
@@ -874,7 +875,7 @@ static int aq_set_ringparam(struct net_device *ndev,
goto err_exit;
if (ndev_running)
- err = dev_open(ndev, NULL);
+ err = aq_ndev_open(ndev);
err_exit:
return err;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index 8a0af371e7dc..77609dc0a08d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -58,7 +58,7 @@ struct net_device *aq_ndev_alloc(void)
return ndev;
}
-static int aq_ndev_open(struct net_device *ndev)
+int aq_ndev_open(struct net_device *ndev)
{
struct aq_nic_s *aq_nic = netdev_priv(ndev);
int err = 0;
@@ -88,7 +88,7 @@ err_exit:
return err;
}
-static int aq_ndev_close(struct net_device *ndev)
+int aq_ndev_close(struct net_device *ndev)
{
struct aq_nic_s *aq_nic = netdev_priv(ndev);
int err = 0;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.h b/drivers/net/ethernet/aquantia/atlantic/aq_main.h
index 99870865f66d..a78c1a168d8e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.h
@@ -16,5 +16,7 @@ DECLARE_STATIC_KEY_FALSE(aq_xdp_locking_key);
void aq_ndev_schedule_work(struct work_struct *work);
struct net_device *aq_ndev_alloc(void);
+int aq_ndev_open(struct net_device *ndev);
+int aq_ndev_close(struct net_device *ndev);
#endif /* AQ_MAIN_H */
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 55dfdb34e37b..f4ca0c6c0f51 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -71,13 +71,14 @@ config BCM63XX_ENET
config BCMGENET
tristate "Broadcom GENET internal MAC support"
depends on HAS_IOMEM
+ depends on PTP_1588_CLOCK_OPTIONAL || !ARCH_BCM2835
select MII
select PHYLIB
select FIXED_PHY
select BCM7XXX_PHY
select MDIO_BCM_UNIMAC
select DIMLIB
- select BROADCOM_PHY if (ARCH_BCM2835 && PTP_1588_CLOCK_OPTIONAL)
+ select BROADCOM_PHY if ARCH_BCM2835
help
This driver supports the built-in Ethernet MACs found in the
Broadcom BCM7xxx Set Top Box family chipset.
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 98f3dc460ca7..f2f95493ec89 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -2239,7 +2239,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err = register_netdev(netdev);
if (err) {
dev_err(dev, "Failed to register netdevice\n");
- goto err_unregister_interrupts;
+ goto err_destroy_workqueue;
}
nic->msg_enable = debug;
@@ -2248,6 +2248,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
+err_destroy_workqueue:
+ destroy_workqueue(nic->nicvf_rx_mode_wq);
err_unregister_interrupts:
nicvf_unregister_interrupts(nic);
err_free_netdev:
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
index cacd454ac696..c39b866e2582 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
@@ -132,6 +132,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
dev_err(dev, "DMA mapping failed\n");
+ kfree(cmd_buff);
return -EFAULT;
}
@@ -142,6 +143,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
DMA_TO_DEVICE);
if (err) {
dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err);
+ kfree(cmd_buff);
return err;
}
@@ -172,6 +174,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
dev_err(dev, "DMA mapping failed\n");
+ kfree(cmd_buff);
return -EFAULT;
}
@@ -182,6 +185,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
DMA_TO_DEVICE);
if (err) {
dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err);
+ kfree(cmd_buff);
return err;
}
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index f623c12eaf95..23e1a94b9ce4 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -74,7 +74,7 @@
#include "fec.h"
static void set_multicast_list(struct net_device *ndev);
-static void fec_enet_itr_coal_init(struct net_device *ndev);
+static void fec_enet_itr_coal_set(struct net_device *ndev);
#define DRIVER_NAME "fec"
@@ -1220,8 +1220,8 @@ fec_restart(struct net_device *ndev)
writel(0, fep->hwp + FEC_IMASK);
/* Init the interrupt coalescing */
- fec_enet_itr_coal_init(ndev);
-
+ if (fep->quirks & FEC_QUIRK_HAS_COALESCE)
+ fec_enet_itr_coal_set(ndev);
}
static int fec_enet_ipc_handle_init(struct fec_enet_private *fep)
@@ -2856,19 +2856,6 @@ static int fec_enet_set_coalesce(struct net_device *ndev,
return 0;
}
-static void fec_enet_itr_coal_init(struct net_device *ndev)
-{
- struct ethtool_coalesce ec;
-
- ec.rx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT;
- ec.rx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT;
-
- ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT;
- ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT;
-
- fec_enet_set_coalesce(ndev, &ec, NULL, NULL);
-}
-
static int fec_enet_get_tunable(struct net_device *netdev,
const struct ethtool_tunable *tuna,
void *data)
@@ -3623,6 +3610,10 @@ static int fec_enet_init(struct net_device *ndev)
fep->rx_align = 0x3;
fep->tx_align = 0x3;
#endif
+ fep->rx_pkts_itr = FEC_ITR_ICFT_DEFAULT;
+ fep->tx_pkts_itr = FEC_ITR_ICFT_DEFAULT;
+ fep->rx_time_itr = FEC_ITR_ICTT_DEFAULT;
+ fep->tx_time_itr = FEC_ITR_ICTT_DEFAULT;
/* Check mask of the streaming and coherent API */
ret = dma_set_mask_and_coherent(&fep->pdev->dev, DMA_BIT_MASK(32));
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index 93846bace028..ce2571c16e43 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -283,7 +283,7 @@ static int hisi_femac_rx(struct net_device *dev, int limit)
skb->protocol = eth_type_trans(skb, dev);
napi_gro_receive(&priv->napi, skb);
dev->stats.rx_packets++;
- dev->stats.rx_bytes += skb->len;
+ dev->stats.rx_bytes += len;
next:
pos = (pos + 1) % rxq->num;
if (rx_pkts_num >= limit)
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index ffcf797dfa90..f867e9531117 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -550,7 +550,7 @@ static int hix5hd2_rx(struct net_device *dev, int limit)
skb->protocol = eth_type_trans(skb, dev);
napi_gro_receive(&priv->napi, skb);
dev->stats.rx_packets++;
- dev->stats.rx_bytes += skb->len;
+ dev->stats.rx_bytes += len;
next:
pos = dma_ring_incr(pos, RX_DESC_NUM);
}
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 560d1d442232..d3fdc290937f 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -1741,11 +1741,8 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb,
dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len,
DMA_TO_DEVICE);
/* If we can't map the skb, have the upper layer try later */
- if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
- dev_kfree_skb_any(skb);
- skb = NULL;
+ if (dma_mapping_error(&nic->pdev->dev, dma_addr))
return -ENOMEM;
- }
/*
* Use the last 4 bytes of the SKB payload packet as the CRC, used for
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 49e926959ad3..55cf2f62bb30 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5936,9 +5936,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
e1000_tx_queue(tx_ring, tx_flags, count);
/* Make sure there is space in the ring for the next send. */
e1000_maybe_stop_tx(tx_ring,
- (MAX_SKB_FRAGS *
+ ((MAX_SKB_FRAGS + 1) *
DIV_ROUND_UP(PAGE_SIZE,
- adapter->tx_fifo_limit) + 2));
+ adapter->tx_fifo_limit) + 4));
if (!netdev_xmit_more() ||
netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) {
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 4a6630586ec9..fc373472e4e1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -32,6 +32,8 @@ struct workqueue_struct *fm10k_workqueue;
**/
static int __init fm10k_init_module(void)
{
+ int ret;
+
pr_info("%s\n", fm10k_driver_string);
pr_info("%s\n", fm10k_copyright);
@@ -43,7 +45,13 @@ static int __init fm10k_init_module(void)
fm10k_dbg_init();
- return fm10k_register_pci_driver();
+ ret = fm10k_register_pci_driver();
+ if (ret) {
+ fm10k_dbg_exit();
+ destroy_workqueue(fm10k_workqueue);
+ }
+
+ return ret;
}
module_init(fm10k_init_module);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 4a6a6e48c615..f6fa63e4253c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -4464,11 +4464,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
return -EOPNOTSUPP;
/* First 4 bytes of L4 header */
- if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF))
- new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK;
- else if (!usr_ip4_spec->l4_4_bytes)
- new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
- else
+ if (usr_ip4_spec->l4_4_bytes)
return -EOPNOTSUPP;
/* Filtering on Type of Service is not supported. */
@@ -4507,11 +4503,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
else
return -EOPNOTSUPP;
- if (usr_ip6_spec->l4_4_bytes == htonl(0xFFFFFFFF))
- new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK;
- else if (!usr_ip6_spec->l4_4_bytes)
- new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
- else
+ if (usr_ip6_spec->l4_4_bytes)
return -EOPNOTSUPP;
/* Filtering on Traffic class is not supported. */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b5dcd15ced36..6416322d7c18 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10655,6 +10655,21 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
}
/**
+ * i40e_clean_xps_state - clean xps state for every tx_ring
+ * @vsi: ptr to the VSI
+ **/
+static void i40e_clean_xps_state(struct i40e_vsi *vsi)
+{
+ int i;
+
+ if (vsi->tx_rings)
+ for (i = 0; i < vsi->num_queue_pairs; i++)
+ if (vsi->tx_rings[i])
+ clear_bit(__I40E_TX_XPS_INIT_DONE,
+ vsi->tx_rings[i]->state);
+}
+
+/**
* i40e_prep_for_reset - prep for the core to reset
* @pf: board private structure
*
@@ -10678,8 +10693,10 @@ static void i40e_prep_for_reset(struct i40e_pf *pf)
i40e_pf_quiesce_all_vsi(pf);
for (v = 0; v < pf->num_alloc_vsi; v++) {
- if (pf->vsi[v])
+ if (pf->vsi[v]) {
+ i40e_clean_xps_state(pf->vsi[v]);
pf->vsi[v]->seid = 0;
+ }
}
i40e_shutdown_adminq(&pf->hw);
@@ -16644,6 +16661,8 @@ static struct pci_driver i40e_driver = {
**/
static int __init i40e_init_module(void)
{
+ int err;
+
pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string);
pr_info("%s: %s\n", i40e_driver_name, i40e_copyright);
@@ -16661,7 +16680,14 @@ static int __init i40e_init_module(void)
}
i40e_dbg_init();
- return pci_register_driver(&i40e_driver);
+ err = pci_register_driver(&i40e_driver);
+ if (err) {
+ destroy_workqueue(i40e_wq);
+ i40e_dbg_exit();
+ return err;
+ }
+
+ return 0;
}
module_init(i40e_init_module);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 72ddcefc45b1..635f93d60318 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1578,6 +1578,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
i40e_cleanup_reset_vf(vf);
i40e_flush(hw);
+ usleep_range(20000, 40000);
clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states);
return true;
@@ -1701,6 +1702,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
}
i40e_flush(hw);
+ usleep_range(20000, 40000);
clear_bit(__I40E_VF_DISABLE, pf->state);
return true;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index d7465296f650..f71e132ede09 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -5196,6 +5196,8 @@ static struct pci_driver iavf_driver = {
**/
static int __init iavf_init_module(void)
{
+ int ret;
+
pr_info("iavf: %s\n", iavf_driver_string);
pr_info("%s\n", iavf_copyright);
@@ -5206,7 +5208,12 @@ static int __init iavf_init_module(void)
pr_err("%s: Failed to create workqueue\n", iavf_driver_name);
return -ENOMEM;
}
- return pci_register_driver(&iavf_driver);
+
+ ret = pci_register_driver(&iavf_driver);
+ if (ret)
+ destroy_workqueue(iavf_wq);
+
+ return ret;
}
module_init(iavf_init_module);
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index e5f3e7680dc6..ff911af16a4b 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1413,6 +1413,8 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
*data = 1;
return -1;
}
+ wr32(E1000_IVAR_MISC, E1000_IVAR_VALID << 8);
+ wr32(E1000_EIMS, BIT(0));
} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
shared_int = false;
if (request_irq(irq,
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 99933e89717a..e338fa572793 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -4869,6 +4869,8 @@ static struct pci_driver ixgbevf_driver = {
**/
static int __init ixgbevf_init_module(void)
{
+ int err;
+
pr_info("%s\n", ixgbevf_driver_string);
pr_info("%s\n", ixgbevf_copyright);
ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name);
@@ -4877,7 +4879,13 @@ static int __init ixgbevf_init_module(void)
return -ENOMEM;
}
- return pci_register_driver(&ixgbevf_driver);
+ err = pci_register_driver(&ixgbevf_driver);
+ if (err) {
+ destroy_workqueue(ixgbevf_wq);
+ return err;
+ }
+
+ return 0;
}
module_init(ixgbevf_init_module);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index ff3e361e06e7..5aefaaff0871 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4271,7 +4271,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp)
/* Use the cpu associated to the rxq when it is online, in all
* the other cases, use the cpu 0 which can't be offline.
*/
- if (cpu_online(pp->rxq_def))
+ if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def))
elected_cpu = pp->rxq_def;
max_cpu = num_present_cpus();
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 282db6fe3b08..67aa02bb2b85 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -884,7 +884,7 @@ static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf,
static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx)
{
#ifdef CONFIG_DCB
- if (pfvf->pfc_alloc_status[qidx])
+ if (qidx < NIX_PF_PFC_PRIO_MAX && pfvf->pfc_alloc_status[qidx])
return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx];
#endif
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index e64318c110fd..6a01ab1a6e6f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -1134,7 +1134,12 @@ int otx2_init_tc(struct otx2_nic *nic)
return err;
tc->flow_ht_params = tc_flow_ht_params;
- return rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
+ err = rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
+ if (err) {
+ kfree(tc->tc_entries_bitmap);
+ tc->tc_entries_bitmap = NULL;
+ }
+ return err;
}
EXPORT_SYMBOL(otx2_init_tc);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c
index 4046be0e86ff..a9a1028cb17b 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c
@@ -457,7 +457,7 @@ prestera_kern_neigh_cache_find(struct prestera_switch *sw,
n_cache =
rhashtable_lookup_fast(&sw->router->kern_neigh_cache_ht, key,
__prestera_kern_neigh_cache_ht_params);
- return IS_ERR(n_cache) ? NULL : n_cache;
+ return n_cache;
}
static void
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c
index aa080dc57ff0..02faaea2aefa 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c
@@ -330,7 +330,7 @@ prestera_nh_neigh_find(struct prestera_switch *sw,
nh_neigh = rhashtable_lookup_fast(&sw->router->nh_neigh_ht,
key, __prestera_nh_neigh_ht_params);
- return IS_ERR(nh_neigh) ? NULL : nh_neigh;
+ return nh_neigh;
}
struct prestera_nh_neigh *
@@ -484,7 +484,7 @@ __prestera_nexthop_group_find(struct prestera_switch *sw,
nh_grp = rhashtable_lookup_fast(&sw->router->nexthop_group_ht,
key, __prestera_nexthop_group_ht_params);
- return IS_ERR(nh_grp) ? NULL : nh_grp;
+ return nh_grp;
}
static struct prestera_nexthop_group *
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 74bd05e5dda2..e7a894ba5c3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1497,8 +1497,8 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf,
return -EFAULT;
err = sscanf(outlen_str, "%d", &outlen);
- if (err < 0)
- return err;
+ if (err != 1)
+ return -EINVAL;
ptr = kzalloc(outlen, GFP_KERNEL);
if (!ptr)
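Aside, not part of the patch: the cmd.c hunk above is the classic sscanf() pitfall, the function returns how many conversions succeeded rather than a negative error code. A minimal standalone C sketch:

	#include <stdio.h>

	int main(void)
	{
		const char *input = "not-a-number";
		int outlen = 0;
		int n = sscanf(input, "%d", &outlen);

		/* sscanf() returns the number of successful conversions (0 here),
		 * not a negative errno, so a "(n < 0)" check silently accepts
		 * garbage; require exactly one conversion instead. */
		if (n != 1) {
			printf("rejected %s (n=%d)\n", input, n);
			return 1;
		}

		printf("outlen=%d\n", outlen);
		return 0;
	}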
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
index 285d32d2fd08..d7c020f72401 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
@@ -365,7 +365,7 @@ void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs)
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++)
accel_fs_tcp_destroy_table(fs, i);
- kfree(accel_tcp);
+ kvfree(accel_tcp);
mlx5e_fs_set_accel_tcp(fs, NULL);
}
@@ -397,7 +397,7 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs)
err_destroy_tables:
while (--i >= 0)
accel_fs_tcp_destroy_table(fs, i);
- kfree(accel_tcp);
+ kvfree(accel_tcp);
mlx5e_fs_set_accel_tcp(fs, NULL);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
index 3dc6c987b8da..f900709639f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -427,15 +427,15 @@ mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci)
return NULL;
}
-static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec,
- struct mlx5e_macsec_sa *rx_sa,
- bool active)
+static int macsec_rx_sa_active_update(struct macsec_context *ctx,
+ struct mlx5e_macsec_sa *rx_sa,
+ bool active)
{
- struct mlx5_core_dev *mdev = macsec->mdev;
- struct mlx5_macsec_obj_attrs attrs = {};
+ struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
+ struct mlx5e_macsec *macsec = priv->macsec;
int err = 0;
- if (rx_sa->active != active)
+ if (rx_sa->active == active)
return 0;
rx_sa->active = active;
@@ -444,13 +444,11 @@ static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec,
return 0;
}
- attrs.sci = cpu_to_be64((__force u64)rx_sa->sci);
- attrs.enc_key_id = rx_sa->enc_key_id;
- err = mlx5e_macsec_create_object(mdev, &attrs, false, &rx_sa->macsec_obj_id);
+ err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false);
if (err)
- return err;
+ rx_sa->active = false;
- return 0;
+ return err;
}
static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx)
@@ -476,6 +474,11 @@ static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx)
return false;
}
+ if (!ctx->secy->tx_sc.encrypt) {
+ netdev_err(netdev, "MACsec offload: encrypt off isn't supported\n");
+ return false;
+ }
+
return true;
}
@@ -620,6 +623,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx)
if (tx_sa->active == ctx_tx_sa->active)
goto out;
+ tx_sa->active = ctx_tx_sa->active;
if (tx_sa->assoc_num != tx_sc->encoding_sa)
goto out;
@@ -635,8 +639,6 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx)
mlx5e_macsec_cleanup_sa(macsec, tx_sa, true);
}
-
- tx_sa->active = ctx_tx_sa->active;
out:
mutex_unlock(&macsec->lock);
@@ -736,9 +738,14 @@ static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx)
sc_xarray_element->rx_sc = rx_sc;
err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element,
- XA_LIMIT(1, USHRT_MAX), GFP_KERNEL);
- if (err)
+ XA_LIMIT(1, MLX5_MACEC_RX_FS_ID_MAX), GFP_KERNEL);
+ if (err) {
+ if (err == -EBUSY)
+ netdev_err(ctx->netdev,
+ "MACsec offload: unable to create entry for RX SC (%d Rx SCs already allocated)\n",
+ MLX5_MACEC_RX_FS_ID_MAX);
goto destroy_sc_xarray_elemenet;
+ }
rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL);
if (!rx_sc->md_dst) {
@@ -798,16 +805,16 @@ static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx)
goto out;
}
- rx_sc->active = ctx_rx_sc->active;
if (rx_sc->active == ctx_rx_sc->active)
goto out;
+ rx_sc->active = ctx_rx_sc->active;
for (i = 0; i < MACSEC_NUM_AN; ++i) {
rx_sa = rx_sc->rx_sa[i];
if (!rx_sa)
continue;
- err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, rx_sa->active && ctx_rx_sc->active);
+ err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active);
if (err)
goto out;
}
@@ -818,16 +825,43 @@ out:
return err;
}
+static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc)
+{
+ struct mlx5e_macsec_sa *rx_sa;
+ int i;
+
+ for (i = 0; i < MACSEC_NUM_AN; ++i) {
+ rx_sa = rx_sc->rx_sa[i];
+ if (!rx_sa)
+ continue;
+
+ mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
+ mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
+
+ kfree(rx_sa);
+ rx_sc->rx_sa[i] = NULL;
+ }
+
+ /* At this point the relevant MACsec offload Rx rule already removed at
+ * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current
+ * Rx related data propagating using xa_erase which uses rcu to sync,
+ * once fs_id is erased then this rx_sc is hidden from datapath.
+ */
+ list_del_rcu(&rx_sc->rx_sc_list_element);
+ xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id);
+ metadata_dst_free(rx_sc->md_dst);
+ kfree(rx_sc->sc_xarray_element);
+ kfree_rcu(rx_sc);
+}
+
static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx)
{
struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_rx_sc *rx_sc;
- struct mlx5e_macsec_sa *rx_sa;
struct mlx5e_macsec *macsec;
struct list_head *list;
int err = 0;
- int i;
mutex_lock(&priv->macsec->lock);
@@ -849,31 +883,7 @@ static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx)
goto out;
}
- for (i = 0; i < MACSEC_NUM_AN; ++i) {
- rx_sa = rx_sc->rx_sa[i];
- if (!rx_sa)
- continue;
-
- mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
- mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
-
- kfree(rx_sa);
- rx_sc->rx_sa[i] = NULL;
- }
-
-/*
- * At this point the relevant MACsec offload Rx rule already removed at
- * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current
- * Rx related data propagating using xa_erase which uses rcu to sync,
- * once fs_id is erased then this rx_sc is hidden from datapath.
- */
- list_del_rcu(&rx_sc->rx_sc_list_element);
- xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id);
- metadata_dst_free(rx_sc->md_dst);
- kfree(rx_sc->sc_xarray_element);
-
- kfree_rcu(rx_sc);
-
+ macsec_del_rxsc_ctx(macsec, rx_sc);
out:
mutex_unlock(&macsec->lock);
@@ -1015,7 +1025,7 @@ static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx)
goto out;
}
- err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, ctx_rx_sa->active);
+ err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active);
out:
mutex_unlock(&macsec->lock);
@@ -1234,7 +1244,6 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx)
struct mlx5e_priv *priv = netdev_priv(ctx->netdev);
struct mlx5e_macsec_device *macsec_device;
struct mlx5e_macsec_rx_sc *rx_sc, *tmp;
- struct mlx5e_macsec_sa *rx_sa;
struct mlx5e_macsec_sa *tx_sa;
struct mlx5e_macsec *macsec;
struct list_head *list;
@@ -1263,28 +1272,15 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx)
}
list = &macsec_device->macsec_rx_sc_list_head;
- list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) {
- for (i = 0; i < MACSEC_NUM_AN; ++i) {
- rx_sa = rx_sc->rx_sa[i];
- if (!rx_sa)
- continue;
-
- mlx5e_macsec_cleanup_sa(macsec, rx_sa, false);
- mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id);
- kfree(rx_sa);
- rx_sc->rx_sa[i] = NULL;
- }
-
- list_del_rcu(&rx_sc->rx_sc_list_element);
-
- kfree_rcu(rx_sc);
- }
+ list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element)
+ macsec_del_rxsc_ctx(macsec, rx_sc);
kfree(macsec_device->dev_addr);
macsec_device->dev_addr = NULL;
list_del_rcu(&macsec_device->macsec_device_list_element);
--macsec->num_of_devices;
+ kfree(macsec_device);
out:
mutex_unlock(&macsec->lock);
@@ -1748,7 +1744,7 @@ void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev,
if (!macsec)
return;
- fs_id = MLX5_MACSEC_METADATA_HANDLE(macsec_meta_data);
+ fs_id = MLX5_MACSEC_RX_METADAT_HANDLE(macsec_meta_data);
rcu_read_lock();
sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h
index d580b4a91253..347380a2cd9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h
@@ -10,9 +10,11 @@
#include <net/macsec.h>
#include <net/dst_metadata.h>
-/* Bit31 - 30: MACsec marker, Bit3-0: MACsec id */
+/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */
+#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */
+#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX
#define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1)
-#define MLX5_MACSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(3, 0))
+#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK)
struct mlx5e_priv;
struct mlx5e_macsec;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
index 1ac0cf04e811..5b658a5588c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
@@ -250,7 +250,7 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs)
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
u32 *flow_group_in;
- int err = 0;
+ int err;
ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC);
if (!ns)
@@ -261,8 +261,10 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs)
return -ENOMEM;
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!flow_group_in)
+ if (!flow_group_in) {
+ err = -ENOMEM;
goto out_spec;
+ }
tx_tables = &tx_fs->tables;
ft_crypto = &tx_tables->ft_crypto;
@@ -898,7 +900,7 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs)
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
u32 *flow_group_in;
- int err = 0;
+ int err;
ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC);
if (!ns)
@@ -909,8 +911,10 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs)
return -ENOMEM;
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
- if (!flow_group_in)
+ if (!flow_group_in) {
+ err = -ENOMEM;
goto free_spec;
+ }
rx_tables = &rx_fs->tables;
ft_crypto = &rx_tables->ft_crypto;
@@ -1142,10 +1146,10 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
ft_crypto = &rx_tables->ft_crypto;
/* Set bit[31 - 30] macsec marker - 0x01 */
- /* Set bit[3-0] fs id */
+ /* Set bit[15-0] fs id */
MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
- MLX5_SET(set_action_in, action, data, fs_id | BIT(30));
+ MLX5_SET(set_action_in, action, data, MLX5_MACSEC_RX_METADAT_HANDLE(fs_id) | BIT(30));
MLX5_SET(set_action_in, action, offset, 0);
MLX5_SET(set_action_in, action, length, 32);
@@ -1205,6 +1209,7 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs,
rx_rule->rule[1] = rule;
}
+ kvfree(spec);
return macsec_rule;
err:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 2169486c4bfb..374e3fbdc2cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1362,6 +1362,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf)
devl_rate_nodes_destroy(devlink);
}
+ /* Destroy legacy fdb when disabling sriov in legacy mode. */
+ if (esw->mode == MLX5_ESWITCH_LEGACY)
+ mlx5_eswitch_disable_locked(esw);
esw->esw_funcs.num_vfs = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index f68dc2d0dbe6..3029bc1c0dd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -736,6 +736,14 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
struct mlx5_eswitch *slave_esw);
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
+{
+ if (mlx5_esw_allowed(esw))
+ return esw->esw_funcs.num_vfs;
+
+ return 0;
+}
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 3fda75fe168c..8c6c9bcb3dc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3387,6 +3387,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
int err;
esw->mode = MLX5_ESWITCH_LEGACY;
+
+ /* If changing from switchdev to legacy mode without sriov enabled,
+ * no need to create legacy fdb.
+ */
+ if (!mlx5_sriov_is_enabled(esw->dev))
+ return 0;
+
err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS);
if (err)
NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index 108a3503f413..edd910258314 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -312,6 +312,8 @@ revert_changes:
for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) {
struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl;
+ attr->dests[curr_dest].termtbl = NULL;
+
/* search for the destination associated with the
* current term table
*/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index be1307a63e6d..32c3e0a649a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -700,10 +700,13 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
return false;
#ifdef CONFIG_MLX5_ESWITCH
- dev = ldev->pf[MLX5_LAG_P1].dev;
- if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev))
- return false;
+ for (i = 0; i < ldev->ports; i++) {
+ dev = ldev->pf[i].dev;
+ if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
+ return false;
+ }
+ dev = ldev->pf[MLX5_LAG_P1].dev;
mode = mlx5_eswitch_mode(dev);
for (i = 0; i < ldev->ports; i++)
if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
index 31d443dd8386..f68461b13391 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -46,7 +46,7 @@ static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn,
int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
struct mlx5dr_action *action)
{
- int ret;
+ int ret = -EOPNOTSUPP;
if (action && action->action_type != DR_ACTION_TYP_FT)
return -EOPNOTSUPP;
@@ -67,6 +67,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
goto out;
}
+ if (ret)
+ goto out;
+
/* Release old action */
if (tbl->miss_action)
refcount_dec(&tbl->miss_action->refcount);
diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c
index 81a8ccca7e5e..5693784eec5b 100644
--- a/drivers/net/ethernet/microchip/encx24j600-regmap.c
+++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c
@@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg,
goto err_out;
usleep_range(26, 100);
- while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) &&
+ while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) &&
(mistat & BUSY))
cpu_relax();
@@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg,
goto err_out;
usleep_range(26, 100);
- while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) &&
+ while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) &&
(mistat & BUSY))
cpu_relax();
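Aside, not part of the patch: the encx24j600 hunks above fix an operator-precedence bug in a poll loop. A standalone sketch of the same mistake (fake_read is a hypothetical stand-in for regmap_read()):

	#include <stdio.h>

	#define BUSY 0x1

	/* Hypothetical register read standing in for regmap_read(): fails with -5. */
	static int fake_read(unsigned int *val)
	{
		*val = 0;
		return -5;
	}

	int main(void)
	{
		unsigned int mistat;
		int ret;

		/* Broken: '!=' binds tighter than '=', so ret stores the boolean
		 * result of (fake_read(...) != 0) and the real status is lost. */
		while ((ret = fake_read(&mistat) != 0) && (mistat & BUSY))
			;
		printf("broken: ret=%d (boolean, -5 lost)\n", ret);

		/* Fixed: parenthesize the assignment, then test it against zero. */
		while (((ret = fake_read(&mistat)) == 0) && (mistat & BUSY))
			;
		printf("fixed:  ret=%d\n", ret);

		return 0;
	}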
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
index 66360c8c5a38..141897dfe388 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
@@ -317,7 +317,7 @@ int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb)
next_dcb_hw = sparx5_fdma_next_dcb(tx, tx->curr_entry);
db_hw = &next_dcb_hw->db[0];
if (!(db_hw->status & FDMA_DCB_STATUS_DONE))
- tx->dropped++;
+ return -EINVAL;
db = list_first_entry(&tx->db_list, struct sparx5_db, list);
list_move_tail(&db->list, &tx->db_list);
next_dcb_hw->nextptr = FDMA_DCB_INVALID_DATA;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index eeac04b84638..b6bbb3c9bd7a 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -887,6 +887,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
cleanup_ports:
sparx5_cleanup_ports(sparx5);
+ if (sparx5->mact_queue)
+ destroy_workqueue(sparx5->mact_queue);
cleanup_config:
kfree(configs);
cleanup_pnode:
@@ -911,6 +913,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev)
sparx5_cleanup_ports(sparx5);
/* Unregister netdevs */
sparx5_unregister_notifier_blocks(sparx5);
+ destroy_workqueue(sparx5->mact_queue);
return 0;
}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
index 83c16ca5b30f..6db6ac6a3bbc 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
@@ -234,9 +234,8 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev)
sparx5_set_port_ifh(ifh, port->portno);
if (sparx5->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
- ret = sparx5_ptp_txtstamp_request(port, skb);
- if (ret)
- return ret;
+ if (sparx5_ptp_txtstamp_request(port, skb) < 0)
+ return NETDEV_TX_BUSY;
sparx5_set_port_ifh_rew_op(ifh, SPARX5_SKB_CB(skb)->rew_op);
sparx5_set_port_ifh_pdu_type(ifh, SPARX5_SKB_CB(skb)->pdu_type);
@@ -250,23 +249,31 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev)
else
ret = sparx5_inject(sparx5, ifh, skb, dev);
- if (ret == NETDEV_TX_OK) {
- stats->tx_bytes += skb->len;
- stats->tx_packets++;
+ if (ret == -EBUSY)
+ goto busy;
+ if (ret < 0)
+ goto drop;
- if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
- SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
- return ret;
+ stats->tx_bytes += skb->len;
+ stats->tx_packets++;
+ sparx5->tx.packets++;
- dev_kfree_skb_any(skb);
- } else {
- stats->tx_dropped++;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+ SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
+ return NETDEV_TX_OK;
- if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
- SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
- sparx5_ptp_txtstamp_release(port, skb);
- }
- return ret;
+ dev_consume_skb_any(skb);
+ return NETDEV_TX_OK;
+drop:
+ stats->tx_dropped++;
+ sparx5->tx.dropped++;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+busy:
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
+ SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
+ sparx5_ptp_txtstamp_release(port, skb);
+ return NETDEV_TX_BUSY;
}
static enum hrtimer_restart sparx5_injection_timeout(struct hrtimer *tmr)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h
index 4a6efe6ada08..65c24ee49efd 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma.h
+++ b/drivers/net/ethernet/microsoft/mana/gdma.h
@@ -498,7 +498,14 @@ enum {
#define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0)
-#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT
+/* Advertise to the NIC firmware: the NAPI work_done variable race is fixed,
+ * so the driver is able to reliably support features like busy_poll.
+ */
+#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
+
+#define GDMA_DRV_CAP_FLAGS1 \
+ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
+ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 9259a74eca40..27a0f3af8aab 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1303,10 +1303,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
xdp_do_flush();
}
-static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
+static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
{
struct mana_cq *cq = context;
u8 arm_bit;
+ int w;
WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
@@ -1315,26 +1316,31 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
else
mana_poll_tx_cq(cq);
- if (cq->work_done < cq->budget &&
- napi_complete_done(&cq->napi, cq->work_done)) {
+ w = cq->work_done;
+
+ if (w < cq->budget &&
+ napi_complete_done(&cq->napi, w)) {
arm_bit = SET_ARM_BIT;
} else {
arm_bit = 0;
}
mana_gd_ring_cq(gdma_queue, arm_bit);
+
+ return w;
}
static int mana_poll(struct napi_struct *napi, int budget)
{
struct mana_cq *cq = container_of(napi, struct mana_cq, napi);
+ int w;
cq->work_done = 0;
cq->budget = budget;
- mana_cq_handler(cq, cq->gdma_cq);
+ w = mana_cq_handler(cq, cq->gdma_cq);
- return min(cq->work_done, budget);
+ return min(w, budget);
}
static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
index 2b427d8ccb2f..ccacb6ab6c39 100644
--- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
@@ -282,7 +282,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
dma_len = skb_headlen(skb);
if (skb_is_gso(skb))
type = NFDK_DESC_TX_TYPE_TSO;
- else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
+ else if (!nr_frags && dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
type = NFDK_DESC_TX_TYPE_SIMPLE;
else
type = NFDK_DESC_TX_TYPE_GATHER;
@@ -927,7 +927,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
dma_len = pkt_len;
dma_addr = rxbuf->dma_addr + dma_off;
- if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
+ if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
type = NFDK_DESC_TX_TYPE_SIMPLE;
else
type = NFDK_DESC_TX_TYPE_GATHER;
@@ -1325,7 +1325,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
txbuf = &tx_ring->ktxbufs[wr_idx];
dma_len = skb_headlen(skb);
- if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
+ if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD)
type = NFDK_DESC_TX_TYPE_SIMPLE;
else
type = NFDK_DESC_TX_TYPE_GATHER;
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 19d043b593cc..62320be4de5a 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -249,25 +249,26 @@ static void nixge_hw_dma_bd_release(struct net_device *ndev)
struct sk_buff *skb;
int i;
- for (i = 0; i < RX_BD_NUM; i++) {
- phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i],
- phys);
-
- dma_unmap_single(ndev->dev.parent, phys_addr,
- NIXGE_MAX_JUMBO_FRAME_SIZE,
- DMA_FROM_DEVICE);
-
- skb = (struct sk_buff *)(uintptr_t)
- nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i],
- sw_id_offset);
- dev_kfree_skb(skb);
- }
+ if (priv->rx_bd_v) {
+ for (i = 0; i < RX_BD_NUM; i++) {
+ phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i],
+ phys);
+
+ dma_unmap_single(ndev->dev.parent, phys_addr,
+ NIXGE_MAX_JUMBO_FRAME_SIZE,
+ DMA_FROM_DEVICE);
+
+ skb = (struct sk_buff *)(uintptr_t)
+ nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i],
+ sw_id_offset);
+ dev_kfree_skb(skb);
+ }
- if (priv->rx_bd_v)
dma_free_coherent(ndev->dev.parent,
sizeof(*priv->rx_bd_v) * RX_BD_NUM,
priv->rx_bd_v,
priv->rx_bd_p);
+ }
if (priv->tx_skb)
devm_kfree(ndev->dev.parent, priv->tx_skb);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 9fb1fa479d4b..16e6bd466143 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -767,34 +767,34 @@ static int qed_mcp_cancel_load_req(struct qed_hwfn *p_hwfn,
return rc;
}
-#define CONFIG_QEDE_BITMAP_IDX BIT(0)
-#define CONFIG_QED_SRIOV_BITMAP_IDX BIT(1)
-#define CONFIG_QEDR_BITMAP_IDX BIT(2)
-#define CONFIG_QEDF_BITMAP_IDX BIT(4)
-#define CONFIG_QEDI_BITMAP_IDX BIT(5)
-#define CONFIG_QED_LL2_BITMAP_IDX BIT(6)
+#define BITMAP_IDX_FOR_CONFIG_QEDE BIT(0)
+#define BITMAP_IDX_FOR_CONFIG_QED_SRIOV BIT(1)
+#define BITMAP_IDX_FOR_CONFIG_QEDR BIT(2)
+#define BITMAP_IDX_FOR_CONFIG_QEDF BIT(4)
+#define BITMAP_IDX_FOR_CONFIG_QEDI BIT(5)
+#define BITMAP_IDX_FOR_CONFIG_QED_LL2 BIT(6)
static u32 qed_get_config_bitmap(void)
{
u32 config_bitmap = 0x0;
if (IS_ENABLED(CONFIG_QEDE))
- config_bitmap |= CONFIG_QEDE_BITMAP_IDX;
+ config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDE;
if (IS_ENABLED(CONFIG_QED_SRIOV))
- config_bitmap |= CONFIG_QED_SRIOV_BITMAP_IDX;
+ config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_SRIOV;
if (IS_ENABLED(CONFIG_QED_RDMA))
- config_bitmap |= CONFIG_QEDR_BITMAP_IDX;
+ config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDR;
if (IS_ENABLED(CONFIG_QED_FCOE))
- config_bitmap |= CONFIG_QEDF_BITMAP_IDX;
+ config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDF;
if (IS_ENABLED(CONFIG_QED_ISCSI))
- config_bitmap |= CONFIG_QEDI_BITMAP_IDX;
+ config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDI;
if (IS_ENABLED(CONFIG_QED_LL2))
- config_bitmap |= CONFIG_QED_LL2_BITMAP_IDX;
+ config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_LL2;
return config_bitmap;
}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index bd0607680329..2fd5c6fdb500 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -2991,7 +2991,7 @@ static void qlcnic_83xx_recover_driver_lock(struct qlcnic_adapter *adapter)
QLCWRX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK, val);
dev_info(&adapter->pdev->dev,
"%s: lock recovery initiated\n", __func__);
- msleep(QLC_83XX_DRV_LOCK_RECOVERY_DELAY);
+ mdelay(QLC_83XX_DRV_LOCK_RECOVERY_DELAY);
val = QLCRDX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK);
id = ((val >> 2) & 0xF);
if (id == adapter->portnum) {
@@ -3027,7 +3027,7 @@ int qlcnic_83xx_lock_driver(struct qlcnic_adapter *adapter)
if (status)
break;
- msleep(QLC_83XX_DRV_LOCK_WAIT_DELAY);
+ mdelay(QLC_83XX_DRV_LOCK_WAIT_DELAY);
i++;
if (i == 1)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 36324126db6d..33f723a9f471 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -841,7 +841,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
napi_gro_receive(&priv->napi[q],
priv->rx_1st_skb);
stats->rx_packets++;
- stats->rx_bytes += priv->rx_1st_skb->len;
+ stats->rx_bytes += pkt_len;
break;
}
}
@@ -3020,6 +3020,7 @@ static int __maybe_unused ravb_resume(struct device *dev)
ret = ravb_open(ndev);
if (ret < 0)
return ret;
+ ravb_set_rx_mode(ndev);
netif_device_attach(ndev);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index c25bfecb4a2d..e5cfde1cbd5c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -748,6 +748,8 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
if (fc & FLOW_RX) {
pr_debug("\tReceive Flow-Control ON\n");
flow |= GMAC_RX_FLOW_CTRL_RFE;
+ } else {
+ pr_debug("\tReceive Flow-Control OFF\n");
}
writel(flow, ioaddr + GMAC_RX_FLOW_CTRL);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 6b43da78cdf0..23ec0a9e396c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1061,8 +1061,16 @@ static void stmmac_mac_link_up(struct phylink_config *config,
ctrl |= priv->hw->link.duplex;
/* Flow Control operation */
- if (tx_pause && rx_pause)
- stmmac_mac_flow_ctrl(priv, duplex);
+ if (rx_pause && tx_pause)
+ priv->flow_ctrl = FLOW_AUTO;
+ else if (rx_pause && !tx_pause)
+ priv->flow_ctrl = FLOW_RX;
+ else if (!rx_pause && tx_pause)
+ priv->flow_ctrl = FLOW_TX;
+ else
+ priv->flow_ctrl = FLOW_OFF;
+
+ stmmac_mac_flow_ctrl(priv, duplex);
if (ctrl != old_ctrl)
writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 50f6b4a14be4..eb6d9cd8e93f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -108,10 +108,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en");
axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm");
- axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe");
- axi->axi_fb = of_property_read_bool(np, "snps,axi_fb");
- axi->axi_mb = of_property_read_bool(np, "snps,axi_mb");
- axi->axi_rb = of_property_read_bool(np, "snps,axi_rb");
+ axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe");
+ axi->axi_fb = of_property_read_bool(np, "snps,fb");
+ axi->axi_mb = of_property_read_bool(np, "snps,mb");
+ axi->axi_rb = of_property_read_bool(np, "snps,rb");
if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt))
axi->axi_wr_osr_lmt = 1;
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index c50b137f92d7..b3b0ba842541 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -1454,7 +1454,7 @@ static void am65_cpsw_nuss_mac_link_up(struct phylink_config *config, struct phy
if (speed == SPEED_1000)
mac_control |= CPSW_SL_CTL_GIG;
- if (speed == SPEED_10 && interface == PHY_INTERFACE_MODE_RGMII)
+ if (speed == SPEED_10 && phy_interface_mode_is_rgmii(interface))
/* Can be used with in band mode only */
mac_control |= CPSW_SL_CTL_EXT_EN;
if (speed == SPEED_100 && interface == PHY_INTERFACE_MODE_RMII)
@@ -2082,7 +2082,7 @@ static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common)
for (i = 0; i < common->port_num; i++) {
port = &common->ports[i];
- if (port->ndev)
+ if (port->ndev && port->ndev->reg_state == NETREG_REGISTERED)
unregister_netdev(port->ndev);
}
}
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index 450b16ad40a4..e1a569b99e4a 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -885,7 +885,7 @@ static int ca8210_spi_transfer(
dev_dbg(&spi->dev, "%s called\n", __func__);
- cas_ctl = kmalloc(sizeof(*cas_ctl), GFP_ATOMIC);
+ cas_ctl = kzalloc(sizeof(*cas_ctl), GFP_ATOMIC);
if (!cas_ctl)
return -ENOMEM;
diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c
index c69b87d3837d..edc769daad07 100644
--- a/drivers/net/ieee802154/cc2520.c
+++ b/drivers/net/ieee802154/cc2520.c
@@ -970,7 +970,7 @@ static int cc2520_hw_init(struct cc2520_private *priv)
if (timeout-- <= 0) {
dev_err(&priv->spi->dev, "oscillator start failed!\n");
- return ret;
+ return -ETIMEDOUT;
}
udelay(1);
} while (!(status & CC2520_STATUS_XOSC32M_STABLE));
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 14e8d04cb434..2e9742952c4e 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -211,7 +211,7 @@ static __net_init int loopback_net_init(struct net *net)
int err;
err = -ENOMEM;
- dev = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup);
+ dev = alloc_netdev(0, "lo", NET_NAME_PREDICTABLE, loopback_setup);
if (!dev)
goto out;
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index f41f67b583db..2fbac51b9b19 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3698,6 +3698,7 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = {
[IFLA_MACSEC_SCB] = { .type = NLA_U8 },
[IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 },
[IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 },
+ [IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 },
};
static void macsec_free_netdev(struct net_device *dev)
diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c
index 689e728345ce..b782c35c4ac1 100644
--- a/drivers/net/mdio/fwnode_mdio.c
+++ b/drivers/net/mdio/fwnode_mdio.c
@@ -98,6 +98,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio,
*/
rc = phy_device_register(phy);
if (rc) {
+ device_set_node(&phy->mdio.dev, NULL);
fwnode_handle_put(child);
return rc;
}
@@ -148,12 +149,13 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus,
/* Associate the fwnode with the device structure so it
* can be looked up later.
*/
- phy->mdio.dev.fwnode = child;
+ phy->mdio.dev.fwnode = fwnode_handle_get(child);
/* All data is now stored in the phy struct, so register it */
rc = phy_device_register(phy);
if (rc) {
- fwnode_handle_put(phy->mdio.dev.fwnode);
+ phy->mdio.dev.fwnode = NULL;
+ fwnode_handle_put(child);
goto clean_phy;
}
} else if (is_of_node(child)) {
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 796e9c7857d0..510822d6d0d9 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -68,8 +68,9 @@ static int of_mdiobus_register_device(struct mii_bus *mdio,
/* All data is now stored in the mdiodev struct; register it. */
rc = mdio_device_register(mdiodev);
if (rc) {
+ device_set_node(&mdiodev->dev, NULL);
+ fwnode_handle_put(fwnode);
mdio_device_free(mdiodev);
- of_node_put(child);
return rc;
}
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index bdff9ac5056d..4f4f79532c6c 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -332,10 +332,8 @@ static ssize_t enabled_store(struct config_item *item,
}
if (enabled) { /* true */
- if (nt->extended && !(netconsole_ext.flags & CON_ENABLED)) {
- netconsole_ext.flags |= CON_ENABLED;
+ if (nt->extended && !console_is_registered(&netconsole_ext))
register_console(&netconsole_ext);
- }
/*
* Skip netpoll_parse_options() -- all the attributes are
@@ -869,7 +867,7 @@ static void write_msg(struct console *con, const char *msg, unsigned int len)
static struct console netconsole_ext = {
.name = "netcon_ext",
- .flags = CON_EXTENDED, /* starts disabled, registered on first use */
+ .flags = CON_ENABLED | CON_EXTENDED,
.write = write_ext_msg,
};
@@ -883,6 +881,7 @@ static int __init init_netconsole(void)
{
int err;
struct netconsole_target *nt, *tmp;
+ bool extended = false;
unsigned long flags;
char *target_config;
char *input = config;
@@ -895,11 +894,12 @@ static int __init init_netconsole(void)
goto fail;
}
/* Dump existing printks when we register */
- if (nt->extended)
- netconsole_ext.flags |= CON_PRINTBUFFER |
- CON_ENABLED;
- else
+ if (nt->extended) {
+ extended = true;
+ netconsole_ext.flags |= CON_PRINTBUFFER;
+ } else {
netconsole.flags |= CON_PRINTBUFFER;
+ }
spin_lock_irqsave(&target_list_lock, flags);
list_add(&nt->list, &target_list);
@@ -915,7 +915,7 @@ static int __init init_netconsole(void)
if (err)
goto undonotifier;
- if (netconsole_ext.flags & CON_ENABLED)
+ if (extended)
register_console(&netconsole_ext);
register_console(&netconsole);
pr_info("network logging started\n");
@@ -945,7 +945,8 @@ static void __exit cleanup_netconsole(void)
{
struct netconsole_target *nt, *tmp;
- unregister_console(&netconsole_ext);
+ if (console_is_registered(&netconsole_ext))
+ unregister_console(&netconsole_ext);
unregister_console(&netconsole);
dynamic_netconsole_exit();
unregister_netdevice_notifier(&netconsole_netdev_notifier);
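The netconsole hunks above stop treating CON_ENABLED as an "already registered" marker and ask the console core directly with console_is_registered(), registering and unregistering at most once. A hedged user-space sketch of that register-once / unregister-only-if-registered shape (the struct and helpers below are illustrative stand-ins, not the printk API):

#include <stdbool.h>
#include <stdio.h>

struct console_like {
    const char *name;
    bool registered;        /* registration state owned by the "core" */
};

static bool console_like_is_registered(const struct console_like *c)
{
    return c->registered;
}

static void console_like_register(struct console_like *c)
{
    if (console_like_is_registered(c))
        return;             /* no double registration */
    c->registered = true;
    printf("registered %s\n", c->name);
}

static void console_like_unregister(struct console_like *c)
{
    if (!console_like_is_registered(c))
        return;             /* only tear down what was actually set up */
    c->registered = false;
    printf("unregistered %s\n", c->name);
}

int main(void)
{
    struct console_like ext = { .name = "netcon_ext" };

    console_like_register(&ext);
    console_like_register(&ext);    /* ignored */
    console_like_unregister(&ext);
    console_like_unregister(&ext);  /* safe no-op */
    return 0;
}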
diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index 464d88ca8ab0..a4abea921046 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -484,7 +484,14 @@ static int __init ntb_netdev_init_module(void)
rc = ntb_transport_register_client_dev(KBUILD_MODNAME);
if (rc)
return rc;
- return ntb_transport_register_client(&ntb_netdev_client);
+
+ rc = ntb_transport_register_client(&ntb_netdev_client);
+ if (rc) {
+ ntb_transport_unregister_client_dev(KBUILD_MODNAME);
+ return rc;
+ }
+
+ return 0;
}
module_init(ntb_netdev_init_module);
diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index 250742ffdfd9..044828d081d2 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/unistd.h>
+#include <linux/property.h>
void mdio_device_free(struct mdio_device *mdiodev)
{
@@ -30,6 +31,7 @@ EXPORT_SYMBOL(mdio_device_free);
static void mdio_device_release(struct device *dev)
{
+ fwnode_handle_put(dev->fwnode);
kfree(to_mdio_device(dev));
}
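Together with the fwnode_mdio.c and of_mdio.c hunks above, this makes the device own one fwnode reference: it is taken with fwnode_handle_get() when the node is attached, the matching fwnode_handle_put() moves into the release callback, and error paths only drop their own local reference. A small standalone sketch of the "take a reference at attach, drop it in release" pairing (toy refcount, illustrative names):

#include <stdio.h>
#include <stdlib.h>

struct node {
    int refcount;
};

static struct node *node_get(struct node *n)
{
    if (n)
        n->refcount++;
    return n;
}

static void node_put(struct node *n)
{
    if (n && --n->refcount == 0)
        free(n);
}

struct toy_dev {
    struct node *fwnode;        /* reference owned by the device */
};

static void toy_dev_attach_node(struct toy_dev *d, struct node *n)
{
    d->fwnode = node_get(n);    /* take the reference at attach time */
}

static void toy_dev_release(struct toy_dev *d)
{
    node_put(d->fwnode);        /* the matching put lives in release */
    d->fwnode = NULL;
}

int main(void)
{
    struct node *n = calloc(1, sizeof(*n));
    struct toy_dev d = { 0 };

    if (!n)
        return 1;
    n->refcount = 1;            /* caller's own reference */
    toy_dev_attach_node(&d, n);
    node_put(n);                /* caller drops its reference */
    toy_dev_release(&d);        /* device drops the attached one; node freed */
    return 0;
}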
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
index 24bae27eedef..cae24091fb6f 100644
--- a/drivers/net/phy/mxl-gpy.c
+++ b/drivers/net/phy/mxl-gpy.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/bitfield.h>
#include <linux/hwmon.h>
+#include <linux/mutex.h>
#include <linux/phy.h>
#include <linux/polynomial.h>
#include <linux/netdevice.h>
@@ -70,6 +71,14 @@
#define VPSPEC1_TEMP_STA 0x0E
#define VPSPEC1_TEMP_STA_DATA GENMASK(9, 0)
+/* Mailbox */
+#define VSPEC1_MBOX_DATA 0x5
+#define VSPEC1_MBOX_ADDRLO 0x6
+#define VSPEC1_MBOX_CMD 0x7
+#define VSPEC1_MBOX_CMD_ADDRHI GENMASK(7, 0)
+#define VSPEC1_MBOX_CMD_RD (0 << 8)
+#define VSPEC1_MBOX_CMD_READY BIT(15)
+
/* WoL */
#define VPSPEC2_WOL_CTL 0x0E06
#define VPSPEC2_WOL_AD01 0x0E08
@@ -77,7 +86,13 @@
#define VPSPEC2_WOL_AD45 0x0E0A
#define WOL_EN BIT(0)
+/* Internal registers, access via mbox */
+#define REG_GPIO0_OUT 0xd3ce00
+
struct gpy_priv {
+ /* serialize mailbox accesses */
+ struct mutex mbox_lock;
+
u8 fw_major;
u8 fw_minor;
};
@@ -187,6 +202,45 @@ static int gpy_hwmon_register(struct phy_device *phydev)
}
#endif
+static int gpy_mbox_read(struct phy_device *phydev, u32 addr)
+{
+ struct gpy_priv *priv = phydev->priv;
+ int val, ret;
+ u16 cmd;
+
+ mutex_lock(&priv->mbox_lock);
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_ADDRLO,
+ addr);
+ if (ret)
+ goto out;
+
+ cmd = VSPEC1_MBOX_CMD_RD;
+ cmd |= FIELD_PREP(VSPEC1_MBOX_CMD_ADDRHI, addr >> 16);
+
+ ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_CMD, cmd);
+ if (ret)
+ goto out;
+
+ /* The mbox read is used in the interrupt workaround. It was observed
+ * that a read might take up to 2.5ms. This is also the time for which
+ * the interrupt line is stuck low. To be on the safe side, poll the
+ * ready bit for 10ms.
+ */
+ ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1,
+ VSPEC1_MBOX_CMD, val,
+ (val & VSPEC1_MBOX_CMD_READY),
+ 500, 10000, false);
+ if (ret)
+ goto out;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_DATA);
+
+out:
+ mutex_unlock(&priv->mbox_lock);
+ return ret;
+}
+
static int gpy_config_init(struct phy_device *phydev)
{
int ret;
@@ -201,6 +255,13 @@ static int gpy_config_init(struct phy_device *phydev)
return ret < 0 ? ret : 0;
}
+static bool gpy_has_broken_mdint(struct phy_device *phydev)
+{
+ /* At least these PHYs are known to have broken interrupt handling */
+ return phydev->drv->phy_id == PHY_ID_GPY215B ||
+ phydev->drv->phy_id == PHY_ID_GPY215C;
+}
+
static int gpy_probe(struct phy_device *phydev)
{
struct device *dev = &phydev->mdio.dev;
@@ -218,6 +279,7 @@ static int gpy_probe(struct phy_device *phydev)
if (!priv)
return -ENOMEM;
phydev->priv = priv;
+ mutex_init(&priv->mbox_lock);
fw_version = phy_read(phydev, PHY_FWV);
if (fw_version < 0)
@@ -492,6 +554,29 @@ static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev)
if (!(reg & PHY_IMASK_MASK))
return IRQ_NONE;
+ /* The PHY might leave the interrupt line asserted even after PHY_ISTAT
+ * is read. To avoid interrupt storms, delay the interrupt handling as
+ * long as the PHY drives the interrupt line. An internal bus read will
+ * stall as long as the interrupt line is asserted, thus just read a
+ * random register here.
+ * Because we cannot access the internal bus at all while the interrupt
+ * is driven by the PHY, there is no way to make the interrupt line
+ * unstuck (e.g. by changing the pinmux to GPIO input) during that time
+ * frame. Therefore, polling is the best we can do and won't do any more
+ * harm.
+ * It was observed that this bug happens on link state and link speed
+ * changes on a GPY215B and GPY215C independent of the firmware version
+ * (which doesn't mean that this list is exhaustive).
+ */
+ if (gpy_has_broken_mdint(phydev) &&
+ (reg & (PHY_IMASK_LSTC | PHY_IMASK_LSPC))) {
+ reg = gpy_mbox_read(phydev, REG_GPIO0_OUT);
+ if (reg < 0) {
+ phy_error(phydev);
+ return IRQ_NONE;
+ }
+ }
+
phy_trigger_machine(phydev);
return IRQ_HANDLED;
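The gpy_mbox_read() helper and the interrupt-handler hunk above pair a mutex-serialized mailbox sequence with a bounded poll of the READY bit (500us steps, 10ms total), so a stuck interrupt line only delays handling instead of storming. A compilable sketch of the "issue a command, then poll a ready bit with a deadline" shape (fake register, illustrative names; not the MDIO API):

#define _POSIX_C_SOURCE 199309L
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define MBOX_CMD_READY  (1u << 15)

/* Fake command register standing in for the MDIO-mapped mailbox. */
static uint16_t mbox_cmd_reg;

static uint16_t reg_read(void)
{
    return mbox_cmd_reg;
}

static void reg_write(uint16_t cmd)
{
    /* the fake "hardware" completes immediately */
    mbox_cmd_reg = cmd | MBOX_CMD_READY;
}

/* Poll the READY bit every poll_us microseconds, give up after timeout_us. */
static int poll_ready(unsigned int poll_us, unsigned int timeout_us)
{
    unsigned int waited = 0;

    while (!(reg_read() & MBOX_CMD_READY)) {
        if (waited >= timeout_us)
            return -ETIMEDOUT;

        struct timespec ts = { .tv_nsec = (long)poll_us * 1000 };
        nanosleep(&ts, NULL);
        waited += poll_us;
    }
    return 0;
}

int main(void)
{
    reg_write(0x0100);                              /* "issue" a read command */
    printf("poll_ready: %d\n", poll_ready(500, 10000));
    return 0;
}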
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 57849ac0384e..8cff61dbc4b5 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -217,6 +217,7 @@ static void phy_mdio_device_free(struct mdio_device *mdiodev)
static void phy_device_release(struct device *dev)
{
+ fwnode_handle_put(dev->fwnode);
kfree(to_phy_device(dev));
}
@@ -1520,6 +1521,7 @@ error:
error_module_put:
module_put(d->driver->owner);
+ d->driver = NULL;
error_put_device:
put_device(d);
if (ndev_owner != bus->owner)
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 6547b6cc6cbe..2805b04d6402 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1603,19 +1603,29 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy,
linkmode_copy(supported, phy->supported);
linkmode_copy(config.advertising, phy->advertising);
- /* Clause 45 PHYs switch their Serdes lane between several different
- * modes, normally 10GBASE-R, SGMII. Some use 2500BASE-X for 2.5G
- * speeds. We really need to know which interface modes the PHY and
- * MAC supports to properly work out which linkmodes can be supported.
+ /* Check whether we would use rate matching for the proposed interface
+ * mode.
*/
- if (phy->is_c45 &&
+ config.rate_matching = phy_get_rate_matching(phy, interface);
+
+ /* Clause 45 PHYs may switch their Serdes lane between, e.g. 10GBASE-R,
+ * 5GBASE-R, 2500BASE-X and SGMII if they are not using rate matching.
+ * For some interface modes (e.g. RXAUI, XAUI and USXGMII) switching
+ * their Serdes is either unnecessary or not reasonable.
+ *
+ * For those that switch interface modes, we really need to know which
+ * interface modes the PHY supports to properly work out which ethtool
+ * linkmodes can be supported. For now, as a work-around, we validate
+ * against all interface modes, which may lead to more ethtool link
+ * modes being advertised than are actually supported.
+ */
+ if (phy->is_c45 && config.rate_matching == RATE_MATCH_NONE &&
interface != PHY_INTERFACE_MODE_RXAUI &&
interface != PHY_INTERFACE_MODE_XAUI &&
interface != PHY_INTERFACE_MODE_USXGMII)
config.interface = PHY_INTERFACE_MODE_NA;
else
config.interface = interface;
- config.rate_matching = phy_get_rate_matching(phy, config.interface);
ret = phylink_validate(pl, supported, &config);
if (ret) {
diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c
index c8791e9b451d..40ce8abe6999 100644
--- a/drivers/net/plip/plip.c
+++ b/drivers/net/plip/plip.c
@@ -450,12 +450,12 @@ plip_bh_timeout_error(struct net_device *dev, struct net_local *nl,
}
rcv->state = PLIP_PK_DONE;
if (rcv->skb) {
- kfree_skb(rcv->skb);
+ dev_kfree_skb_irq(rcv->skb);
rcv->skb = NULL;
}
snd->state = PLIP_PK_DONE;
if (snd->skb) {
- dev_kfree_skb(snd->skb);
+ dev_consume_skb_irq(snd->skb);
snd->skb = NULL;
}
spin_unlock_irq(&nl->lock);
diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c
index a52ee2bf5575..6312f67f260e 100644
--- a/drivers/net/thunderbolt.c
+++ b/drivers/net/thunderbolt.c
@@ -914,6 +914,7 @@ static int tbnet_open(struct net_device *dev)
eof_mask, tbnet_start_poll, net);
if (!ring) {
netdev_err(dev, "failed to allocate Rx ring\n");
+ tb_xdomain_release_out_hopid(xd, hopid);
tb_ring_free(net->tx_ring.ring);
net->tx_ring.ring = NULL;
return -ENOMEM;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7a3ab3427369..24001112c323 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -686,7 +686,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
if (tun)
xdp_rxq_info_unreg(&tfile->xdp_rxq);
ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
- sock_put(&tfile->sk);
}
}
@@ -702,6 +701,9 @@ static void tun_detach(struct tun_file *tfile, bool clean)
if (dev)
netdev_state_change(dev);
rtnl_unlock();
+
+ if (clean)
+ sock_put(&tfile->sk);
}
static void tun_detach_all(struct net_device *dev)
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index d3e7b27eb933..6f1e560fb15c 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -75,8 +75,14 @@ vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
for (i = 0; i < adapter->intr.num_intrs; i++)
vmxnet3_enable_intr(adapter, i);
- adapter->shared->devRead.intrConf.intrCtrl &=
+ if (!VMXNET3_VERSION_GE_6(adapter) ||
+ !adapter->queuesExtEnabled) {
+ adapter->shared->devRead.intrConf.intrCtrl &=
+ cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
+ } else {
+ adapter->shared->devReadExt.intrConfExt.intrCtrl &=
cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
+ }
}
@@ -85,8 +91,14 @@ vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
{
int i;
- adapter->shared->devRead.intrConf.intrCtrl |=
+ if (!VMXNET3_VERSION_GE_6(adapter) ||
+ !adapter->queuesExtEnabled) {
+ adapter->shared->devRead.intrConf.intrCtrl |=
+ cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
+ } else {
+ adapter->shared->devReadExt.intrConfExt.intrCtrl |=
cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
+ }
for (i = 0; i < adapter->intr.num_intrs; i++)
vmxnet3_disable_intr(adapter, i);
}
@@ -1396,6 +1408,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
};
u32 num_pkts = 0;
bool skip_page_frags = false;
+ bool encap_lro = false;
struct Vmxnet3_RxCompDesc *rcd;
struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
u16 segCnt = 0, mss = 0;
@@ -1556,13 +1569,18 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
if (VMXNET3_VERSION_GE_2(adapter) &&
rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
struct Vmxnet3_RxCompDescExt *rcdlro;
+ union Vmxnet3_GenericDesc *gdesc;
+
rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd;
+ gdesc = (union Vmxnet3_GenericDesc *)rcd;
segCnt = rcdlro->segCnt;
WARN_ON_ONCE(segCnt == 0);
mss = rcdlro->mss;
if (unlikely(segCnt <= 1))
segCnt = 0;
+ encap_lro = (le32_to_cpu(gdesc->dword[0]) &
+ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT));
} else {
segCnt = 0;
}
@@ -1630,7 +1648,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
vmxnet3_rx_csum(adapter, skb,
(union Vmxnet3_GenericDesc *)rcd);
skb->protocol = eth_type_trans(skb, adapter->netdev);
- if (!rcd->tcp ||
+ if ((!rcd->tcp && !encap_lro) ||
!(adapter->netdev->features & NETIF_F_LRO))
goto not_lro;
@@ -1639,7 +1657,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
SKB_GSO_TCPV4 : SKB_GSO_TCPV6;
skb_shinfo(skb)->gso_size = mss;
skb_shinfo(skb)->gso_segs = segCnt;
- } else if (segCnt != 0 || skb->len > mtu) {
+ } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) {
u32 hlen;
hlen = vmxnet3_get_hdr_len(adapter, skb,
@@ -1668,6 +1686,7 @@ not_lro:
napi_gro_receive(&rq->napi, skb);
ctx->skb = NULL;
+ encap_lro = false;
num_pkts++;
}
diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
index 9bbfff803357..b545d93c6e37 100644
--- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c
+++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
@@ -959,30 +959,51 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch)
return;
while (index + sizeof(*e) <= len) {
+ u16 attr_size;
+
e = (struct wilc_attr_entry *)&buf[index];
- if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST)
+ attr_size = le16_to_cpu(e->attr_len);
+
+ if (index + sizeof(*e) + attr_size > len)
+ return;
+
+ if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST &&
+ attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e)))
ch_list_idx = index;
- else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL)
+ else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL &&
+ attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e)))
op_ch_idx = index;
+
if (ch_list_idx && op_ch_idx)
break;
- index += le16_to_cpu(e->attr_len) + sizeof(*e);
+
+ index += sizeof(*e) + attr_size;
}
if (ch_list_idx) {
- u16 attr_size;
- struct wilc_ch_list_elem *e;
- int i;
+ u16 elem_size;
ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx];
- attr_size = le16_to_cpu(ch_list->attr_len);
- for (i = 0; i < attr_size;) {
+ /* the number of bytes following the final 'elem' member */
+ elem_size = le16_to_cpu(ch_list->attr_len) -
+ (sizeof(*ch_list) - sizeof(struct wilc_attr_entry));
+ for (unsigned int i = 0; i < elem_size;) {
+ struct wilc_ch_list_elem *e;
+
e = (struct wilc_ch_list_elem *)(ch_list->elem + i);
+
+ i += sizeof(*e);
+ if (i > elem_size)
+ break;
+
+ i += e->no_of_channels;
+ if (i > elem_size)
+ break;
+
if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) {
memset(e->ch_list, sta_ch, e->no_of_channels);
break;
}
- i += e->no_of_channels;
}
}
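The reworked parser above validates each attribute header and its advertised length against the remaining buffer before it is used, and only then advances by header plus payload, so a bogus length field can no longer push the walk past the end of the frame. A self-contained sketch of such a bounds-checked TLV walk (toy header layout, little-endian sample data; not the wilc structures):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct tlv_hdr {
    uint8_t  type;
    uint16_t len;                       /* payload bytes after the header */
} __attribute__((packed));

/* Walk buf[0..len) and stop as soon as an attribute would overrun it. */
static void walk_tlvs(const uint8_t *buf, size_t len)
{
    size_t index = 0;

    while (index + sizeof(struct tlv_hdr) <= len) {
        struct tlv_hdr h;

        memcpy(&h, &buf[index], sizeof(h));     /* avoid unaligned reads */

        /* reject attributes whose advertised payload overruns the buffer */
        if (index + sizeof(h) + h.len > len)
            return;

        printf("type %u, %u payload bytes\n",
               (unsigned int)h.type, (unsigned int)h.len);
        index += sizeof(h) + h.len;
    }
}

int main(void)
{
    /* type 11 with 2 payload bytes, then a truncated attribute claiming 200 */
    const uint8_t buf[] = { 11, 2, 0, 0xaa, 0xbb, 12, 200, 0 };

    walk_tlvs(buf, sizeof(buf));
    return 0;
}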
diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c
index eb1d1ba3a443..67df8221b5ae 100644
--- a/drivers/net/wireless/microchip/wilc1000/hif.c
+++ b/drivers/net/wireless/microchip/wilc1000/hif.c
@@ -482,14 +482,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss,
rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len);
if (rsn_ie) {
+ int rsn_ie_len = sizeof(struct element) + rsn_ie[1];
int offset = 8;
- param->mode_802_11i = 2;
- param->rsn_found = true;
/* extract RSN capabilities */
- offset += (rsn_ie[offset] * 4) + 2;
- offset += (rsn_ie[offset] * 4) + 2;
- memcpy(param->rsn_cap, &rsn_ie[offset], 2);
+ if (offset < rsn_ie_len) {
+ /* skip over pairwise suites */
+ offset += (rsn_ie[offset] * 4) + 2;
+
+ if (offset < rsn_ie_len) {
+ /* skip over authentication suites */
+ offset += (rsn_ie[offset] * 4) + 2;
+
+ if (offset + 1 < rsn_ie_len) {
+ param->mode_802_11i = 2;
+ param->rsn_found = true;
+ memcpy(param->rsn_cap, &rsn_ie[offset], 2);
+ }
+ }
+ }
}
if (param->rsn_found) {
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.c b/drivers/net/wwan/iosm/iosm_ipc_mux.c
index 9c7a9a2a1f25..fc928b298a98 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mux.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux.c
@@ -332,6 +332,7 @@ struct iosm_mux *ipc_mux_init(struct ipc_mux_config *mux_cfg,
if (!ipc_mux->ul_adb.pp_qlt[i]) {
for (j = i - 1; j >= 0; j--)
kfree(ipc_mux->ul_adb.pp_qlt[j]);
+ kfree(ipc_mux);
return NULL;
}
}
diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
index d41e373f9c0a..d6b166fc5c0e 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
@@ -365,7 +365,8 @@ static void ipc_mux_dl_cmd_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb)
/* Pass the DL packet to the netif layer. */
static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id,
struct iosm_wwan *wwan, u32 offset,
- u8 service_class, struct sk_buff *skb)
+ u8 service_class, struct sk_buff *skb,
+ u32 pkt_len)
{
struct sk_buff *dest_skb = skb_clone(skb, GFP_ATOMIC);
@@ -373,7 +374,7 @@ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id,
return -ENOMEM;
skb_pull(dest_skb, offset);
- skb_set_tail_pointer(dest_skb, dest_skb->len);
+ skb_trim(dest_skb, pkt_len);
/* Pass the packet to the netif layer. */
dest_skb->priority = service_class;
@@ -429,7 +430,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux,
static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux,
struct sk_buff *skb)
{
- u32 pad_len, packet_offset;
+ u32 pad_len, packet_offset, adgh_len;
struct iosm_wwan *wwan;
struct mux_adgh *adgh;
u8 *block = skb->data;
@@ -470,10 +471,12 @@ static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux,
packet_offset = sizeof(*adgh) + pad_len;
if_id += ipc_mux->wwan_q_offset;
+ adgh_len = le16_to_cpu(adgh->length);
/* Pass the packet to the netif layer */
rc = ipc_mux_net_receive(ipc_mux, if_id, wwan, packet_offset,
- adgh->service_class, skb);
+ adgh->service_class, skb,
+ adgh_len - packet_offset);
if (rc) {
dev_err(ipc_mux->dev, "mux adgh decoding error");
return;
@@ -547,7 +550,7 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh,
int if_id, int nr_of_dg)
{
u32 dl_head_pad_len = ipc_mux->session[if_id].dl_head_pad_len;
- u32 packet_offset, i, rc;
+ u32 packet_offset, i, rc, dg_len;
for (i = 0; i < nr_of_dg; i++, dg++) {
if (le32_to_cpu(dg->datagram_index)
@@ -562,11 +565,12 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh,
packet_offset =
le32_to_cpu(dg->datagram_index) +
dl_head_pad_len;
+ dg_len = le16_to_cpu(dg->datagram_length);
/* Pass the packet to the netif layer. */
rc = ipc_mux_net_receive(ipc_mux, if_id, ipc_mux->wwan,
packet_offset,
- dg->service_class,
- skb);
+ dg->service_class, skb,
+ dg_len - dl_head_pad_len);
if (rc)
goto dg_error;
}
@@ -1207,10 +1211,9 @@ static int mux_ul_dg_update_tbl_index(struct iosm_mux *ipc_mux,
qlth_n_ql_size, ul_list);
ipc_mux_ul_adb_finish(ipc_mux);
if (ipc_mux_ul_adb_allocate(ipc_mux, adb, &ipc_mux->size_needed,
- IOSM_AGGR_MUX_SIG_ADBH)) {
- dev_kfree_skb(src_skb);
+ IOSM_AGGR_MUX_SIG_ADBH))
return -ENOMEM;
- }
+
ipc_mux->size_needed = le32_to_cpu(adb->adbh->block_length);
ipc_mux->size_needed += offsetof(struct mux_adth, dg);
@@ -1471,8 +1474,7 @@ void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb)
ipc_mux->ul_data_pend_bytes);
/* Reset the skb settings. */
- skb->tail = 0;
- skb->len = 0;
+ skb_trim(skb, 0);
/* Add the consumed ADB to the free list. */
skb_queue_tail((&ipc_mux->ul_adb.free_list), skb);
diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.h b/drivers/net/wwan/iosm/iosm_ipc_protocol.h
index 9b3a6d86ece7..289397c4ea6c 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_protocol.h
+++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.h
@@ -122,7 +122,7 @@ struct iosm_protocol {
struct iosm_imem *imem;
struct ipc_rsp *rsp_ring[IPC_MEM_MSG_ENTRIES];
struct device *dev;
- phys_addr_t phy_ap_shm;
+ dma_addr_t phy_ap_shm;
u32 old_msg_tail;
};
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 1545cbee77a4..3dbfc8a6924e 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -386,7 +386,7 @@ int xenvif_dealloc_kthread(void *data);
irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
void xenvif_carrier_on(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 650fa180220f..f3f2c07423a6 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -254,14 +254,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
skb_clear_hash(skb);
- xenvif_rx_queue_tail(queue, skb);
+ if (!xenvif_rx_queue_tail(queue, skb))
+ goto drop;
+
xenvif_kick_thread(queue);
return NETDEV_TX_OK;
drop:
vif->dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
+ dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 3d2081bbbc86..bf627af723bf 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -332,10 +332,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
struct xenvif_tx_cb {
- u16 pending_idx;
+ u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1];
+ u8 copy_count;
};
#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
+#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i])
+#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count)
static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
u16 pending_idx,
@@ -370,31 +373,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
return skb;
}
-static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
- struct sk_buff *skb,
- struct xen_netif_tx_request *txp,
- struct gnttab_map_grant_ref *gop,
- unsigned int frag_overflow,
- struct sk_buff *nskb)
+static void xenvif_get_requests(struct xenvif_queue *queue,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *first,
+ struct xen_netif_tx_request *txfrags,
+ unsigned *copy_ops,
+ unsigned *map_ops,
+ unsigned int frag_overflow,
+ struct sk_buff *nskb,
+ unsigned int extra_count,
+ unsigned int data_len)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
- int start;
+ u16 pending_idx;
pending_ring_idx_t index;
unsigned int nr_slots;
+ struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops;
+ struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
+ struct xen_netif_tx_request *txp = first;
+
+ nr_slots = shinfo->nr_frags + 1;
+
+ copy_count(skb) = 0;
+
+ /* Create copy ops for exactly data_len bytes into the skb head. */
+ __skb_put(skb, data_len);
+ while (data_len > 0) {
+ int amount = data_len > txp->size ? txp->size : data_len;
+
+ cop->source.u.ref = txp->gref;
+ cop->source.domid = queue->vif->domid;
+ cop->source.offset = txp->offset;
+
+ cop->dest.domid = DOMID_SELF;
+ cop->dest.offset = (offset_in_page(skb->data +
+ skb_headlen(skb) -
+ data_len)) & ~XEN_PAGE_MASK;
+ cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
+ - data_len);
+
+ cop->len = amount;
+ cop->flags = GNTCOPY_source_gref;
- nr_slots = shinfo->nr_frags;
+ index = pending_index(queue->pending_cons);
+ pending_idx = queue->pending_ring[index];
+ callback_param(queue, pending_idx).ctx = NULL;
+ copy_pending_idx(skb, copy_count(skb)) = pending_idx;
+ copy_count(skb)++;
+
+ cop++;
+ data_len -= amount;
- /* Skip first skb fragment if it is on same page as header fragment. */
- start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
+ if (amount == txp->size) {
+ /* The copy op covered the full tx_request */
+
+ memcpy(&queue->pending_tx_info[pending_idx].req,
+ txp, sizeof(*txp));
+ queue->pending_tx_info[pending_idx].extra_count =
+ (txp == first) ? extra_count : 0;
+
+ if (txp == first)
+ txp = txfrags;
+ else
+ txp++;
+ queue->pending_cons++;
+ nr_slots--;
+ } else {
+ /* The copy op partially covered the tx_request.
+ * The remainder will be mapped.
+ */
+ txp->offset += amount;
+ txp->size -= amount;
+ }
+ }
- for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
- shinfo->nr_frags++, txp++, gop++) {
+ for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
+ shinfo->nr_frags++, gop++) {
index = pending_index(queue->pending_cons++);
pending_idx = queue->pending_ring[index];
- xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
+ xenvif_tx_create_map_op(queue, pending_idx, txp,
+ txp == first ? extra_count : 0, gop);
frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
+
+ if (txp == first)
+ txp = txfrags;
+ else
+ txp++;
}
if (frag_overflow) {
@@ -415,7 +480,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que
skb_shinfo(skb)->frag_list = nskb;
}
- return gop;
+ (*copy_ops) = cop - queue->tx_copy_ops;
+ (*map_ops) = gop - queue->tx_map_ops;
}
static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
@@ -451,7 +517,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
struct gnttab_copy **gopp_copy)
{
struct gnttab_map_grant_ref *gop_map = *gopp_map;
- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+ u16 pending_idx;
/* This always points to the shinfo of the skb being checked, which
* could be either the first or the one on the frag_list
*/
@@ -462,24 +528,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
struct skb_shared_info *first_shinfo = NULL;
int nr_frags = shinfo->nr_frags;
const bool sharedslot = nr_frags &&
- frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
- int i, err;
+ frag_get_pending_idx(&shinfo->frags[0]) ==
+ copy_pending_idx(skb, copy_count(skb) - 1);
+ int i, err = 0;
- /* Check status of header. */
- err = (*gopp_copy)->status;
- if (unlikely(err)) {
- if (net_ratelimit())
- netdev_dbg(queue->vif->dev,
- "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
- (*gopp_copy)->status,
- pending_idx,
- (*gopp_copy)->source.u.ref);
- /* The first frag might still have this slot mapped */
- if (!sharedslot)
- xenvif_idx_release(queue, pending_idx,
- XEN_NETIF_RSP_ERROR);
+ for (i = 0; i < copy_count(skb); i++) {
+ int newerr;
+
+ /* Check status of header. */
+ pending_idx = copy_pending_idx(skb, i);
+
+ newerr = (*gopp_copy)->status;
+ if (likely(!newerr)) {
+ /* The first frag might still have this slot mapped */
+ if (i < copy_count(skb) - 1 || !sharedslot)
+ xenvif_idx_release(queue, pending_idx,
+ XEN_NETIF_RSP_OKAY);
+ } else {
+ err = newerr;
+ if (net_ratelimit())
+ netdev_dbg(queue->vif->dev,
+ "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
+ (*gopp_copy)->status,
+ pending_idx,
+ (*gopp_copy)->source.u.ref);
+ /* The first frag might still have this slot mapped */
+ if (i < copy_count(skb) - 1 || !sharedslot)
+ xenvif_idx_release(queue, pending_idx,
+ XEN_NETIF_RSP_ERROR);
+ }
+ (*gopp_copy)++;
}
- (*gopp_copy)++;
check_frags:
for (i = 0; i < nr_frags; i++, gop_map++) {
@@ -526,14 +605,6 @@ check_frags:
if (err)
continue;
- /* First error: if the header haven't shared a slot with the
- * first frag, release it as well.
- */
- if (!sharedslot)
- xenvif_idx_release(queue,
- XENVIF_TX_CB(skb)->pending_idx,
- XEN_NETIF_RSP_OKAY);
-
/* Invalidate preceding fragments of this skb. */
for (j = 0; j < i; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
@@ -803,7 +874,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
unsigned *copy_ops,
unsigned *map_ops)
{
- struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
struct sk_buff *skb, *nskb;
int ret;
unsigned int frag_overflow;
@@ -885,8 +955,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
continue;
}
+ data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ?
+ XEN_NETBACK_TX_COPY_LEN : txreq.size;
+
ret = xenvif_count_requests(queue, &txreq, extra_count,
txfrags, work_to_do);
+
if (unlikely(ret < 0))
break;
@@ -912,9 +986,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
index = pending_index(queue->pending_cons);
pending_idx = queue->pending_ring[index];
- data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
- ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
- XEN_NETBACK_TX_COPY_LEN : txreq.size;
+ if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size)
+ data_len = txreq.size;
skb = xenvif_alloc_skb(data_len);
if (unlikely(skb == NULL)) {
@@ -925,8 +998,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
}
skb_shinfo(skb)->nr_frags = ret;
- if (data_len < txreq.size)
- skb_shinfo(skb)->nr_frags++;
/* At this point shinfo->nr_frags is in fact the number of
* slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
*/
@@ -988,54 +1059,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
type);
}
- XENVIF_TX_CB(skb)->pending_idx = pending_idx;
-
- __skb_put(skb, data_len);
- queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
- queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
- queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
-
- queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
- virt_to_gfn(skb->data);
- queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
- queue->tx_copy_ops[*copy_ops].dest.offset =
- offset_in_page(skb->data) & ~XEN_PAGE_MASK;
-
- queue->tx_copy_ops[*copy_ops].len = data_len;
- queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
-
- (*copy_ops)++;
-
- if (data_len < txreq.size) {
- frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
- pending_idx);
- xenvif_tx_create_map_op(queue, pending_idx, &txreq,
- extra_count, gop);
- gop++;
- } else {
- frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
- INVALID_PENDING_IDX);
- memcpy(&queue->pending_tx_info[pending_idx].req,
- &txreq, sizeof(txreq));
- queue->pending_tx_info[pending_idx].extra_count =
- extra_count;
- }
-
- queue->pending_cons++;
-
- gop = xenvif_get_requests(queue, skb, txfrags, gop,
- frag_overflow, nskb);
+ xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops,
+ map_ops, frag_overflow, nskb, extra_count,
+ data_len);
__skb_queue_tail(&queue->tx_queue, skb);
queue->tx.req_cons = idx;
- if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
+ if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) ||
(*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
break;
}
- (*map_ops) = gop - queue->tx_map_ops;
return;
}
@@ -1114,9 +1150,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
struct xen_netif_tx_request *txp;
u16 pending_idx;
- unsigned data_len;
- pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+ pending_idx = copy_pending_idx(skb, 0);
txp = &queue->pending_tx_info[pending_idx].req;
/* Check the remap error code. */
@@ -1135,18 +1170,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
continue;
}
- data_len = skb->len;
- callback_param(queue, pending_idx).ctx = NULL;
- if (data_len < txp->size) {
- /* Append the packet payload as a fragment. */
- txp->offset += data_len;
- txp->size -= data_len;
- } else {
- /* Schedule a response immediately. */
- xenvif_idx_release(queue, pending_idx,
- XEN_NETIF_RSP_OKAY);
- }
-
if (txp->flags & XEN_NETTXF_csum_blank)
skb->ip_summed = CHECKSUM_PARTIAL;
else if (txp->flags & XEN_NETTXF_data_validated)
@@ -1332,7 +1355,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif_queue *queue, int budget)
{
- unsigned nr_mops, nr_cops = 0;
+ unsigned nr_mops = 0, nr_cops = 0;
int work_done, ret;
if (unlikely(!tx_work_todo(queue)))
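The restructuring above moves xen-netback to a fixed split per packet: normally up to XEN_NETBACK_TX_COPY_LEN bytes are grant-copied into the linear skb head (possibly spanning several tx requests), and whatever remains is grant-mapped as fragments, so protocol headers always land in the linear area. A tiny sketch of just that size split (the constant below is an illustrative stand-in):

#include <stdio.h>

#define COPY_LEN 128    /* illustrative stand-in for XEN_NETBACK_TX_COPY_LEN */

/* Split one request of 'size' bytes into a copied head and a mapped rest. */
static void split_request(unsigned int size)
{
    unsigned int copied = size > COPY_LEN ? COPY_LEN : size;
    unsigned int mapped = size - copied;

    printf("size %4u -> copy %3u bytes into the head, map %4u bytes\n",
           size, copied, mapped);
}

int main(void)
{
    split_request(60);      /* fits entirely in the copied head */
    split_request(1514);    /* headers copied, payload mapped as frags */
    return 0;
}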
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index 932762177110..0ba754ebc5ba 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
return false;
}
-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
{
unsigned long flags;
+ bool ret = true;
spin_lock_irqsave(&queue->rx_queue.lock, flags);
@@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
struct net_device *dev = queue->vif->dev;
netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
- kfree_skb(skb);
- queue->vif->dev->stats.rx_dropped++;
+ ret = false;
} else {
if (skb_queue_empty(&queue->rx_queue))
xenvif_update_needed_slots(queue, skb);
@@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
}
spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+
+ return ret;
}
static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 9af2b027c19c..dc404e05970c 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1862,6 +1862,12 @@ static int netfront_resume(struct xenbus_device *dev)
netif_tx_unlock_bh(info->netdev);
xennet_disconnect_backend(info);
+
+ rtnl_lock();
+ if (info->queues)
+ xennet_destroy_queues(info);
+ rtnl_unlock();
+
return 0;
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index da55ce45ac70..7e3893d06bab 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3095,10 +3095,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
if (!ctrl->identified) {
unsigned int i;
- ret = nvme_init_subsystem(ctrl, id);
- if (ret)
- goto out_free;
-
/*
* Check for quirks. Quirk can depend on firmware version,
* so, in principle, the set of quirks present can change
@@ -3111,6 +3107,10 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
if (quirk_matches(id, &core_quirks[i]))
ctrl->quirks |= core_quirks[i].quirks;
}
+
+ ret = nvme_init_subsystem(ctrl, id);
+ if (ret)
+ goto out_free;
}
memcpy(ctrl->subsys->firmware_rev, id->fr,
sizeof(ctrl->subsys->firmware_rev));
@@ -4304,7 +4304,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
mutex_unlock(&ns->ctrl->subsys->lock);
/* guarantee not available in head->list */
- synchronize_rcu();
+ synchronize_srcu(&ns->head->srcu);
if (!nvme_ns_head_multipath(ns->head))
nvme_cdev_del(&ns->cdev, &ns->cdev_device);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 93e2138a8b42..7e025b8948cb 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -174,11 +174,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
struct nvme_ns_head *head = ns->head;
sector_t capacity = get_capacity(head->disk);
int node;
+ int srcu_idx;
+ srcu_idx = srcu_read_lock(&head->srcu);
list_for_each_entry_rcu(ns, &head->list, siblings) {
if (capacity != get_capacity(ns->disk))
clear_bit(NVME_NS_READY, &ns->flags);
}
+ srcu_read_unlock(&head->srcu, srcu_idx);
for_each_node(node)
rcu_assign_pointer(head->current_path[node], NULL);
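The multipath hunk above takes srcu_read_lock()/srcu_read_unlock() on head->srcu around the sibling-list walk, matching the synchronize_srcu() that nvme_ns_remove() now uses, so a namespace is only freed once every reader that might still see it has finished. A rough user-space analogue of that reader/remover pairing (a rwlock stands in for SRCU purely for illustration; real SRCU readers never block the updater's unlink, only the grace-period wait):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t head_lock = PTHREAD_RWLOCK_INITIALIZER;
static int entry_storage = 42;
static int *shared_entry = &entry_storage;

static void reader(void)
{
    pthread_rwlock_rdlock(&head_lock);      /* ~ srcu_read_lock() */
    if (shared_entry)
        printf("reader sees %d\n", *shared_entry);
    pthread_rwlock_unlock(&head_lock);      /* ~ srcu_read_unlock() */
}

static void remover(void)
{
    shared_entry = NULL;                    /* unlink the entry */
    pthread_rwlock_wrlock(&head_lock);      /* ~ synchronize_srcu(): drain readers */
    pthread_rwlock_unlock(&head_lock);
    /* nothing can still hold a pointer to the old entry; free it here */
}

int main(void)
{
    reader();
    remover();
    reader();
    return 0;
}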
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f4335519399d..488ad7dabeb8 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -797,6 +797,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
if (bv->bv_len > first_prp_len)
cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
+ else
+ cmnd->dptr.prp2 = 0;
return BLK_STS_OK;
}
diff --git a/drivers/of/property.c b/drivers/of/property.c
index 967f79b59016..134cfc980b70 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -993,8 +993,10 @@ of_fwnode_get_reference_args(const struct fwnode_handle *fwnode,
nargs, index, &of_args);
if (ret < 0)
return ret;
- if (!args)
+ if (!args) {
+ of_node_put(of_args.np);
return 0;
+ }
args->nargs = of_args.args_count;
args->fwnode = of_fwnode_handle(of_args.np);
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 52ecd66ce357..047a8374b4fd 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -436,9 +436,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input)
writel(value, padcfg0);
}
+static int __intel_gpio_get_gpio_mode(u32 value)
+{
+ return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
+}
+
static int intel_gpio_get_gpio_mode(void __iomem *padcfg0)
{
- return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
+ return __intel_gpio_get_gpio_mode(readl(padcfg0));
}
static void intel_gpio_set_gpio_mode(void __iomem *padcfg0)
@@ -1674,6 +1679,7 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data);
static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin)
{
const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin);
+ u32 value;
if (!pd || !intel_pad_usable(pctrl, pin))
return false;
@@ -1688,6 +1694,25 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int
gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin)))
return true;
+ /*
+ * The firmware on some systems may configure GPIO pins to be
+ * an interrupt source in so called "direct IRQ" mode. In such
+ * cases the GPIO controller driver has no idea if those pins
+ * are being used or not. At the same time, there is a known bug
+ * in the firmwares that don't restore the pin settings correctly
+ * after suspend, i.e. for an unknown reason the Rx value becomes
+ * inverted.
+ *
+ * Hence, let's save and restore the pins that are configured
+ * as GPIOs in the input mode with GPIROUTIOXAPIC bit set.
+ *
+ * See https://bugzilla.kernel.org/show_bug.cgi?id=214749.
+ */
+ value = readl(intel_get_padcfg(pctrl, pin, PADCFG0));
+ if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) &&
+ (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO))
+ return true;
+
return false;
}
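The comment above boils down to one predicate on PADCFG0: a pin is worth saving and restoring when it is in GPIO mode, its TX buffer is disabled (input), and GPIROUTIOXAPIC routes it as a direct IRQ. A standalone sketch of that three-condition check (bit positions below are illustrative; the real layout lives in the driver header):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PADCFG0_GPIROUTIOXAPIC  (1u << 20)
#define PADCFG0_GPIOTXDIS       (1u << 8)
#define PADCFG0_PMODE_SHIFT     10
#define PADCFG0_PMODE_MASK      (0xfu << PADCFG0_PMODE_SHIFT)
#define PADCFG0_PMODE_GPIO      0u

/* Read the register value once and test all three conditions on it. */
static bool pin_is_direct_irq_gpio_input(uint32_t padcfg0)
{
    return (padcfg0 & PADCFG0_GPIROUTIOXAPIC) &&
           (padcfg0 & PADCFG0_GPIOTXDIS) &&
           ((padcfg0 & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT) ==
                PADCFG0_PMODE_GPIO;
}

int main(void)
{
    uint32_t v = PADCFG0_GPIROUTIOXAPIC | PADCFG0_GPIOTXDIS;

    printf("save this pin across suspend: %s\n",
           pin_is_direct_irq_gpio_input(v) ? "yes" : "no");
    return 0;
}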
diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c
index 65d312967619..27f0a54e12bf 100644
--- a/drivers/pinctrl/mediatek/mtk-eint.c
+++ b/drivers/pinctrl/mediatek/mtk-eint.c
@@ -303,12 +303,15 @@ static struct irq_chip mtk_eint_irq_chip = {
static unsigned int mtk_eint_hw_init(struct mtk_eint *eint)
{
- void __iomem *reg = eint->base + eint->regs->dom_en;
+ void __iomem *dom_en = eint->base + eint->regs->dom_en;
+ void __iomem *mask_set = eint->base + eint->regs->mask_set;
unsigned int i;
for (i = 0; i < eint->hw->ap_num; i += 32) {
- writel(0xffffffff, reg);
- reg += 4;
+ writel(0xffffffff, dom_en);
+ writel(0xffffffff, mask_set);
+ dom_en += 4;
+ mask_set += 4;
}
return 0;
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 67bec7ea0f8b..414ee6bb8ac9 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -727,7 +727,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs)
mux_bytes = pcs->width / BITS_PER_BYTE;
- if (pcs->bits_per_mux) {
+ if (pcs->bits_per_mux && pcs->fmask) {
pcs->bits_per_pin = fls(pcs->fmask);
nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin;
} else {
diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c
index ef4ae977b8e0..439d282aafd1 100644
--- a/drivers/platform/x86/amd/pmc.c
+++ b/drivers/platform/x86/amd/pmc.c
@@ -739,8 +739,14 @@ static void amd_pmc_s2idle_prepare(void)
static void amd_pmc_s2idle_check(void)
{
struct amd_pmc_dev *pdev = &pmc;
+ struct smu_metrics table;
int rc;
+ /* CZN: Ensure that at least 10ms have passed before the next s0i3 entry attempt */
+ if (pdev->cpu_id == AMD_CPU_ID_CZN && !get_metrics_table(pdev, &table) &&
+ table.s0i3_last_entry_status)
+ usleep_range(10000, 20000);
+
/* Dump the IdleMask before we add to the STB */
amd_pmc_idlemask_read(pdev, pdev->dev, NULL);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 9dc935886e9f..c6ded3fdd715 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -758,7 +758,6 @@ static void qeth_l2_br2dev_worker(struct work_struct *work)
struct list_head *iter;
int err = 0;
- kfree(br2dev_event_work);
QETH_CARD_TEXT_(card, 4, "b2dw%04lx", event);
QETH_CARD_TEXT_(card, 4, "ma%012llx", ether_addr_to_u64(addr));
@@ -815,6 +814,7 @@ unlock:
dev_put(brdev);
dev_put(lsyncdev);
dev_put(dstdev);
+ kfree(br2dev_event_work);
}
static int qeth_l2_br2dev_queue_work(struct net_device *brdev,
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index 843e830b5f87..ea914a7eaa7f 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -363,8 +363,8 @@ static int envctrl_read_cpu_info(int cpu, struct i2c_child_t *pchild,
char mon_type, unsigned char *bufdata)
{
unsigned char data;
- int i;
- char *tbl, j = -1;
+ int i, j = -1;
+ char *tbl;
/* Find the right monitor type and channel. */
for (i = 0; i < PCF8584_MAX_CHANNELS; i++) {
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 6995c8979230..ac5ff0783b4f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -312,7 +312,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
* Ensure that all tasks observe the host state change before the
* host_failed change.
*/
- call_rcu(&scmd->rcu, scsi_eh_inc_host_failed);
+ call_rcu_hurry(&scmd->rcu, scsi_eh_inc_host_failed);
}
/**
diff --git a/drivers/staging/media/atomisp/pci/hive_types.h b/drivers/staging/media/atomisp/pci/hive_types.h
index 4b8a679fb672..55d36931f079 100644
--- a/drivers/staging/media/atomisp/pci/hive_types.h
+++ b/drivers/staging/media/atomisp/pci/hive_types.h
@@ -42,7 +42,7 @@ typedef unsigned int hive_bool;
#define hive_false 0
#define hive_true 1
-typedef char hive_int8;
+typedef signed char hive_int8;
typedef short hive_int16;
typedef int hive_int32;
typedef long long hive_int64;
diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index 4802cfaa107f..a683e21df19c 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -264,8 +264,8 @@ static void hvc_port_destruct(struct tty_port *port)
static void hvc_check_console(int index)
{
- /* Already enabled, bail out */
- if (hvc_console.flags & CON_ENABLED)
+ /* Already registered, bail out */
+ if (console_is_registered(&hvc_console))
return;
/* If this index is what the user requested, then register
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 94fbf0add2ce..74568292186f 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -565,7 +565,7 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev)
up->port.dev = dev;
- if (uart_console_enabled(&up->port))
+ if (uart_console_registered(&up->port))
pm_runtime_get_sync(up->port.dev);
serial8250_apply_quirks(up);
diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c
index a5f380584cda..4f6e9bf57169 100644
--- a/drivers/tty/serial/earlycon.c
+++ b/drivers/tty/serial/earlycon.c
@@ -181,7 +181,7 @@ int __init setup_earlycon(char *buf)
if (!buf || !buf[0])
return -EINVAL;
- if (early_con.flags & CON_ENABLED)
+ if (console_is_registered(&early_con))
return -EALREADY;
again:
@@ -253,7 +253,7 @@ int __init of_setup_earlycon(const struct earlycon_id *match,
bool big_endian;
u64 addr;
- if (early_con.flags & CON_ENABLED)
+ if (console_is_registered(&early_con))
return -EALREADY;
spin_lock_init(&port->lock);
diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
index 7aa37be3216a..a3ed9b34e2ab 100644
--- a/drivers/tty/serial/kgdboc.c
+++ b/drivers/tty/serial/kgdboc.c
@@ -189,9 +189,27 @@ static int configure_kgdboc(void)
if (kgdboc_register_kbd(&cptr))
goto do_register;
+ /*
+ * tty_find_polling_driver() can call uart_set_options()
+ * (via poll_init) to configure the uart. Take the console_list_lock
+ * in order to synchronize against register_console(), which can also
+ * configure the uart via uart_set_options(). This also allows safe
+ * traversal of the console list.
+ */
+ console_list_lock();
+
p = tty_find_polling_driver(cptr, &tty_line);
- if (!p)
+ if (!p) {
+ console_list_unlock();
goto noconfig;
+ }
+
+ /*
+ * Take console_lock to serialize device() callback with
+ * other console operations. For example, fg_console is
+ * modified under console_lock when switching vt.
+ */
+ console_lock();
for_each_console(cons) {
int idx;
@@ -202,6 +220,10 @@ static int configure_kgdboc(void)
}
}
+ console_unlock();
+
+ console_list_unlock();
+
kgdb_tty_driver = p;
kgdb_tty_line = tty_line;
@@ -449,6 +471,7 @@ static void kgdboc_earlycon_pre_exp_handler(void)
{
struct console *con;
static bool already_warned;
+ int cookie;
if (already_warned)
return;
@@ -461,9 +484,14 @@ static void kgdboc_earlycon_pre_exp_handler(void)
* serial drivers might be OK with this, print a warning once per
* boot if we detect this case.
*/
- for_each_console(con)
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
if (con == kgdboc_earlycon_io_ops.cons)
- return;
+ break;
+ }
+ console_srcu_read_unlock(cookie);
+ if (con)
+ return;
already_warned = true;
pr_warn("kgdboc_earlycon is still using bootconsole\n");
@@ -528,7 +556,15 @@ static int __init kgdboc_earlycon_init(char *opt)
* Look for a matching console, or if the name was left blank just
* pick the first one we find.
*/
- console_lock();
+
+ /*
+ * Hold the console_list_lock to guarantee that no consoles are
+ * unregistered until the kgdboc_earlycon setup is complete.
+ * Trapping the exit() callback relies on exit() not being
+ * called until the trap is setup. This also allows safe
+ * traversal of the console list and race-free reading of @flags.
+ */
+ console_list_lock();
for_each_console(con) {
if (con->write && con->read &&
(con->flags & (CON_BOOT | CON_ENABLED)) &&
@@ -570,7 +606,7 @@ static int __init kgdboc_earlycon_init(char *opt)
}
unlock:
- console_unlock();
+ console_list_unlock();
/* Non-zero means malformed option so we always return zero */
return 0;
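[Editor's note] The kgdboc hunks above establish a nesting order: console_list_lock pins the console list (and serializes uart_set_options() against register_console()), while console_lock is taken only around the device() callback. A minimal sketch of that nesting, reusing the helpers shown in these hunks; the lookup-by-name logic is an illustration, not the driver's actual matching:

        #include <linux/console.h>
        #include <linux/string.h>
        #include <linux/tty_driver.h>

        /* Resolve the tty driver backing a named console, if any. */
        static struct tty_driver *find_console_tty(const char *name, int *line)
        {
                struct tty_driver *drv = NULL;
                struct console *con;
                int idx;

                console_list_lock();    /* consoles cannot (un)register under us */
                console_lock();         /* device() may touch console state */
                for_each_console(con) {
                        if (!con->device || strcmp(con->name, name))
                                continue;
                        drv = con->device(con, &idx);
                        if (drv) {
                                *line = idx;
                                break;
                        }
                }
                console_unlock();
                console_list_unlock();

                return drv;
        }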
diff --git a/drivers/tty/serial/pic32_uart.c b/drivers/tty/serial/pic32_uart.c
index 2beada66c824..c38754d593ca 100644
--- a/drivers/tty/serial/pic32_uart.c
+++ b/drivers/tty/serial/pic32_uart.c
@@ -843,7 +843,7 @@ console_initcall(pic32_console_init);
*/
static int __init pic32_late_console_init(void)
{
- if (!(pic32_console.flags & CON_ENABLED))
+ if (!console_is_registered(&pic32_console))
register_console(&pic32_console);
return 0;
@@ -919,7 +919,7 @@ static int pic32_uart_probe(struct platform_device *pdev)
}
#ifdef CONFIG_SERIAL_PIC32_CONSOLE
- if (uart_console_enabled(port)) {
+ if (uart_console_registered(port)) {
/* The peripheral clock has been enabled by console_setup,
* so disable it till the port is used.
*/
diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c
index 77d1363029f5..9c252c9ca95a 100644
--- a/drivers/tty/serial/samsung_tty.c
+++ b/drivers/tty/serial/samsung_tty.c
@@ -1732,7 +1732,7 @@ static void __init s3c24xx_serial_register_console(void)
static void s3c24xx_serial_unregister_console(void)
{
- if (s3c24xx_serial_console.flags & CON_ENABLED)
+ if (console_is_registered(&s3c24xx_serial_console))
unregister_console(&s3c24xx_serial_console);
}
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 179ee199df34..b9fbbee598b8 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2223,11 +2223,11 @@ uart_set_options(struct uart_port *port, struct console *co,
/*
* Ensure that the serial-console lock is initialised early.
*
- * Note that the console-enabled check is needed because of kgdboc,
- * which can end up calling uart_set_options() for an already enabled
+ * Note that the console-registered check is needed because
+ * kgdboc can call uart_set_options() for an already registered
* console via tty_find_polling_driver() and uart_poll_init().
*/
- if (!uart_console_enabled(port) && !port->console_reinit)
+ if (!uart_console_registered_locked(port) && !port->console_reinit)
uart_port_spin_lock_init(port);
memset(&termios, 0, sizeof(struct ktermios));
@@ -2573,7 +2573,7 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
* successfully registered yet, try to re-register it.
* It may be that the port was not available.
*/
- if (port->cons && !(port->cons->flags & CON_ENABLED))
+ if (port->cons && !console_is_registered(port->cons))
register_console(port->cons);
/*
@@ -2956,7 +2956,7 @@ static ssize_t console_show(struct device *dev,
mutex_lock(&port->mutex);
uport = uart_port_check(state);
if (uport)
- console = uart_console_enabled(uport);
+ console = uart_console_registered(uport);
mutex_unlock(&port->mutex);
return sprintf(buf, "%c\n", console ? 'Y' : 'N');
@@ -2978,7 +2978,7 @@ static ssize_t console_store(struct device *dev,
mutex_lock(&port->mutex);
uport = uart_port_check(state);
if (uport) {
- oldconsole = uart_console_enabled(uport);
+ oldconsole = uart_console_registered(uport);
if (oldconsole && !newconsole) {
ret = unregister_console(uport->cons);
} else if (!oldconsole && newconsole) {
@@ -3086,7 +3086,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
* If this port is in use as a console then the spinlock is already
* initialised.
*/
- if (!uart_console_enabled(uport))
+ if (!uart_console_registered(uport))
uart_port_spin_lock_init(uport);
if (uport->cons && uport->dev)
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 62f773286d44..76452fe2af86 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -3054,15 +3054,29 @@ static struct console serial_console = {
};
#ifdef CONFIG_SUPERH
+static char early_serial_buf[32];
+
+static int early_serial_console_setup(struct console *co, char *options)
+{
+ /*
+ * This early console is always registered using the earlyprintk=
+ * parameter, which does not call add_preferred_console(). Thus
+ * @options is always NULL and the options for this early console
+ * are passed using a custom buffer.
+ */
+ WARN_ON(options);
+
+ return serial_console_setup(co, early_serial_buf);
+}
+
static struct console early_serial_console = {
.name = "early_ttySC",
.write = serial_console_write,
+ .setup = early_serial_console_setup,
.flags = CON_PRINTBUFFER,
.index = -1,
};
-static char early_serial_buf[32];
-
static int sci_probe_earlyprintk(struct platform_device *pdev)
{
const struct plat_sci_port *cfg = dev_get_platdata(&pdev->dev);
@@ -3074,8 +3088,6 @@ static int sci_probe_earlyprintk(struct platform_device *pdev)
sci_init_single(pdev, &sci_ports[pdev->id], pdev->id, cfg, true);
- serial_console_setup(&early_serial_console, early_serial_buf);
-
if (!strstr(early_serial_buf, "keep"))
early_serial_console.flags |= CON_BOOT;
diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index 2eff7cff57c4..0cbd1892c53b 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -1631,7 +1631,7 @@ static int cdns_uart_probe(struct platform_device *pdev)
#ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE
/* This is not port which is used for console that's why clean it up */
if (console_port == port &&
- !(cdns_uart_uart_driver.cons->flags & CON_ENABLED)) {
+ !console_is_registered(cdns_uart_uart_driver.cons)) {
console_port = NULL;
cdns_uart_console.index = -1;
}
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index de06c3c2ff70..cafdff575716 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3526,7 +3526,14 @@ static ssize_t show_cons_active(struct device *dev,
struct console *c;
ssize_t count = 0;
- console_lock();
+ /*
+ * Hold the console_list_lock to guarantee that no consoles are
+ * unregistered until all console processing is complete.
+ * This also allows safe traversal of the console list and
+ * race-free reading of @flags.
+ */
+ console_list_lock();
+
for_each_console(c) {
if (!c->device)
continue;
@@ -3538,6 +3545,13 @@ static ssize_t show_cons_active(struct device *dev,
if (i >= ARRAY_SIZE(cs))
break;
}
+
+ /*
+ * Take console_lock to serialize device() callback with
+ * other console operations. For example, fg_console is
+ * modified under console_lock when switching vt.
+ */
+ console_lock();
while (i--) {
int index = cs[i]->index;
struct tty_driver *drv = cs[i]->device(cs[i], &index);
@@ -3553,6 +3567,8 @@ static ssize_t show_cons_active(struct device *dev,
}
console_unlock();
+ console_list_unlock();
+
return count;
}
static DEVICE_ATTR(active, S_IRUGO, show_cons_active, NULL);
diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c
index bfb7e2b85299..797047154820 100644
--- a/drivers/usb/early/xhci-dbc.c
+++ b/drivers/usb/early/xhci-dbc.c
@@ -927,7 +927,7 @@ void __init early_xdbc_register_console(void)
static void xdbc_unregister_console(void)
{
- if (early_xdbc_console.flags & CON_ENABLED)
+ if (console_is_registered(&early_xdbc_console))
unregister_console(&early_xdbc_console);
}
diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index 4d2694d904aa..8752d389e382 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -504,18 +504,14 @@ static void xenfb_make_preferred_console(void)
if (console_set_on_cmdline)
return;
- console_lock();
+ console_list_lock();
for_each_console(c) {
if (!strcmp(c->name, "tty") && c->index == 0)
break;
}
- console_unlock();
- if (c) {
- unregister_console(c);
- c->flags |= CON_CONSDEV;
- c->flags &= ~CON_PRINTBUFFER; /* don't print again */
- register_console(c);
- }
+ if (c)
+ console_force_preferred_locked(c);
+ console_list_unlock();
}
static int xenfb_resume(struct xenbus_device *dev)
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index c0031a3ab42f..3ac5fcf98d0d 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -167,8 +167,8 @@ responded:
clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
}
- if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
- rtt_us < server->probe.rtt) {
+ rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us);
+ if (rtt_us < server->probe.rtt) {
server->probe.rtt = rtt_us;
server->rtt = rtt_us;
alist->preferred = index;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 4981baf97835..b5237206eac3 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -406,7 +406,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server,
if (!server)
return;
- a = atomic_inc_return(&server->active);
+ a = atomic_read(&server->active);
zero = __refcount_dec_and_test(&server->ref, &r);
trace_afs_server(debug_id, r - 1, a, reason);
if (unlikely(zero))
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6a11025e5850..d9af34f816a9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -248,7 +248,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
} while (0)
#ifdef ARCH_DLINFO
- /*
+ /*
* ARCH_DLINFO must come first so PPC can do its special alignment of
* AUXV.
* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
@@ -456,13 +456,13 @@ static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
*
* Loads ELF program headers from the binary file elf_file, which has the ELF
* header pointed to by elf_ex, into a newly allocated array. The caller is
- * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
+ * responsible for freeing the allocated data. Returns NULL upon failure.
*/
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
struct file *elf_file)
{
struct elf_phdr *elf_phdata = NULL;
- int retval, err = -1;
+ int retval = -1;
unsigned int size;
/*
@@ -484,15 +484,9 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
/* Read in the program headers */
retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
- if (retval < 0) {
- err = retval;
- goto out;
- }
- /* Success! */
- err = 0;
out:
- if (err) {
+ if (retval) {
kfree(elf_phdata);
elf_phdata = NULL;
}
@@ -1020,7 +1014,7 @@ out_free_interp:
executable_stack);
if (retval < 0)
goto out_free_dentry;
-
+
elf_bss = 0;
elf_brk = 0;
@@ -1043,7 +1037,7 @@ out_free_interp:
if (unlikely (elf_brk > elf_bss)) {
unsigned long nbyte;
-
+
/* There was a PT_LOAD segment with p_memsz > p_filesz
before this one. Map anonymous pages, if needed,
and clear the area. */
@@ -1166,7 +1160,7 @@ out_free_interp:
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
- retval = IS_ERR((void *)error) ?
+ retval = IS_ERR_VALUE(error) ?
PTR_ERR((void*)error) : -EINVAL;
goto out_free_dentry;
}
@@ -1251,7 +1245,7 @@ out_free_interp:
interpreter,
load_bias, interp_elf_phdata,
&arch_state);
- if (!IS_ERR((void *)elf_entry)) {
+ if (!IS_ERR_VALUE(elf_entry)) {
/*
* load_elf_interp() returns relocation
* adjustment
@@ -1260,7 +1254,7 @@ out_free_interp:
elf_entry += interp_elf_ex->e_entry;
}
if (BAD_ADDR(elf_entry)) {
- retval = IS_ERR((void *)elf_entry) ?
+ retval = IS_ERR_VALUE(elf_entry) ?
(int)elf_entry : -EINVAL;
goto out_free_dentry;
}
@@ -1521,7 +1515,7 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
phdr->p_align = 0;
}
-static void fill_note(struct memelfnote *note, const char *name, int type,
+static void fill_note(struct memelfnote *note, const char *name, int type,
unsigned int sz, void *data)
{
note->name = name;
@@ -2004,8 +1998,8 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
t->num_notes = 0;
fill_prstatus(&t->prstatus.common, p, signr);
- elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
-
+ elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
+
fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
&(t->prstatus));
t->num_notes++;
@@ -2295,7 +2289,7 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!elf_core_write_extra_phdrs(cprm, offset))
goto end_coredump;
- /* write out the notes section */
+ /* write out the notes section */
if (!write_note_info(&info, cprm))
goto end_coredump;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 08d0c8797828..096e3520a0b1 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -434,8 +434,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
current->mm->start_stack = current->mm->start_brk + stack_size;
#endif
- if (create_elf_fdpic_tables(bprm, current->mm,
- &exec_params, &interp_params) < 0)
+ retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params,
+ &interp_params);
+ if (retval < 0)
goto error;
kdebug("- start_code %lx", current->mm->start_code);
@@ -1603,7 +1604,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
if (!elf_core_write_extra_phdrs(cprm, offset))
goto end_coredump;
- /* write out the notes section */
+ /* write out the notes section */
if (!writenote(thread_list->notes, cprm))
goto end_coredump;
if (!writenote(&psinfo_note, cprm))
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index e1eae7ea823a..bb202ad369d5 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -44,10 +44,10 @@ static LIST_HEAD(entries);
static int enabled = 1;
enum {Enabled, Magic};
-#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
-#define MISC_FMT_OPEN_BINARY (1 << 30)
-#define MISC_FMT_CREDENTIALS (1 << 29)
-#define MISC_FMT_OPEN_FILE (1 << 28)
+#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31)
+#define MISC_FMT_OPEN_BINARY (1UL << 30)
+#define MISC_FMT_CREDENTIALS (1UL << 29)
+#define MISC_FMT_OPEN_FILE (1UL << 28)
typedef struct {
struct list_head list;
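[Editor's note] The switch to 1UL matters because 1 << 31 shifts into the sign bit of a 32-bit int (undefined behaviour, flagged by UBSAN's shift-out-of-bounds check), and the resulting negative value sign-extends when stored in an unsigned long flags word. A small user-space illustration, not part of the patch:

        #include <stdio.h>

        int main(void)
        {
                unsigned long a = 1 << 31;      /* signed int shift: UB, then sign-extends */
                unsigned long b = 1UL << 31;    /* well-defined, bit 31 only */

                printf("%#lx\n", a);    /* typically 0xffffffff80000000 on LP64 */
                printf("%#lx\n", b);    /* 0x80000000 */
                return 0;
        }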
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 3e2843e86e27..f3b461c708a8 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -364,7 +364,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
*fcntl_count = 0;
*flock_count = 0;
- ctx = inode->i_flctx;
+ ctx = locks_inode_context(inode);
if (ctx) {
spin_lock(&ctx->flc_lock);
list_for_each_entry(lock, &ctx->flc_posix, fl_list)
@@ -418,7 +418,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
int num_fcntl_locks, int num_flock_locks)
{
struct file_lock *lock;
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx = locks_inode_context(inode);
int err = 0;
int seen_fcntl = 0;
int seen_flock = 0;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index cd9698209930..6c1431979495 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1413,7 +1413,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
struct inode *inode = d_inode(cfile->dentry);
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct file_lock *flock;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
unsigned int count = 0, i;
int rc = 0, xid, type;
struct list_head locks_to_send, *el;
diff --git a/fs/exec.c b/fs/exec.c
index a0b1f0337a62..089a743f636b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -64,6 +64,7 @@
#include <linux/io_uring.h>
#include <linux/syscall_user_dispatch.h>
#include <linux/coredump.h>
+#include <linux/time_namespace.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -171,7 +172,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
exit:
fput(file);
out:
- return error;
+ return error;
}
#endif /* #ifdef CONFIG_USELIB */
@@ -199,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
{
struct page *page;
int ret;
- unsigned int gup_flags = FOLL_FORCE;
+ unsigned int gup_flags = 0;
#ifdef CONFIG_STACK_GROWSUP
if (write) {
@@ -842,16 +843,13 @@ int setup_arg_pages(struct linux_binprm *bprm,
* will align it up.
*/
rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
+
+ stack_expand = min(rlim_stack, stack_size + stack_expand);
+
#ifdef CONFIG_STACK_GROWSUP
- if (stack_size + stack_expand > rlim_stack)
- stack_base = vma->vm_start + rlim_stack;
- else
- stack_base = vma->vm_end + stack_expand;
+ stack_base = vma->vm_start + stack_expand;
#else
- if (stack_size + stack_expand > rlim_stack)
- stack_base = vma->vm_end - rlim_stack;
- else
- stack_base = vma->vm_start - stack_expand;
+ stack_base = vma->vm_end - stack_expand;
#endif
current->mm->start_stack = bprm->p;
ret = expand_stack(vma, stack_base);
@@ -1297,6 +1295,10 @@ int begin_new_exec(struct linux_binprm * bprm)
bprm->mm = NULL;
+ retval = exec_task_namespaces();
+ if (retval)
+ goto out_unlock;
+
#ifdef CONFIG_POSIX_TIMERS
spin_lock_irq(&me->sighand->siglock);
posix_cpu_timers_exit(me);
@@ -1568,6 +1570,12 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
if (task_no_new_privs(current))
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
+ /*
+ * If another task is sharing our fs, we cannot safely
+ * suid exec because the differently privileged task
+ * will be able to manipulate the current directory, etc.
+ * It would be nice to force an unshare instead...
+ */
t = p;
n_fs = 1;
spin_lock(&p->fs->lock);
@@ -1748,6 +1756,7 @@ static int search_binary_handler(struct linux_binprm *bprm)
return retval;
}
+/* binfmt handlers will call back into begin_new_exec() on success. */
static int exec_binprm(struct linux_binprm *bprm)
{
pid_t old_pid, old_vpid;
@@ -1806,6 +1815,11 @@ static int bprm_execve(struct linux_binprm *bprm,
if (retval)
return retval;
+ /*
+ * Check for unsafe execution states before exec_binprm(), which
+ * will call back into begin_new_exec(), into bprm_creds_from_file(),
+ * where setuid-ness is evaluated.
+ */
check_unsafe_exec(bprm);
current->in_execve = 1;
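[Editor's note] The setup_arg_pages() hunk above folds the two branches into one clamp. Since stack_size is vma->vm_end - vma->vm_start, the old GROWSUP else-branch (vma->vm_end + stack_expand) equals vma->vm_start + stack_size + stack_expand, so a single clamp against rlim_stack reproduces both cases; the same algebra applies on the grow-down side. A hedged arithmetic check with made-up numbers:

        #include <stdio.h>

        int main(void)
        {
                /* Illustrative values only (bytes). */
                unsigned long vm_start = 0x7f0000000000UL;
                unsigned long stack_size = 0x4000;      /* vm_end - vm_start */
                unsigned long vm_end = vm_start + stack_size;
                unsigned long expand = 131072UL;        /* stack_expand */
                unsigned long rlim_stack = 8UL << 20;   /* 8 MiB */
                unsigned long old_base, new_base, clamp;

                /* Old CONFIG_STACK_GROWSUP logic. */
                if (stack_size + expand > rlim_stack)
                        old_base = vm_start + rlim_stack;
                else
                        old_base = vm_end + expand;

                /* New logic: clamp first, then offset from vm_start. */
                clamp = stack_size + expand;
                if (clamp > rlim_stack)
                        clamp = rlim_stack;
                new_base = vm_start + clamp;

                printf("%d\n", old_base == new_base);   /* prints 1 */
                return 0;
        }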
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 451d8a077e12..bce2492186d0 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -605,6 +605,14 @@ again:
set_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags);
queue = true;
}
+ /*
+ * We could race with cookie_lru which may set LRU_DISCARD bit
+ * but has yet to run the cookie state machine. If this happens
+ * and another thread tries to use the cookie, clear LRU_DISCARD
+ * so we don't end up withdrawing the cookie while in use.
+ */
+ if (test_and_clear_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags))
+ fscache_see_cookie(cookie, fscache_cookie_see_lru_discard_clear);
break;
case FSCACHE_COOKIE_STATE_FAILED:
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 71bfb663aac5..89f4741728ba 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2963,11 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
.mode = mode
};
int err;
- bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
- (mode & (FALLOC_FL_PUNCH_HOLE |
- FALLOC_FL_ZERO_RANGE));
-
- bool block_faults = FUSE_IS_DAX(inode) && lock_inode;
+ bool block_faults = FUSE_IS_DAX(inode) &&
+ (!(mode & FALLOC_FL_KEEP_SIZE) ||
+ (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)));
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_ZERO_RANGE))
@@ -2976,22 +2974,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (fm->fc->no_fallocate)
return -EOPNOTSUPP;
- if (lock_inode) {
- inode_lock(inode);
- if (block_faults) {
- filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
- if (err)
- goto out;
- }
+ inode_lock(inode);
+ if (block_faults) {
+ filemap_invalidate_lock(inode->i_mapping);
+ err = fuse_dax_break_layouts(inode, 0, 0);
+ if (err)
+ goto out;
+ }
- if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
- loff_t endbyte = offset + length - 1;
+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) {
+ loff_t endbyte = offset + length - 1;
- err = fuse_writeback_range(inode, offset, endbyte);
- if (err)
- goto out;
- }
+ err = fuse_writeback_range(inode, offset, endbyte);
+ if (err)
+ goto out;
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
@@ -3039,8 +3035,7 @@ out:
if (block_faults)
filemap_invalidate_unlock(inode->i_mapping);
- if (lock_inode)
- inode_unlock(inode);
+ inode_unlock(inode);
fuse_flush_time_update(inode);
diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 94b8ed4ef870..72e981c9a572 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -321,7 +321,7 @@ static int check_lock_range(struct file *filp, loff_t start, loff_t end,
unsigned char type)
{
struct file_lock *flock;
- struct file_lock_context *ctx = file_inode(filp)->i_flctx;
+ struct file_lock_context *ctx = locks_inode_context(file_inode(filp));
int error = 0;
if (!ctx || list_empty_careful(&ctx->flc_posix))
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index e1c4617de771..720684345817 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -207,7 +207,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
{
struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct nlm_host *lockhost;
if (!flctx || list_empty_careful(&flctx->flc_posix))
@@ -262,7 +262,7 @@ nlm_file_inuse(struct nlm_file *file)
{
struct inode *inode = nlmsvc_file_inode(file);
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
return 1;
diff --git a/fs/locks.c b/fs/locks.c
index 607f94a0e789..8f01bee17715 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -175,7 +175,7 @@ locks_get_lock_context(struct inode *inode, int type)
struct file_lock_context *ctx;
/* paired with cmpxchg() below */
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (likely(ctx) || type == F_UNLCK)
goto out;
@@ -194,7 +194,7 @@ locks_get_lock_context(struct inode *inode, int type)
*/
if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
kmem_cache_free(flctx_cache, ctx);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
}
out:
trace_locks_get_lock_context(inode, type, ctx);
@@ -247,7 +247,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list,
void
locks_free_lock_context(struct inode *inode)
{
- struct file_lock_context *ctx = inode->i_flctx;
+ struct file_lock_context *ctx = locks_inode_context(inode);
if (unlikely(ctx)) {
locks_check_ctx_lists(inode);
@@ -891,7 +891,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
void *owner;
void (*func)(void);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
fl->fl_type = F_UNLCK;
return;
@@ -1483,7 +1483,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
new_fl->fl_flags = type;
/* typically we will check that ctx is non-NULL before calling */
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx) {
WARN_ON_ONCE(1);
goto free_lock;
@@ -1588,7 +1588,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time)
struct file_lock_context *ctx;
struct file_lock *fl;
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
fl = list_first_entry_or_null(&ctx->flc_lease,
@@ -1634,7 +1634,7 @@ int fcntl_getlease(struct file *filp)
int type = F_UNLCK;
LIST_HEAD(dispose);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
@@ -1823,7 +1823,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
struct file_lock_context *ctx;
LIST_HEAD(dispose);
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx) {
trace_generic_delete_lease(inode, NULL);
return error;
@@ -2096,7 +2096,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
* throw a warning to let people know that they don't actually work.
*/
if (cmd & LOCK_MAND) {
- pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
+ pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n", current->comm, current->pid);
return 0;
}
@@ -2146,6 +2146,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
*/
int vfs_test_lock(struct file *filp, struct file_lock *fl)
{
+ WARN_ON_ONCE(filp != fl->fl_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, F_GETLK, fl);
posix_test_lock(filp, fl);
@@ -2295,6 +2296,7 @@ out:
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
{
+ WARN_ON_ONCE(filp != fl->fl_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, cmd, fl);
else
@@ -2561,7 +2563,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
* posix_lock_file(). Another process could be setting a lock on this
* file at the same time, but we wouldn't remove that lock anyway.
*/
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx || list_empty(&ctx->flc_posix))
return;
@@ -2634,7 +2636,7 @@ void locks_remove_file(struct file *filp)
{
struct file_lock_context *ctx;
- ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
+ ctx = locks_inode_context(locks_inode(filp));
if (!ctx)
return;
@@ -2663,12 +2665,36 @@ void locks_remove_file(struct file *filp)
*/
int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
{
+ WARN_ON_ONCE(filp != fl->fl_file);
if (filp->f_op->lock)
return filp->f_op->lock(filp, F_CANCELLK, fl);
return 0;
}
EXPORT_SYMBOL_GPL(vfs_cancel_lock);
+/**
+ * vfs_inode_has_locks - are any file locks held on @inode?
+ * @inode: inode to check for locks
+ *
+ * Return true if there are any FL_POSIX or FL_FLOCK locks currently
+ * set on @inode.
+ */
+bool vfs_inode_has_locks(struct inode *inode)
+{
+ struct file_lock_context *ctx;
+ bool ret;
+
+ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return false;
+
+ spin_lock(&ctx->flc_lock);
+ ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock);
+ spin_unlock(&ctx->flc_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vfs_inode_has_locks);
+
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -2839,7 +2865,7 @@ void show_fd_locks(struct seq_file *f,
struct file_lock_context *ctx;
int id = 0;
- ctx = smp_load_acquire(&inode->i_flctx);
+ ctx = locks_inode_context(inode);
if (!ctx)
return;
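[Editor's note] vfs_inode_has_locks() above gives callers a cheap way to ask whether any POSIX or flock locks are currently held, without open-coding the flc list walk. A hedged usage sketch; the caller and its policy are invented for illustration:

        #include <linux/fs.h>

        /* Example policy helper: refuse some operation while the file is locked. */
        static int sample_deny_if_locked(struct file *file)
        {
                struct inode *inode = file_inode(file);

                if (vfs_inode_has_locks(inode))
                        return -EBUSY;  /* somebody holds a POSIX or flock lock */

                return 0;
        }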
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index ead8a0e06abf..cf7365581031 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -146,7 +146,7 @@ static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_state
{
struct inode *inode = state->inode;
struct file_lock *fl;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct list_head *list;
int status = 0;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a2d2d5d1b088..dd18344648f3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1501,7 +1501,7 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
struct file_lock *fl;
struct nfs4_lock_state *lsp;
int status = 0;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct list_head *list;
if (flctx == NULL)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 317cedfa52bf..16be6dae524f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -1055,7 +1055,7 @@ static unsigned int nfs_coalesce_size(struct nfs_page *prev,
if (prev) {
if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev)))
return 0;
- flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx;
+ flctx = locks_inode_context(d_inode(nfs_req_openctx(req)->dentry));
if (flctx != NULL &&
!(list_empty_careful(&flctx->flc_posix) &&
list_empty_careful(&flctx->flc_flock)) &&
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f41d24b54fd1..80c240e50952 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1185,7 +1185,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct nfs_lock_context *l_ctx;
- struct file_lock_context *flctx = file_inode(file)->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(file_inode(file));
struct nfs_page *req;
int do_flush, status;
/*
@@ -1321,7 +1321,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page,
struct inode *inode, unsigned int pagelen)
{
int ret;
- struct file_lock_context *flctx = inode->i_flctx;
+ struct file_lock_context *flctx = locks_inode_context(inode);
struct file_lock *fl;
if (file->f_flags & O_DSYNC)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 836bd825ca4a..da8d0ea66229 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4758,7 +4758,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
static bool nfsd4_deleg_present(const struct inode *inode)
{
- struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx);
+ struct file_lock_context *ctx = locks_inode_context(inode);
return ctx && !list_empty_careful(&ctx->flc_lease);
}
@@ -5897,7 +5897,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
nf = stp->st_stid.sc_file;
- ctx = nf->fi_inode->i_flctx;
+ ctx = locks_inode_context(nf->fi_inode);
if (!ctx)
continue;
if (locks_owner_has_blockers(ctx, lo))
@@ -7713,7 +7713,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
}
inode = locks_inode(nf->nf_file);
- flctx = inode->i_flctx;
+ flctx = locks_inode_context(inode);
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
spin_lock(&flctx->flc_lock);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 3b55e239705f..9930fa901039 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
kunmap_atomic(kaddr);
nilfs_dat_commit_entry(dat, req);
+
+ if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) {
+ nilfs_error(dat->i_sb,
+ "state inconsistency probably due to duplicate use of vblocknr = %llu",
+ (unsigned long long)req->pr_entry_nr);
+ return;
+ }
nilfs_palloc_commit_free_entry(dat, req);
}
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index dfe6ce3505ce..e0758fe7936d 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -33,7 +33,16 @@ static int show_console_dev(struct seq_file *m, void *v)
if (con->device) {
const struct tty_driver *driver;
int index;
+
+ /*
+ * Take console_lock to serialize device() callback with
+ * other console operations. For example, fg_console is
+ * modified under console_lock when switching vt.
+ */
+ console_lock();
driver = con->device(con, &index);
+ console_unlock();
+
if (driver) {
dev = MKDEV(driver->major, driver->minor_start);
dev += index;
@@ -63,7 +72,12 @@ static void *c_start(struct seq_file *m, loff_t *pos)
struct console *con;
loff_t off = 0;
- console_lock();
+ /*
+ * Hold the console_list_lock to guarantee safe traversal of the
+ * console list. SRCU cannot be used because there is no
+ * place to store the SRCU cookie.
+ */
+ console_list_lock();
for_each_console(con)
if (off++ == *pos)
break;
@@ -74,13 +88,14 @@ static void *c_start(struct seq_file *m, loff_t *pos)
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
struct console *con = v;
+
++*pos;
- return con->next;
+ return hlist_entry_safe(con->node.next, struct console, node);
}
static void c_stop(struct seq_file *m, void *v)
{
- console_unlock();
+ console_list_unlock();
}
static const struct seq_operations consoles_op = {
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 0c034ea39954..cbc0b468c1ab 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -89,6 +89,11 @@ static char *compress =
module_param(compress, charp, 0444);
MODULE_PARM_DESC(compress, "compression to use");
+/* How much of the kernel log to snapshot */
+unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
+module_param(kmsg_bytes, ulong, 0444);
+MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)");
+
/* Compression parameters */
static struct crypto_comp *tfm;
@@ -100,9 +105,6 @@ struct pstore_zbackend {
static char *big_oops_buf;
static size_t big_oops_buf_sz;
-/* How much of the console log to snapshot */
-unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
-
void pstore_set_kmsg_bytes(int bytes)
{
kmsg_bytes = bytes;
@@ -391,6 +393,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
const char *why;
unsigned int part = 1;
unsigned long flags = 0;
+ int saved_ret = 0;
int ret;
why = kmsg_dump_reason_str(reason);
@@ -461,12 +464,21 @@ static void pstore_dump(struct kmsg_dumper *dumper,
if (ret == 0 && reason == KMSG_DUMP_OOPS) {
pstore_new_entry = 1;
pstore_timer_kick();
+ } else {
+ /* Preserve only the first non-zero returned value. */
+ if (!saved_ret)
+ saved_ret = ret;
}
total += record.size;
part++;
}
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+
+ if (saved_ret) {
+ pr_err_once("backend (%s) writing error (%d)\n", psinfo->name,
+ saved_ret);
+ }
}
static struct kmsg_dumper pstore_dumper = {
@@ -562,8 +574,9 @@ out:
int pstore_register(struct pstore_info *psi)
{
if (backend && strcmp(backend, psi->name)) {
- pr_warn("ignoring unexpected backend '%s'\n", psi->name);
- return -EPERM;
+ pr_warn("backend '%s' already in use: ignoring '%s'\n",
+ backend, psi->name);
+ return -EBUSY;
}
/* Sanity check flags. */
@@ -662,6 +675,8 @@ void pstore_unregister(struct pstore_info *psi)
psinfo = NULL;
kfree(backend);
backend = NULL;
+
+ pr_info("Unregistered %s as persistent store backend\n", psi->name);
mutex_unlock(&psinfo_lock);
}
EXPORT_SYMBOL_GPL(pstore_unregister);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index fefe3d391d3a..9a5052431fd3 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -18,10 +18,11 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/compiler.h>
-#include <linux/pstore_ram.h>
#include <linux/of.h>
#include <linux/of_address.h>
+
#include "internal.h"
+#include "ram_internal.h"
#define RAMOOPS_KERNMSG_HDR "===="
#define MIN_MEM_SIZE 4096UL
@@ -451,20 +452,28 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
{
int i;
+ /* Free pmsg PRZ */
+ persistent_ram_free(&cxt->mprz);
+
+ /* Free console PRZ */
+ persistent_ram_free(&cxt->cprz);
+
/* Free dump PRZs */
if (cxt->dprzs) {
for (i = 0; i < cxt->max_dump_cnt; i++)
- persistent_ram_free(cxt->dprzs[i]);
+ persistent_ram_free(&cxt->dprzs[i]);
kfree(cxt->dprzs);
+ cxt->dprzs = NULL;
cxt->max_dump_cnt = 0;
}
/* Free ftrace PRZs */
if (cxt->fprzs) {
for (i = 0; i < cxt->max_ftrace_cnt; i++)
- persistent_ram_free(cxt->fprzs[i]);
+ persistent_ram_free(&cxt->fprzs[i]);
kfree(cxt->fprzs);
+ cxt->fprzs = NULL;
cxt->max_ftrace_cnt = 0;
}
}
@@ -548,9 +557,10 @@ static int ramoops_init_przs(const char *name,
while (i > 0) {
i--;
- persistent_ram_free(prz_ar[i]);
+ persistent_ram_free(&prz_ar[i]);
}
kfree(prz_ar);
+ prz_ar = NULL;
goto fail;
}
*paddr += zone_sz;
@@ -735,6 +745,7 @@ static int ramoops_probe(struct platform_device *pdev)
/* Make sure we didn't get bogus platform data pointer. */
if (!pdata) {
pr_err("NULL platform data\n");
+ err = -EINVAL;
goto fail_out;
}
@@ -742,6 +753,7 @@ static int ramoops_probe(struct platform_device *pdev)
!pdata->ftrace_size && !pdata->pmsg_size)) {
pr_err("The memory size and the record/console size must be "
"non-zero\n");
+ err = -EINVAL;
goto fail_out;
}
@@ -772,12 +784,17 @@ static int ramoops_probe(struct platform_device *pdev)
dump_mem_sz, cxt->record_size,
&cxt->max_dump_cnt, 0, 0);
if (err)
- goto fail_out;
+ goto fail_init;
err = ramoops_init_prz("console", dev, cxt, &cxt->cprz, &paddr,
cxt->console_size, 0);
if (err)
- goto fail_init_cprz;
+ goto fail_init;
+
+ err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr,
+ cxt->pmsg_size, 0);
+ if (err)
+ goto fail_init;
cxt->max_ftrace_cnt = (cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
? nr_cpu_ids
@@ -788,12 +805,7 @@ static int ramoops_probe(struct platform_device *pdev)
(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)
? PRZ_FLAG_NO_LOCK : 0);
if (err)
- goto fail_init_fprz;
-
- err = ramoops_init_prz("pmsg", dev, cxt, &cxt->mprz, &paddr,
- cxt->pmsg_size, 0);
- if (err)
- goto fail_init_mprz;
+ goto fail_init;
cxt->pstore.data = cxt;
/*
@@ -857,11 +869,7 @@ fail_buf:
kfree(cxt->pstore.buf);
fail_clear:
cxt->pstore.bufsize = 0;
- persistent_ram_free(cxt->mprz);
-fail_init_mprz:
-fail_init_fprz:
- persistent_ram_free(cxt->cprz);
-fail_init_cprz:
+fail_init:
ramoops_free_przs(cxt);
fail_out:
return err;
@@ -876,8 +884,6 @@ static int ramoops_remove(struct platform_device *pdev)
kfree(cxt->pstore.buf);
cxt->pstore.bufsize = 0;
- persistent_ram_free(cxt->mprz);
- persistent_ram_free(cxt->cprz);
ramoops_free_przs(cxt);
return 0;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index a89e33719fcf..966191d3a5ba 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -13,13 +13,14 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/memblock.h>
-#include <linux/pstore_ram.h>
#include <linux/rslib.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
+#include "ram_internal.h"
+
/**
* struct persistent_ram_buffer - persistent circular RAM buffer
*
@@ -439,7 +440,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size,
phys_addr_t addr = page_start + i * PAGE_SIZE;
pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
}
- vaddr = vmap(pages, page_count, VM_MAP, prot);
+ /*
+ * VM_IOREMAP used here to bypass this region during vread()
+ * and kmap_atomic() (i.e. kcore) to avoid __va() failures.
+ */
+ vaddr = vmap(pages, page_count, VM_MAP | VM_IOREMAP, prot);
kfree(pages);
/*
@@ -543,8 +548,14 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
return 0;
}
-void persistent_ram_free(struct persistent_ram_zone *prz)
+void persistent_ram_free(struct persistent_ram_zone **_prz)
{
+ struct persistent_ram_zone *prz;
+
+ if (!_prz)
+ return;
+
+ prz = *_prz;
if (!prz)
return;
@@ -568,6 +579,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
persistent_ram_free_old(prz);
kfree(prz->label);
kfree(prz);
+ *_prz = NULL;
}
struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
@@ -604,6 +616,6 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
return prz;
err:
- persistent_ram_free(prz);
+ persistent_ram_free(&prz);
return ERR_PTR(ret);
}
diff --git a/fs/pstore/ram_internal.h b/fs/pstore/ram_internal.h
new file mode 100644
index 000000000000..5f694698351f
--- /dev/null
+++ b/fs/pstore/ram_internal.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2010 Marco Stornelli <[email protected]>
+ * Copyright (C) 2011 Kees Cook <[email protected]>
+ * Copyright (C) 2011 Google, Inc.
+ */
+
+#include <linux/pstore_ram.h>
+
+/*
+ * Choose whether access to the RAM zone requires locking or not. If a zone
+ * can be written to from different CPUs like with ftrace for example, then
+ * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required.
+ */
+#define PRZ_FLAG_NO_LOCK BIT(0)
+/*
+ * If a PRZ should only have a single-boot lifetime, this marks it as
+ * getting wiped after its contents get copied out after boot.
+ */
+#define PRZ_FLAG_ZAP_OLD BIT(1)
+
+/**
+ * struct persistent_ram_zone - Details of a persistent RAM zone (PRZ)
+ * used as a pstore backend
+ *
+ * @paddr: physical address of the mapped RAM area
+ * @size: size of mapping
+ * @label: unique name of this PRZ
+ * @type: frontend type for this PRZ
+ * @flags: holds PRZ_FLAGS_* bits
+ *
+ * @buffer_lock:
+ * locks access to @buffer "size" bytes and "start" offset
+ * @buffer:
+ * pointer to actual RAM area managed by this PRZ
+ * @buffer_size:
+ * bytes in @buffer->data (not including any trailing ECC bytes)
+ *
+ * @par_buffer:
+ * pointer into @buffer->data containing ECC bytes for @buffer->data
+ * @par_header:
+ * pointer into @buffer->data containing ECC bytes for @buffer header
+ * (i.e. all fields up to @data)
+ * @rs_decoder:
+ * RSLIB instance for doing ECC calculations
+ * @corrected_bytes:
+ * ECC corrected bytes accounting since boot
+ * @bad_blocks:
+ * ECC uncorrectable bytes accounting since boot
+ * @ecc_info:
+ * ECC configuration details
+ *
+ * @old_log:
+ * saved copy of @buffer->data prior to most recent wipe
+ * @old_log_size:
+ * bytes contained in @old_log
+ *
+ */
+struct persistent_ram_zone {
+ phys_addr_t paddr;
+ size_t size;
+ void *vaddr;
+ char *label;
+ enum pstore_type_id type;
+ u32 flags;
+
+ raw_spinlock_t buffer_lock;
+ struct persistent_ram_buffer *buffer;
+ size_t buffer_size;
+
+ char *par_buffer;
+ char *par_header;
+ struct rs_control *rs_decoder;
+ int corrected_bytes;
+ int bad_blocks;
+ struct persistent_ram_ecc_info ecc_info;
+
+ char *old_log;
+ size_t old_log_size;
+};
+
+struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
+ u32 sig, struct persistent_ram_ecc_info *ecc_info,
+ unsigned int memtype, u32 flags, char *label);
+void persistent_ram_free(struct persistent_ram_zone **_prz);
+void persistent_ram_zap(struct persistent_ram_zone *prz);
+
+int persistent_ram_write(struct persistent_ram_zone *prz, const void *s,
+ unsigned int count);
+int persistent_ram_write_user(struct persistent_ram_zone *prz,
+ const void __user *s, unsigned int count);
+
+void persistent_ram_save_old(struct persistent_ram_zone *prz);
+size_t persistent_ram_old_size(struct persistent_ram_zone *prz);
+void *persistent_ram_old(struct persistent_ram_zone *prz);
+void persistent_ram_free_old(struct persistent_ram_zone *prz);
+ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
+ char *str, size_t len);
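[Editor's note] ram_internal.h now carries the persistent_ram_zone definition and the prototypes shared by ram.c and ram_core.c. A hedged sketch of the calling convention, in particular the reworked persistent_ram_free() that takes the address of the handle and resets it to NULL; the physical address, size and label are placeholders:

        #include <linux/err.h>
        #include "ram_internal.h"

        static struct persistent_ram_zone *prz;

        static int sample_prz_setup(void)
        {
                struct persistent_ram_ecc_info ecc_info = {};
                static char label[] = "sample";

                prz = persistent_ram_new(0x9ff00000, 64 * 1024, 0, &ecc_info,
                                         0 /* memtype */, 0 /* flags */, label);
                if (IS_ERR(prz)) {
                        int err = PTR_ERR(prz);

                        prz = NULL;
                        return err;
                }

                persistent_ram_write(prz, "hello", 5);
                return 0;
        }

        static void sample_prz_teardown(void)
        {
                /* NULLs the caller's pointer, so a double free becomes a no-op. */
                persistent_ram_free(&prz);
        }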
diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
index 017d0d4ad329..2770746bb7aa 100644
--- a/fs/pstore/zone.c
+++ b/fs/pstore/zone.c
@@ -761,7 +761,7 @@ static inline int notrace psz_kmsg_write_record(struct psz_context *cxt,
/* avoid destroying old data, allocate a new one */
len = zone->buffer_size + sizeof(*zone->buffer);
zone->oldbuf = zone->buffer;
- zone->buffer = kzalloc(len, GFP_KERNEL);
+ zone->buffer = kzalloc(len, GFP_ATOMIC);
if (!zone->buffer) {
zone->buffer = zone->oldbuf;
return -ENOMEM;
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 492dce43236e..cab7cfebf40b 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -222,12 +222,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#define tlb_needs_table_invalidate() (true)
#endif
+void tlb_remove_table_sync_one(void);
+
#else
#ifdef tlb_needs_table_invalidate
#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
#endif
+static inline void tlb_remove_table_sync_one(void) { }
+
#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h
index 5755ae5a4712..6a869682c120 100644
--- a/include/linux/can/platform/sja1000.h
+++ b/include/linux/can/platform/sja1000.h
@@ -14,7 +14,7 @@
#define OCR_MODE_TEST 0x01
#define OCR_MODE_NORMAL 0x02
#define OCR_MODE_CLOCK 0x03
-#define OCR_MODE_MASK 0x07
+#define OCR_MODE_MASK 0x03
#define OCR_TX0_INVERT 0x04
#define OCR_TX0_PULLDOWN 0x08
#define OCR_TX0_PULLUP 0x10
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 528bd44b59e2..2b7d077de7ef 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -68,6 +68,7 @@ struct css_task_iter {
struct list_head iters_node; /* css_set->task_iters */
};
+extern struct file_system_type cgroup_fs_type;
extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;
diff --git a/include/linux/console.h b/include/linux/console.h
index 8c1686e2c233..9cea254b34b8 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -15,6 +15,7 @@
#define _LINUX_CONSOLE_H_ 1
#include <linux/atomic.h>
+#include <linux/rculist.h>
#include <linux/types.h>
struct vc_data;
@@ -154,14 +155,132 @@ struct console {
u64 seq;
unsigned long dropped;
void *data;
- struct console *next;
+ struct hlist_node node;
};
+#ifdef CONFIG_LOCKDEP
+extern void lockdep_assert_console_list_lock_held(void);
+#else
+static inline void lockdep_assert_console_list_lock_held(void)
+{
+}
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern bool console_srcu_read_lock_is_held(void);
+#else
+static inline bool console_srcu_read_lock_is_held(void)
+{
+ return 1;
+}
+#endif
+
+extern int console_srcu_read_lock(void);
+extern void console_srcu_read_unlock(int cookie);
+
+extern void console_list_lock(void) __acquires(console_mutex);
+extern void console_list_unlock(void) __releases(console_mutex);
+
+extern struct hlist_head console_list;
+
+/**
+ * console_srcu_read_flags - Locklessly read the console flags
+ * @con: struct console pointer of console to read flags from
+ *
+ * This function provides the necessary READ_ONCE() and data_race()
+ * notation for locklessly reading the console flags. The READ_ONCE()
+ * in this function matches the WRITE_ONCE() when @flags are modified
+ * for registered consoles with console_srcu_write_flags().
+ *
+ * Only use this function to read console flags when locklessly
+ * iterating the console list via srcu.
+ *
+ * Context: Any context.
+ */
+static inline short console_srcu_read_flags(const struct console *con)
+{
+ WARN_ON_ONCE(!console_srcu_read_lock_is_held());
+
+ /*
+ * Locklessly reading console->flags provides a consistent
+ * read value because there is at most one CPU modifying
+ * console->flags and that CPU is using only read-modify-write
+ * operations to do so.
+ */
+ return data_race(READ_ONCE(con->flags));
+}
+
+/**
+ * console_srcu_write_flags - Write flags for a registered console
+ * @con: struct console pointer of console to write flags to
+ * @flags: new flags value to write
+ *
+ * Only use this function to write flags for registered consoles. It
+ * requires holding the console_list_lock.
+ *
+ * Context: Any context.
+ */
+static inline void console_srcu_write_flags(struct console *con, short flags)
+{
+ lockdep_assert_console_list_lock_held();
+
+ /* This matches the READ_ONCE() in console_srcu_read_flags(). */
+ WRITE_ONCE(con->flags, flags);
+}
+
+/* Variant of console_is_registered() when the console_list_lock is held. */
+static inline bool console_is_registered_locked(const struct console *con)
+{
+ lockdep_assert_console_list_lock_held();
+ return !hlist_unhashed(&con->node);
+}
+
/*
- * for_each_console() allows you to iterate on each console
+ * console_is_registered - Check if the console is registered
+ * @con: struct console pointer of console to check
+ *
+ * Context: Process context. May sleep while acquiring console list lock.
+ * Return: true if the console is in the console list, otherwise false.
+ *
+ * If false is returned for a console that was previously registered, it
+ * can be assumed that the console's unregistration is fully completed,
+ * including the exit() callback after console list removal.
+ */
+static inline bool console_is_registered(const struct console *con)
+{
+ bool ret;
+
+ console_list_lock();
+ ret = console_is_registered_locked(con);
+ console_list_unlock();
+ return ret;
+}
+
+/**
+ * for_each_console_srcu() - Iterator over registered consoles
+ * @con: struct console pointer used as loop cursor
+ *
+ * Although SRCU guarantees the console list will be consistent, the
+ * struct console fields may be updated by other CPUs while iterating.
+ *
+ * Requires console_srcu_read_lock to be held. Can be invoked from
+ * any context.
+ */
+#define for_each_console_srcu(con) \
+ hlist_for_each_entry_srcu(con, &console_list, node, \
+ console_srcu_read_lock_is_held())
+
+/**
+ * for_each_console() - Iterator over registered consoles
+ * @con: struct console pointer used as loop cursor
+ *
+ * The console list and the console->flags are immutable while iterating.
+ *
+ * Requires console_list_lock to be held.
*/
-#define for_each_console(con) \
- for (con = console_drivers; con != NULL; con = con->next)
+#define for_each_console(con) \
+ lockdep_assert_console_list_lock_held(); \
+ hlist_for_each_entry(con, &console_list, node)
extern int console_set_on_cmdline;
extern struct console *early_console;
@@ -172,9 +291,9 @@ enum con_flush_mode {
};
extern int add_preferred_console(char *name, int idx, char *options);
+extern void console_force_preferred_locked(struct console *con);
extern void register_console(struct console *);
extern int unregister_console(struct console *);
-extern struct console *console_drivers;
extern void console_lock(void);
extern int console_trylock(void);
extern void console_unlock(void);
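[Editor's note] The console.h additions above replace the open-coded singly linked list with an hlist plus two access modes: console_list_lock for mutable, sleepable traversal and SRCU for lockless readers. A hedged sketch of the lockless side, combining for_each_console_srcu() with console_srcu_read_flags(); the predicate is arbitrary:

        #include <linux/console.h>

        /* Count consoles that are currently enabled, without blocking writers. */
        static int count_enabled_consoles(void)
        {
                struct console *con;
                int count = 0;
                int cookie;

                cookie = console_srcu_read_lock();
                for_each_console_srcu(con) {
                        if (console_srcu_read_flags(con) & CON_ENABLED)
                                count++;
                }
                console_srcu_read_unlock(cookie);

                return count;
        }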
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 59ae95ddb679..71b5c24b55ce 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1170,6 +1170,7 @@ extern int locks_delete_block(struct file_lock *);
extern int vfs_test_lock(struct file *, struct file_lock *);
extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
+bool vfs_inode_has_locks(struct inode *inode);
extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
extern void lease_get_mtime(struct inode *, struct timespec64 *time);
@@ -1186,6 +1187,13 @@ extern void show_fd_locks(struct seq_file *f,
struct file *filp, struct files_struct *files);
extern bool locks_owner_has_blockers(struct file_lock_context *flctx,
fl_owner_t owner);
+
+static inline struct file_lock_context *
+locks_inode_context(const struct inode *inode)
+{
+ return smp_load_acquire(&inode->i_flctx);
+}
+
#else /* !CONFIG_FILE_LOCKING */
static inline int fcntl_getlk(struct file *file, unsigned int cmd,
struct flock __user *user)
@@ -1284,6 +1292,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
return 0;
}
+static inline bool vfs_inode_has_locks(struct inode *inode)
+{
+ return false;
+}
+
static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{
return -ENOLCK;
@@ -1326,6 +1339,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
{
return false;
}
+
+static inline struct file_lock_context *
+locks_inode_context(const struct inode *inode)
+{
+ return NULL;
+}
+
#endif /* !CONFIG_FILE_LOCKING */
static inline struct inode *file_inode(const struct file *f)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index ef4aea3b356e..65a78773dcca 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p
return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array);
}
+static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask)
+{
+ gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN);
+
+ if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN))
+ return;
+
+ if (node_online(this_node))
+ return;
+
+ pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node);
+ dump_stack();
+}
+
/*
* Allocate pages, preferring the node given as nid. The node must be valid and
* online. For more general interface, see alloc_pages_node().
@@ -218,7 +232,7 @@ static inline struct page *
__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
- VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid));
+ warn_if_node_offline(nid, gfp_mask);
return __alloc_pages(gfp_mask, order, nid, NULL);
}
@@ -227,7 +241,7 @@ static inline
struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
- VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid));
+ warn_if_node_offline(nid, gfp);
return __folio_alloc(gfp, order, nid, NULL);
}
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index d811b3d7d2a1..96c9d56e5510 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -302,7 +302,7 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
#ifdef CONFIG_KASAN_GENERIC
-size_t kasan_metadata_size(struct kmem_cache *cache);
+size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object);
slab_flags_t kasan_never_merge(void);
void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
slab_flags_t *flags);
@@ -315,7 +315,8 @@ void kasan_record_aux_stack_noalloc(void *ptr);
#else /* CONFIG_KASAN_GENERIC */
/* Tag-based KASAN modes do not use per-object metadata. */
-static inline size_t kasan_metadata_size(struct kmem_cache *cache)
+static inline size_t kasan_metadata_size(struct kmem_cache *cache,
+ bool in_object)
{
return 0;
}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 637a60607c7d..915142abdf76 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -416,7 +416,7 @@ static __always_inline void guest_context_enter_irqoff(void)
*/
if (!context_tracking_guest_enter()) {
instrumentation_begin();
- rcu_virt_note_context_switch(smp_processor_id());
+ rcu_virt_note_context_switch();
instrumentation_end();
}
}
diff --git a/include/linux/license.h b/include/linux/license.h
index ad937f57f2cb..7cce390f120b 100644
--- a/include/linux/license.h
+++ b/include/linux/license.h
@@ -2,8 +2,6 @@
#ifndef __LICENSE_H
#define __LICENSE_H
-#include <linux/string.h>
-
static inline int license_is_gpl_compatible(const char *license)
{
return (strcmp(license, "GPL") == 0
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8bbcccbc5565..974ccca609d2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1852,6 +1852,25 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem
__show_free_areas(flags, nodemask, MAX_NR_ZONES - 1);
}
+/*
+ * Parameter block passed down to zap_pte_range in exceptional cases.
+ */
+struct zap_details {
+ struct folio *single_folio; /* Locked folio to be unmapped */
+ bool even_cows; /* Zap COWed private pages too? */
+ zap_flags_t zap_flags; /* Extra flags for zapping */
+};
+
+/*
+ * Whether to drop the pte markers, for example, the uffd-wp information for
+ * file-backed memory. This should only be specified when we will completely
+ * drop the page in the mm, either by truncation or unmapping of the vma. By
+ * default, the flag is not set.
+ */
+#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
+/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */
+#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1))
+
#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
#else
@@ -1869,6 +1888,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
void zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
+void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
+ unsigned long size, struct zap_details *details);
void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
struct vm_area_struct *start_vma, unsigned long start,
unsigned long end);
@@ -3467,12 +3488,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
}
#endif
-/*
- * Whether to drop the pte markers, for example, the uffd-wp information for
- * file-backed memory. This should only be specified when we will completely
- * drop the page in the mm, either by truncation or unmapping of the vma. By
- * default, the flag is not set.
- */
-#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
-
#endif /* _LINUX_MM_H */
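[Editor's note] mm.h now declares zap_page_range_single() and defines struct zap_details, so callers outside mm/memory.c can pass a parameter block. A hedged sketch of a caller that unmaps one window while keeping pte markers (that is, without ZAP_FLAG_DROP_MARKER); the address and length are placeholders:

        #include <linux/mm.h>

        static void sample_zap_window(struct vm_area_struct *vma,
                                      unsigned long addr, unsigned long len)
        {
                struct zap_details details = {
                        .even_cows = true,      /* also drop private COWed pages */
                        /* .zap_flags left clear: pte markers are preserved */
                };

                zap_page_range_single(vma, addr, len, &details);
        }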
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 9c50bc40f8ff..6f7993803ee7 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status)
#define MMC_SECURE_TRIM1_ARG 0x80000001
#define MMC_SECURE_TRIM2_ARG 0x80008000
#define MMC_SECURE_ARGS 0x80000000
-#define MMC_TRIM_ARGS 0x00008001
+#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003
#define mmc_driver_type_mask(n) (1 << (n))
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cdb171efc7cb..fee881cded01 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -94,6 +94,7 @@ static inline struct cred *nsset_cred(struct nsset *set)
int copy_namespaces(unsigned long flags, struct task_struct *tsk);
void exit_task_namespaces(struct task_struct *tsk);
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
+int exec_task_namespaces(void);
void free_nsproxy(struct nsproxy *ns);
int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
struct cred *, struct fs_struct *);
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index f1ec5ad1351c..3dbb6fb70658 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -42,7 +42,7 @@
* larger than PERCPU_DYNAMIC_EARLY_SIZE.
*/
#define PERCPU_DYNAMIC_EARLY_SLOTS 128
-#define PERCPU_DYNAMIC_EARLY_SIZE (12 << 10)
+#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10)
/*
* PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index a108b60a6962..5f0d7d0b9471 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
}
+#ifndef pmd_young
+static inline int pmd_young(pmd_t pmd)
+{
+ return 0;
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
@@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif
+#ifndef arch_has_hw_nonleaf_pmd_young
+/*
+ * Return whether the accessed bit in non-leaf PMD entries is supported on the
+ * local CPU.
+ */
+static inline bool arch_has_hw_nonleaf_pmd_young(void)
+{
+ return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
+}
+#endif
+
#ifndef arch_has_hw_pte_young
/*
* Return whether the accessed bit is supported on the local CPU.
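A hedged sketch of how a page-aging walk might consult the two helpers added above; the wrapper name is an assumption for illustration only:

/* Illustrative only: report the accessed bit of a non-leaf PMD when the
 * CPU actually maintains it; callers fall back to a PTE scan otherwise. */
static bool example_nonleaf_pmd_was_accessed(pmd_t pmd)
{
        if (!arch_has_hw_nonleaf_pmd_young())
                return false;   /* accessed bit not maintained at this level */

        return pmd_young(pmd);
}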
diff --git a/include/linux/platform_data/st33zp24.h b/include/linux/platform_data/st33zp24.h
deleted file mode 100644
index 61db674f36cc..000000000000
--- a/include/linux/platform_data/st33zp24.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * STMicroelectronics TPM Linux driver for TPM 1.2 ST33ZP24
- * Copyright (C) 2009 - 2016 STMicroelectronics
- */
-#ifndef __ST33ZP24_H__
-#define __ST33ZP24_H__
-
-#define TPM_ST33_I2C "st33zp24-i2c"
-#define TPM_ST33_SPI "st33zp24-spi"
-
-struct st33zp24_platform_data {
- int io_lpcpd;
-};
-
-#endif /* __ST33ZP24_H__ */
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 9f16afec7290..9d65ff94e216 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -8,28 +8,7 @@
#ifndef __LINUX_PSTORE_RAM_H__
#define __LINUX_PSTORE_RAM_H__
-#include <linux/compiler.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
#include <linux/pstore.h>
-#include <linux/types.h>
-
-/*
- * Choose whether access to the RAM zone requires locking or not. If a zone
- * can be written to from different CPUs like with ftrace for example, then
- * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required.
- */
-#define PRZ_FLAG_NO_LOCK BIT(0)
-/*
- * If a PRZ should only have a single-boot lifetime, this marks it as
- * getting wiped after its contents get copied out after boot.
- */
-#define PRZ_FLAG_ZAP_OLD BIT(1)
-
-struct persistent_ram_buffer;
-struct rs_control;
struct persistent_ram_ecc_info {
int block_size;
@@ -39,84 +18,6 @@ struct persistent_ram_ecc_info {
uint16_t *par;
};
-/**
- * struct persistent_ram_zone - Details of a persistent RAM zone (PRZ)
- * used as a pstore backend
- *
- * @paddr: physical address of the mapped RAM area
- * @size: size of mapping
- * @label: unique name of this PRZ
- * @type: frontend type for this PRZ
- * @flags: holds PRZ_FLAGS_* bits
- *
- * @buffer_lock:
- * locks access to @buffer "size" bytes and "start" offset
- * @buffer:
- * pointer to actual RAM area managed by this PRZ
- * @buffer_size:
- * bytes in @buffer->data (not including any trailing ECC bytes)
- *
- * @par_buffer:
- * pointer into @buffer->data containing ECC bytes for @buffer->data
- * @par_header:
- * pointer into @buffer->data containing ECC bytes for @buffer header
- * (i.e. all fields up to @data)
- * @rs_decoder:
- * RSLIB instance for doing ECC calculations
- * @corrected_bytes:
- * ECC corrected bytes accounting since boot
- * @bad_blocks:
- * ECC uncorrectable bytes accounting since boot
- * @ecc_info:
- * ECC configuration details
- *
- * @old_log:
- * saved copy of @buffer->data prior to most recent wipe
- * @old_log_size:
- * bytes contained in @old_log
- *
- */
-struct persistent_ram_zone {
- phys_addr_t paddr;
- size_t size;
- void *vaddr;
- char *label;
- enum pstore_type_id type;
- u32 flags;
-
- raw_spinlock_t buffer_lock;
- struct persistent_ram_buffer *buffer;
- size_t buffer_size;
-
- char *par_buffer;
- char *par_header;
- struct rs_control *rs_decoder;
- int corrected_bytes;
- int bad_blocks;
- struct persistent_ram_ecc_info ecc_info;
-
- char *old_log;
- size_t old_log_size;
-};
-
-struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
- u32 sig, struct persistent_ram_ecc_info *ecc_info,
- unsigned int memtype, u32 flags, char *label);
-void persistent_ram_free(struct persistent_ram_zone *prz);
-void persistent_ram_zap(struct persistent_ram_zone *prz);
-
-int persistent_ram_write(struct persistent_ram_zone *prz, const void *s,
- unsigned int count);
-int persistent_ram_write_user(struct persistent_ram_zone *prz,
- const void __user *s, unsigned int count);
-
-void persistent_ram_save_old(struct persistent_ram_zone *prz);
-size_t persistent_ram_old_size(struct persistent_ram_zone *prz);
-void *persistent_ram_old(struct persistent_ram_zone *prz);
-void persistent_ram_free_old(struct persistent_ram_zone *prz);
-ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
- char *str, size_t len);
-
/*
* Ramoops platform data
* @mem_size memory size for ramoops
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 08605ce7379d..4da98ca6273e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -108,6 +108,15 @@ static inline int rcu_preempt_depth(void)
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+#ifdef CONFIG_RCU_LAZY
+void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func);
+#else
+static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
+{
+ call_rcu(head, func);
+}
+#endif
+
/* Internal to kernel */
void rcu_init(void);
extern int rcu_scheduler_active;
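call_rcu_hurry() queues the callback like call_rcu(), but requests a timely grace period instead of the lazy batching that CONFIG_RCU_LAZY enables. A minimal usage sketch, with the structure and callback names invented for illustration:

struct example_obj {
        struct rcu_head rcu;
        /* ... payload ... */
};

static void example_obj_free_cb(struct rcu_head *head)
{
        kfree(container_of(head, struct example_obj, rcu));
}

static void example_obj_release(struct example_obj *obj)
{
        /* Latency- or memory-sensitive path: do not let the callback
         * linger in a lazy bypass list; ask for a timely grace period. */
        call_rcu_hurry(&obj->rcu, example_obj_free_cb);
}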
@@ -340,6 +349,11 @@ static inline int rcu_read_lock_any_held(void)
return !preemptible();
}
+static inline int debug_lockdep_rcu_enabled(void)
+{
+ return 0;
+}
+
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_PROVE_RCU
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 768196a5f39d..68f9070aa111 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -142,23 +142,17 @@ static inline int rcu_needs_cpu(void)
* Take advantage of the fact that there is only one CPU, which
* allows us to ignore virtualization-based context switches.
*/
-static inline void rcu_virt_note_context_switch(int cpu) { }
+static inline void rcu_virt_note_context_switch(void) { }
static inline void rcu_cpu_stall_reset(void) { }
static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; }
static inline void rcu_irq_exit_check_preempt(void) { }
-#define rcu_is_idle_cpu(cpu) \
- (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq())
static inline void exit_rcu(void) { }
static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return false;
}
static inline void rcu_preempt_deferred_qs(struct task_struct *t) { }
-#ifdef CONFIG_SRCU
void rcu_scheduler_starting(void);
-#else /* #ifndef CONFIG_SRCU */
-static inline void rcu_scheduler_starting(void) { }
-#endif /* #else #ifndef CONFIG_SRCU */
static inline void rcu_end_inkernel_boot(void) { }
static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
static inline bool rcu_is_watching(void) { return true; }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 5efb51486e8a..4003bf6cfa1c 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -27,7 +27,7 @@ void rcu_cpu_stall_reset(void);
* wrapper around rcu_note_context_switch(), which allows TINY_RCU
* to save a few bytes. The caller must have disabled interrupts.
*/
-static inline void rcu_virt_note_context_switch(int cpu)
+static inline void rcu_virt_note_context_switch(void)
{
rcu_note_context_switch(false);
}
@@ -87,8 +87,6 @@ bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
void cond_synchronize_rcu(unsigned long oldstate);
void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
-bool rcu_is_idle_cpu(int cpu);
-
#ifdef CONFIG_PROVE_RCU
void rcu_irq_exit_check_preempt(void);
#else
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index d31d76be4982..175079552f68 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -27,6 +27,7 @@ struct seccomp_filter;
*
* @mode: indicates one of the valid values above for controlled
* system calls available to a process.
+ * @filter_count: number of seccomp filters
* @filter: must always point to a valid seccomp-filter or NULL as it is
* accessed without locking during system call entry.
*
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index d657f2a42a7b..91871464b99d 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -743,9 +743,15 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED;
static inline int setup_earlycon(char *buf) { return 0; }
#endif
-static inline bool uart_console_enabled(struct uart_port *port)
+/* Variant of uart_console_registered() when the console_list_lock is held. */
+static inline bool uart_console_registered_locked(struct uart_port *port)
{
- return uart_console(port) && (port->cons->flags & CON_ENABLED);
+ return uart_console(port) && console_is_registered_locked(port->cons);
+}
+
+static inline bool uart_console_registered(struct uart_port *port)
+{
+ return uart_console(port) && console_is_registered(port->cons);
}
struct uart_port *uart_get_console(struct uart_port *ports, int nr,
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 45efc6c553b8..45af70315a94 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -76,6 +76,17 @@
* rcu_read_lock before reading the address, then rcu_read_unlock after
* taking the spinlock within the structure expected at that address.
*
+ * Note that it is not possible to acquire a lock within a structure
+ * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference
+ * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages
+ * are not zeroed before being given to the slab, which means that any
+ * locks must be initialized after each and every kmem_cache_alloc().
+ * Alternatively, make the ctor passed to kmem_cache_create() initialize
+ * the locks at page-allocation time, as is done in __i915_request_ctor(),
+ * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers
+ * to safely acquire those ctor-initialized locks under rcu_read_lock()
+ * protection.
+ *
* Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
*/
/* Defer freeing slabs to RCU */
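A sketch of the ctor pattern that the new comment recommends, loosely modeled on sighand_ctor(); everything apart from the slab API itself is an assumed example:

struct example_node {
        spinlock_t lock;        /* may be taken under rcu_read_lock() */
        /* ... */
};

static void example_node_ctor(void *addr)
{
        struct example_node *n = addr;

        /* The ctor runs when the slab page is populated, not on every
         * allocation, so the lock stays valid across the object reuse
         * that SLAB_TYPESAFE_BY_RCU allows. */
        spin_lock_init(&n->lock);
}

static struct kmem_cache *example_cache;

static int __init example_cache_init(void)
{
        example_cache = kmem_cache_create("example_node",
                                          sizeof(struct example_node), 0,
                                          SLAB_TYPESAFE_BY_RCU,
                                          example_node_ctor);
        return example_cache ? 0 : -ENOMEM;
}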
@@ -129,7 +140,11 @@
/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
+#ifndef CONFIG_SLUB_TINY
#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U)
+#else
+#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0)
+#endif
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
/*
@@ -336,13 +351,18 @@ enum kmalloc_cache_type {
#endif
#ifndef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP = KMALLOC_NORMAL,
-#else
- KMALLOC_CGROUP,
#endif
+#ifdef CONFIG_SLUB_TINY
+ KMALLOC_RECLAIM = KMALLOC_NORMAL,
+#else
KMALLOC_RECLAIM,
+#endif
#ifdef CONFIG_ZONE_DMA
KMALLOC_DMA,
#endif
+#ifdef CONFIG_MEMCG_KMEM
+ KMALLOC_CGROUP,
+#endif
NR_KMALLOC_TYPES
};
@@ -441,7 +461,18 @@ static_assert(PAGE_SHIFT <= 20);
#endif /* !CONFIG_SLOB */
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
-void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc;
+
+/**
+ * kmem_cache_alloc - Allocate an object
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ *
+ * Allocate an object from this cache.
+ * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags.
+ *
+ * Return: pointer to the new object or %NULL in case of error
+ */
+void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc;
void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags) __assume_slab_alignment __malloc;
void kmem_cache_free(struct kmem_cache *s, void *objp);
@@ -483,9 +514,9 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
__alloc_size(1);
/**
- * kmalloc - allocate memory
+ * kmalloc - allocate kernel memory
* @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
+ * @flags: describe the allocation context
*
* kmalloc is the normal method of allocating memory
* for objects smaller than page size in the kernel.
@@ -512,12 +543,12 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
* %GFP_ATOMIC
* Allocation will not sleep. May use emergency pools.
*
- * %GFP_HIGHUSER
- * Allocate memory from high memory on behalf of user.
- *
* Also it is possible to set different flags by OR'ing
* in one or more of the following additional @flags:
*
+ * %__GFP_ZERO
+ * Zero the allocated memory before returning. Also see kzalloc().
+ *
* %__GFP_HIGH
* This allocation has high priority and may use emergency pools.
*
@@ -536,42 +567,42 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
* Try really hard to succeed the allocation but fail
* eventually.
*/
+#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
- if (__builtin_constant_p(size)) {
-#ifndef CONFIG_SLOB
+ if (__builtin_constant_p(size) && size) {
unsigned int index;
-#endif
+
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large(size, flags);
-#ifndef CONFIG_SLOB
- index = kmalloc_index(size);
-
- if (!index)
- return ZERO_SIZE_PTR;
+ index = kmalloc_index(size);
return kmalloc_trace(
kmalloc_caches[kmalloc_type(flags)][index],
flags, size);
-#endif
}
return __kmalloc(size, flags);
}
+#else
+static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
+{
+ if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE)
+ return kmalloc_large(size, flags);
+
+ return __kmalloc(size, flags);
+}
+#endif
#ifndef CONFIG_SLOB
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
{
- if (__builtin_constant_p(size)) {
+ if (__builtin_constant_p(size) && size) {
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
return kmalloc_large_node(size, flags, node);
index = kmalloc_index(size);
-
- if (!index)
- return ZERO_SIZE_PTR;
-
return kmalloc_node_trace(
kmalloc_caches[kmalloc_type(flags)][index],
flags, node, size);
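The rewritten kmalloc() kerneldoc above now points readers at __GFP_ZERO; a trivial usage sketch under those flags (the structure is illustrative):

struct example_ctx {
        u32 id;
        void *priv;
};

static struct example_ctx *example_ctx_alloc(void)
{
        /* Constant size: the inline kmalloc() picks the kmalloc cache index
         * at compile time; __GFP_ZERO zeroes the object, matching kzalloc(). */
        return kmalloc(sizeof(struct example_ctx), GFP_KERNEL | __GFP_ZERO);
}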
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index f0ffad6a3365..5834bad8ad78 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -80,8 +80,10 @@ struct kmem_cache {
unsigned int *random_seq;
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
+#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index f9c68a9dac04..aa0ee1678d29 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -41,6 +41,7 @@ enum stat_item {
CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
NR_SLUB_STAT_ITEMS };
+#ifndef CONFIG_SLUB_TINY
/*
* When changing the layout, make sure freelist and tid are still compatible
* with this_cpu_cmpxchg_double() alignment requirements.
@@ -57,6 +58,7 @@ struct kmem_cache_cpu {
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
+#endif /* CONFIG_SLUB_TINY */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c) ((c)->partial)
@@ -88,7 +90,9 @@ struct kmem_cache_order_objects {
* Slab cache management.
*/
struct kmem_cache {
+#ifndef CONFIG_SLUB_TINY
struct kmem_cache_cpu __percpu *cpu_slab;
+#endif
/* Used for retrieving partial slabs, etc. */
slab_flags_t flags;
unsigned long min_partial;
@@ -136,13 +140,15 @@ struct kmem_cache {
struct kasan_cache kasan_info;
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
unsigned int useroffset; /* Usercopy region offset */
unsigned int usersize; /* Usercopy region size */
+#endif
struct kmem_cache_node *node[MAX_NUMNODES];
};
-#ifdef CONFIG_SYSFS
+#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *);
void sysfs_slab_release(struct kmem_cache *);
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 01226e4d960a..9b9d0bbf1d3c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -47,11 +47,8 @@ int init_srcu_struct(struct srcu_struct *ssp);
#include <linux/srcutiny.h>
#elif defined(CONFIG_TREE_SRCU)
#include <linux/srcutree.h>
-#elif defined(CONFIG_SRCU)
-#error "Unknown SRCU implementation specified to kernel configuration"
#else
-/* Dummy definition for things like notifiers. Actual use gets link error. */
-struct srcu_struct { };
+#error "Unknown SRCU implementation specified to kernel configuration"
#endif
void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
@@ -64,11 +61,21 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
-#ifdef CONFIG_SRCU
+#ifdef CONFIG_NEED_SRCU_NMI_SAFE
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
+#else
+static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+{
+ return __srcu_read_lock(ssp);
+}
+static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+{
+ __srcu_read_unlock(ssp, idx);
+}
+#endif /* CONFIG_NEED_SRCU_NMI_SAFE */
+
void srcu_init(void);
-#else /* #ifdef CONFIG_SRCU */
-static inline void srcu_init(void) { }
-#endif /* #else #ifdef CONFIG_SRCU */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -104,6 +111,18 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+#define SRCU_NMI_UNKNOWN 0x0
+#define SRCU_NMI_UNSAFE 0x1
+#define SRCU_NMI_SAFE 0x2
+
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
+void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe);
+#else
+static inline void srcu_check_nmi_safety(struct srcu_struct *ssp,
+ bool nmi_safe) { }
+#endif
+
+
/**
* srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
* @p: the pointer to fetch and protect for later dereferencing
@@ -161,17 +180,36 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
+ srcu_check_nmi_safety(ssp, false);
retval = __srcu_read_lock(ssp);
rcu_lock_acquire(&(ssp)->dep_map);
return retval;
}
+/**
+ * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but in an NMI-safe manner.
+ * See srcu_read_lock() for more information.
+ */
+static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp)
+{
+ int retval;
+
+ srcu_check_nmi_safety(ssp, true);
+ retval = __srcu_read_lock_nmisafe(ssp);
+ rcu_lock_acquire(&(ssp)->dep_map);
+ return retval;
+}
+
/* Used by tracing, cannot be traced and cannot invoke lockdep. */
static inline notrace int
srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
+ srcu_check_nmi_safety(ssp, false);
retval = __srcu_read_lock(ssp);
return retval;
}
@@ -187,14 +225,32 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
__releases(ssp)
{
WARN_ON_ONCE(idx & ~0x1);
+ srcu_check_nmi_safety(ssp, false);
rcu_lock_release(&(ssp)->dep_map);
__srcu_read_unlock(ssp, idx);
}
+/**
+ * srcu_read_unlock_nmisafe - unregister an old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Exit an SRCU read-side critical section, but in an NMI-safe manner.
+ */
+static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+ __releases(ssp)
+{
+ WARN_ON_ONCE(idx & ~0x1);
+ srcu_check_nmi_safety(ssp, true);
+ rcu_lock_release(&(ssp)->dep_map);
+ __srcu_read_unlock_nmisafe(ssp, idx);
+}
+
/* Used by tracing, cannot be traced and cannot call lockdep. */
static inline notrace void
srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
{
+ srcu_check_nmi_safety(ssp, false);
__srcu_read_unlock(ssp, idx);
}
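A hedged sketch of the NMI-safe reader pattern introduced above; the srcu_struct, the protected state, and the sampling function are all assumptions:

struct example_state {
        u64 counter;
};

DEFINE_STATIC_SRCU(example_srcu);
static struct example_state __rcu *example_state_p;

/* Reader that may run in NMI context, e.g. from a perf NMI handler. */
static u64 example_nmi_sample(void)
{
        struct example_state *st;
        u64 val = 0;
        int idx;

        idx = srcu_read_lock_nmisafe(&example_srcu);
        st = srcu_dereference(example_state_p, &example_srcu);
        if (st)
                val = READ_ONCE(st->counter);
        srcu_read_unlock_nmisafe(&example_srcu, idx);
        return val;
}

Note that srcu_check_nmi_safety() complains when a given srcu_struct mixes NMI-safe and ordinary read-side variants, so every reader of example_srcu would have to use the _nmisafe calls.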
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index e3014319d1ad..c689a81752c9 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -23,8 +23,9 @@ struct srcu_struct;
*/
struct srcu_data {
/* Read-side state. */
- unsigned long srcu_lock_count[2]; /* Locks per CPU. */
- unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */
+ atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */
+ atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */
+ int srcu_nmi_safety; /* NMI-safe srcu_struct structure? */
/* Update-side state. */
spinlock_t __private lock ____cacheline_internodealigned_in_smp;
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 86b95ccb81bb..b07b277d6a16 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -33,11 +33,13 @@
* can use the extra bits to store other information besides PFN.
*/
#ifdef MAX_PHYSMEM_BITS
-#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
+#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
#else /* MAX_PHYSMEM_BITS */
-#define SWP_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT)
+#define SWP_PFN_BITS min_t(int, \
+ sizeof(phys_addr_t) * 8 - PAGE_SHIFT, \
+ SWP_TYPE_SHIFT)
#endif /* MAX_PHYSMEM_BITS */
-#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1)
+#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1)
/**
* Migration swap entry specific bitfield definitions. Layout:
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index e004ba04a9ae..684f1cd28730 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -228,6 +228,17 @@ enum {
*/
HCI_QUIRK_VALID_LE_STATES,
+ /* When this quirk is set, then erroneous data reporting
+ * is ignored. This is mainly due to the fact that the HCI
+ * Read Default Erroneous Data Reporting command is advertised,
+ * but not supported; these controllers often reply with unknown
* command and tend to lock up randomly, needing a hard reset.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BROKEN_ERR_DATA_REPORTING,
+
/*
* When this quirk is set, then the hci_suspend_notifier is not
* registered. This is intended for devices which drop completely
@@ -1424,7 +1435,6 @@ struct hci_std_codecs_v2 {
} __packed;
struct hci_vnd_codec_v2 {
- __u8 id;
__le16 cid;
__le16 vid;
__u8 transport;
diff --git a/include/net/ping.h b/include/net/ping.h
index e4ff3911cbf5..9233ad3de0ad 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -16,9 +16,6 @@
#define PING_HTABLE_SIZE 64
#define PING_HTABLE_MASK (PING_HTABLE_SIZE-1)
-#define ping_portaddr_for_each_entry(__sk, node, list) \
- hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
-
/*
* gid_t is either uint or ushort. We want to pass it to
* proc_dointvec_minmax(), so it must not be larger than MAX_INT
diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
index 01a70b27e026..65058faea4db 100644
--- a/include/net/sctp/stream_sched.h
+++ b/include/net/sctp/stream_sched.h
@@ -26,6 +26,8 @@ struct sctp_sched_ops {
int (*init)(struct sctp_stream *stream);
/* Init a stream */
int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp);
+ /* free a stream */
+ void (*free_sid)(struct sctp_stream *stream, __u16 sid);
/* Frees the entire thing */
void (*free)(struct sctp_stream *stream);
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
index c078c48a8e6d..a6190aa1b406 100644
--- a/include/trace/events/fscache.h
+++ b/include/trace/events/fscache.h
@@ -66,6 +66,7 @@ enum fscache_cookie_trace {
fscache_cookie_put_work,
fscache_cookie_see_active,
fscache_cookie_see_lru_discard,
+ fscache_cookie_see_lru_discard_clear,
fscache_cookie_see_lru_do_one,
fscache_cookie_see_relinquish,
fscache_cookie_see_withdraw,
@@ -149,6 +150,7 @@ enum fscache_access_trace {
EM(fscache_cookie_put_work, "PQ work ") \
EM(fscache_cookie_see_active, "- activ") \
EM(fscache_cookie_see_lru_discard, "- x-lru") \
+ EM(fscache_cookie_see_lru_discard_clear,"- lrudc") \
EM(fscache_cookie_see_lru_do_one, "- lrudo") \
EM(fscache_cookie_see_relinquish, "- x-rlq") \
EM(fscache_cookie_see_withdraw, "- x-wth") \
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index c7b056af9ef0..4c6a8fa5e7ed 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -91,7 +91,7 @@ typedef __s64 Elf64_Sxword;
#define DT_INIT 12
#define DT_FINI 13
#define DT_SONAME 14
-#define DT_RPATH 15
+#define DT_RPATH 15
#define DT_SYMBOLIC 16
#define DT_REL 17
#define DT_RELSZ 18
@@ -140,9 +140,9 @@ typedef __s64 Elf64_Sxword;
#define ELF64_ST_BIND(x) ELF_ST_BIND(x)
#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x)
-typedef struct dynamic{
+typedef struct dynamic {
Elf32_Sword d_tag;
- union{
+ union {
Elf32_Sword d_val;
Elf32_Addr d_ptr;
} d_un;
@@ -173,7 +173,7 @@ typedef struct elf64_rel {
Elf64_Xword r_info; /* index and type of relocation */
} Elf64_Rel;
-typedef struct elf32_rela{
+typedef struct elf32_rela {
Elf32_Addr r_offset;
Elf32_Word r_info;
Elf32_Sword r_addend;
@@ -185,7 +185,7 @@ typedef struct elf64_rela {
Elf64_Sxword r_addend; /* Constant addend used to compute value */
} Elf64_Rela;
-typedef struct elf32_sym{
+typedef struct elf32_sym {
Elf32_Word st_name;
Elf32_Addr st_value;
Elf32_Word st_size;
@@ -206,7 +206,7 @@ typedef struct elf64_sym {
#define EI_NIDENT 16
-typedef struct elf32_hdr{
+typedef struct elf32_hdr {
unsigned char e_ident[EI_NIDENT];
Elf32_Half e_type;
Elf32_Half e_machine;
@@ -246,7 +246,7 @@ typedef struct elf64_hdr {
#define PF_W 0x2
#define PF_X 0x1
-typedef struct elf32_phdr{
+typedef struct elf32_phdr {
Elf32_Word p_type;
Elf32_Off p_offset;
Elf32_Addr p_vaddr;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 8840cf3e20f2..61cd7ffd0f6a 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2707,8 +2707,10 @@ static __cold void io_tctx_exit_cb(struct callback_head *cb)
/*
* When @in_idle, we're in cancellation and it's racy to remove the
* node. It'll be removed by the end of cancellation, just ignore it.
+ * tctx can be NULL if the queueing of this task_work raced with
+ * work cancellation off the exec path.
*/
- if (!atomic_read(&tctx->in_idle))
+ if (tctx && !atomic_read(&tctx->in_idle))
io_uring_del_tctx_node((unsigned long)work->ctx);
complete(&work->completion);
}
diff --git a/ipc/sem.c b/ipc/sem.c
index c8496f98b139..00f88aa01ac5 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -2179,14 +2179,15 @@ long __do_semtimedop(int semid, struct sembuf *sops,
* scenarios where we were awakened externally, during the
* window between wake_q_add() and wake_up_q().
*/
+ rcu_read_lock();
error = READ_ONCE(queue.status);
if (error != -EINTR) {
/* see SEM_BARRIER_2 for purpose/pairing */
smp_acquire__after_ctrl_dep();
+ rcu_read_unlock();
goto out;
}
- rcu_read_lock();
locknum = sem_lock(sma, sops, nsops);
if (!ipc_valid_object(&sma->sem_perm))
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 802fc15b0d73..f27fa5ba7d72 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -74,7 +74,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
gfp_flags | __GFP_NOWARN);
if (selem) {
if (value)
- memcpy(SDATA(selem)->data, value, smap->map.value_size);
+ copy_map_value(&smap->map, SDATA(selem)->data, value);
return selem;
}
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index fd4020835ec6..367b0a42ada9 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -167,7 +167,6 @@ struct cgroup_mgctx {
extern spinlock_t css_set_lock;
extern struct cgroup_subsys *cgroup_subsys[];
extern struct list_head cgroup_roots;
-extern struct file_system_type cgroup_fs_type;
/* iterate across the hierarchies */
#define for_each_root(root) \
diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
index 8a44b93da0f3..c2f9c912df1c 100644
--- a/kernel/configs/tiny.config
+++ b/kernel/configs/tiny.config
@@ -7,5 +7,6 @@ CONFIG_KERNEL_XZ=y
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
# CONFIG_SLAB is not set
-# CONFIG_SLUB is not set
-CONFIG_SLOB=y
+# CONFIG_SLOB_DEPRECATED is not set
+CONFIG_SLUB=y
+CONFIG_SLUB_TINY=y
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 67d3c48a1522..5c7e9ba7cd6b 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -545,6 +545,7 @@ static void kdb_msg_write(const char *msg, int msg_len)
{
struct console *c;
const char *cp;
+ int cookie;
int len;
if (msg_len == 0)
@@ -558,8 +559,20 @@ static void kdb_msg_write(const char *msg, int msg_len)
cp++;
}
- for_each_console(c) {
- if (!(c->flags & CON_ENABLED))
+ /*
+ * The console_srcu_read_lock() only provides safe console list
+ * traversal. The use of the ->write() callback relies on all other
+ * CPUs being stopped at the moment and console drivers being able to
+ * handle reentrance when @oops_in_progress is set.
+ *
+ * There is no guarantee that every console driver can handle
+ * reentrance in this way; the developer deploying the debugger
+ * is responsible for ensuring that the console drivers they
+ * have selected handle reentrance appropriately.
+ */
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
+ if (!(console_srcu_read_flags(c) & CON_ENABLED))
continue;
if (c == dbg_io_ops->cons)
continue;
@@ -577,6 +590,7 @@ static void kdb_msg_write(const char *msg, int msg_len)
--oops_in_progress;
touch_nmi_watchdog();
}
+ console_srcu_read_unlock(cookie);
}
int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9d15d2d96119..7f04f995c975 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2291,6 +2291,7 @@ event_sched_out(struct perf_event *event,
!event->pending_work) {
event->pending_work = 1;
dec = false;
+ WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
task_work_add(current, &event->pending_task, TWA_RESUME);
}
if (dec)
@@ -2336,6 +2337,7 @@ group_sched_out(struct perf_event *group_event,
#define DETACH_GROUP 0x01UL
#define DETACH_CHILD 0x02UL
+#define DETACH_DEAD 0x04UL
/*
* Cross CPU call to remove a performance event
@@ -2356,12 +2358,20 @@ __perf_remove_from_context(struct perf_event *event,
update_cgrp_time_from_cpuctx(cpuctx, false);
}
+ /*
+ * Ensure event_sched_out() switches to OFF, at the very least
+ * this avoids raising perf_pending_task() at this time.
+ */
+ if (flags & DETACH_DEAD)
+ event->pending_disable = 1;
event_sched_out(event, cpuctx, ctx);
if (flags & DETACH_GROUP)
perf_group_detach(event);
if (flags & DETACH_CHILD)
perf_child_detach(event);
list_del_event(event, ctx);
+ if (flags & DETACH_DEAD)
+ event->state = PERF_EVENT_STATE_DEAD;
if (!ctx->nr_events && ctx->is_active) {
if (ctx == &cpuctx->ctx)
@@ -5121,9 +5131,7 @@ int perf_event_release_kernel(struct perf_event *event)
ctx = perf_event_ctx_lock(event);
WARN_ON_ONCE(ctx->parent_ctx);
- perf_remove_from_context(event, DETACH_GROUP);
- raw_spin_lock_irq(&ctx->lock);
/*
* Mark this event as STATE_DEAD, there is no external reference to it
* anymore.
@@ -5135,8 +5143,7 @@ int perf_event_release_kernel(struct perf_event *event)
* Thus this guarantees that we will in fact observe and kill _ALL_
* child events.
*/
- event->state = PERF_EVENT_STATE_DEAD;
- raw_spin_unlock_irq(&ctx->lock);
+ perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD);
perf_event_ctx_unlock(event, ctx);
@@ -6577,6 +6584,8 @@ static void perf_pending_task(struct callback_head *head)
if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
preempt_enable_notrace();
+
+ put_event(event);
}
#ifdef CONFIG_GUEST_PERF_EVENTS
@@ -9030,7 +9039,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
PERF_RECORD_KSYMBOL_TYPE_BPF,
(u64)(unsigned long)subprog->bpf_func,
subprog->jited_len, unregister,
- prog->aux->ksym.name);
+ subprog->aux->ksym.name);
}
}
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d..cfb09ca1b1bc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -535,6 +535,9 @@ void put_task_stack(struct task_struct *tsk)
void free_task(struct task_struct *tsk)
{
+#ifdef CONFIG_SECCOMP
+ WARN_ON_ONCE(tsk->seccomp.filter);
+#endif
release_user_cpus_ptr(tsk);
scs_release(tsk);
@@ -2043,15 +2046,6 @@ static __latent_entropy struct task_struct *copy_process(
return ERR_PTR(-EINVAL);
}
- /*
- * If the new process will be in a different time namespace
- * do not allow it to share VM or a thread group with the forking task.
- */
- if (clone_flags & (CLONE_THREAD | CLONE_VM)) {
- if (nsp->time_ns != nsp->time_ns_for_children)
- return ERR_PTR(-EINVAL);
- }
-
if (clone_flags & CLONE_PIDFD) {
/*
* - CLONE_DETACHED is blocked so that we can potentially
@@ -2406,12 +2400,6 @@ static __latent_entropy struct task_struct *copy_process(
spin_lock(&current->sighand->siglock);
- /*
- * Copy seccomp details explicitly here, in case they were changed
- * before holding sighand lock.
- */
- copy_seccomp(p);
-
rv_task_fork(p);
rseq_fork(p, clone_flags);
@@ -2428,6 +2416,14 @@ static __latent_entropy struct task_struct *copy_process(
goto bad_fork_cancel_cgroup;
}
+ /* No more failure paths after this point. */
+
+ /*
+ * Copy seccomp details explicitly here, in case they were changed
+ * before holding sighand lock.
+ */
+ copy_seccomp(p);
+
init_task_pid_links(p);
if (likely(p->pid)) {
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index fe12dfe254ec..54d077e1a2dc 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -14,10 +14,12 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/minmax.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/sched.h>
+#include <linux/string.h>
#include <linux/uaccess.h>
#include "encoding.h"
@@ -1308,3 +1310,51 @@ noinline void __tsan_atomic_signal_fence(int memorder)
}
}
EXPORT_SYMBOL(__tsan_atomic_signal_fence);
+
+#ifdef __HAVE_ARCH_MEMSET
+void *__tsan_memset(void *s, int c, size_t count);
+noinline void *__tsan_memset(void *s, int c, size_t count)
+{
+ /*
+ * Instead of not setting up watchpoints where accessed size is greater
+ * than MAX_ENCODABLE_SIZE, truncate checked size to MAX_ENCODABLE_SIZE.
+ */
+ size_t check_len = min_t(size_t, count, MAX_ENCODABLE_SIZE);
+
+ check_access(s, check_len, KCSAN_ACCESS_WRITE, _RET_IP_);
+ return memset(s, c, count);
+}
+#else
+void *__tsan_memset(void *s, int c, size_t count) __alias(memset);
+#endif
+EXPORT_SYMBOL(__tsan_memset);
+
+#ifdef __HAVE_ARCH_MEMMOVE
+void *__tsan_memmove(void *dst, const void *src, size_t len);
+noinline void *__tsan_memmove(void *dst, const void *src, size_t len)
+{
+ size_t check_len = min_t(size_t, len, MAX_ENCODABLE_SIZE);
+
+ check_access(dst, check_len, KCSAN_ACCESS_WRITE, _RET_IP_);
+ check_access(src, check_len, 0, _RET_IP_);
+ return memmove(dst, src, len);
+}
+#else
+void *__tsan_memmove(void *dst, const void *src, size_t len) __alias(memmove);
+#endif
+EXPORT_SYMBOL(__tsan_memmove);
+
+#ifdef __HAVE_ARCH_MEMCPY
+void *__tsan_memcpy(void *dst, const void *src, size_t len);
+noinline void *__tsan_memcpy(void *dst, const void *src, size_t len)
+{
+ size_t check_len = min_t(size_t, len, MAX_ENCODABLE_SIZE);
+
+ check_access(dst, check_len, KCSAN_ACCESS_WRITE, _RET_IP_);
+ check_access(src, check_len, 0, _RET_IP_);
+ return memcpy(dst, src, len);
+}
+#else
+void *__tsan_memcpy(void *dst, const void *src, size_t len) __alias(memcpy);
+#endif
+EXPORT_SYMBOL(__tsan_memcpy);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index eec72ca962e2..a487ff24129b 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -157,7 +157,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
- if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
+ if ((flags & CLONE_VM) ||
+ likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
get_nsproxy(old_ns);
return 0;
}
@@ -179,7 +180,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
if (IS_ERR(new_ns))
return PTR_ERR(new_ns);
- timens_on_fork(new_ns, tsk);
+ if ((flags & CLONE_VM) == 0)
+ timens_on_fork(new_ns, tsk);
tsk->nsproxy = new_ns;
return 0;
@@ -254,6 +256,23 @@ void exit_task_namespaces(struct task_struct *p)
switch_task_namespaces(p, NULL);
}
+int exec_task_namespaces(void)
+{
+ struct task_struct *tsk = current;
+ struct nsproxy *new;
+
+ if (tsk->nsproxy->time_ns_for_children == tsk->nsproxy->time_ns)
+ return 0;
+
+ new = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+
+ timens_on_fork(new, tsk);
+ switch_task_namespaces(tsk, new);
+ return 0;
+}
+
static int check_setns_flags(unsigned long flags)
{
if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index e4f1e7478b52..7decf1e9c486 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -79,13 +79,20 @@ int oops_in_progress;
EXPORT_SYMBOL(oops_in_progress);
/*
- * console_sem protects the console_drivers list, and also
- * provides serialisation for access to the entire console
- * driver system.
+ * console_mutex protects console_list updates and console->flags updates.
+ * The flags are synchronized only for consoles that are registered, i.e.
+ * accessible via the console list.
+ */
+static DEFINE_MUTEX(console_mutex);
+
+/*
+ * console_sem protects updates to console->seq and console_suspended,
+ * and also provides serialization for console printing.
*/
static DEFINE_SEMAPHORE(console_sem);
-struct console *console_drivers;
-EXPORT_SYMBOL_GPL(console_drivers);
+HLIST_HEAD(console_list);
+EXPORT_SYMBOL_GPL(console_list);
+DEFINE_STATIC_SRCU(console_srcu);
/*
* System may need to suppress printk message under certain
@@ -103,6 +110,19 @@ static int __read_mostly suppress_panic_printk;
static struct lockdep_map console_lock_dep_map = {
.name = "console_lock"
};
+
+void lockdep_assert_console_list_lock_held(void)
+{
+ lockdep_assert_held(&console_mutex);
+}
+EXPORT_SYMBOL(lockdep_assert_console_list_lock_held);
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+bool console_srcu_read_lock_is_held(void)
+{
+ return srcu_read_lock_held(&console_srcu);
+}
#endif
enum devkmsg_log_bits {
@@ -220,6 +240,69 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
}
#endif /* CONFIG_PRINTK && CONFIG_SYSCTL */
+/**
+ * console_list_lock - Lock the console list
+ *
+ * For console list or console->flags updates
+ */
+void console_list_lock(void)
+{
+ /*
+ * In unregister_console() and console_force_preferred_locked(),
+ * synchronize_srcu() is called with the console_list_lock held.
+ * Therefore it is not allowed that the console_list_lock is taken
+ * with the srcu_lock held.
+ *
+ * Detecting if this context is really in the read-side critical
+ * section is only possible if the appropriate debug options are
+ * enabled.
+ */
+ WARN_ON_ONCE(debug_lockdep_rcu_enabled() &&
+ srcu_read_lock_held(&console_srcu));
+
+ mutex_lock(&console_mutex);
+}
+EXPORT_SYMBOL(console_list_lock);
+
+/**
+ * console_list_unlock - Unlock the console list
+ *
+ * Counterpart to console_list_lock()
+ */
+void console_list_unlock(void)
+{
+ mutex_unlock(&console_mutex);
+}
+EXPORT_SYMBOL(console_list_unlock);
+
+/**
+ * console_srcu_read_lock - Register a new reader for the
+ * SRCU-protected console list
+ *
+ * Use for_each_console_srcu() to iterate the console list
+ *
+ * Context: Any context.
+ * Return: A cookie to pass to console_srcu_read_unlock().
+ */
+int console_srcu_read_lock(void)
+{
+ return srcu_read_lock_nmisafe(&console_srcu);
+}
+EXPORT_SYMBOL(console_srcu_read_lock);
+
+/**
+ * console_srcu_read_unlock - Unregister an old reader from
+ * the SRCU-protected console list
+ * @cookie: cookie returned from console_srcu_read_lock()
+ *
+ * Counterpart to console_srcu_read_lock()
+ */
+void console_srcu_read_unlock(int cookie)
+{
+ srcu_read_unlock_nmisafe(&console_srcu, cookie);
+}
+EXPORT_SYMBOL(console_srcu_read_unlock);
+
/*
* Helper macros to handle lockdep when locking/unlocking console_sem. We use
* macros instead of functions so that _RET_IP_ contains useful information.
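The console_srcu_read_lock()/console_srcu_read_unlock() pair added above is meant to bracket for_each_console_srcu() walks, as the kdb_msg_write() and console_flush_all() changes in this patch do. A minimal reader sketch (the loop body is illustrative):

/* Illustrative SRCU console-list reader; usable from any context. */
static void example_walk_consoles(void)
{
        struct console *con;
        int cookie;

        cookie = console_srcu_read_lock();
        for_each_console_srcu(con) {
                if (!(console_srcu_read_flags(con) & CON_ENABLED))
                        continue;
                /* ... inspect con here without taking console_lock ... */
        }
        console_srcu_read_unlock(cookie);
}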
@@ -1814,13 +1897,13 @@ static void console_lock_spinning_enable(void)
* safe to start busy waiting for the lock. Second, it checks if
* there is a busy waiter and passes the lock rights to her.
*
- * Important: Callers lose the lock if there was a busy waiter.
- * They must not touch items synchronized by console_lock
- * in this case.
+ * Important: Callers lose both the console_lock and the SRCU read lock if
+ * there was a busy waiter. They must not touch items synchronized by
+ * console_lock or SRCU read lock in this case.
*
* Return: 1 if the lock rights were passed, 0 otherwise.
*/
-static int console_lock_spinning_disable_and_check(void)
+static int console_lock_spinning_disable_and_check(int cookie)
{
int waiter;
@@ -1840,6 +1923,12 @@ static int console_lock_spinning_disable_and_check(void)
spin_release(&console_owner_dep_map, _THIS_IP_);
/*
+ * Preserve lockdep lock ordering. Release the SRCU read lock before
+ * releasing the console_lock.
+ */
+ console_srcu_read_unlock(cookie);
+
+ /*
* Hand off console_lock to waiter. The waiter will perform
* the up(). After this, the waiter is the console_lock owner.
*/
@@ -2322,7 +2411,7 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
char *text, size_t text_len,
struct dev_printk_info *dev_info) { return 0; }
static void console_lock_spinning_enable(void) { }
-static int console_lock_spinning_disable_and_check(void) { return 0; }
+static int console_lock_spinning_disable_and_check(int cookie) { return 0; }
static void call_console_driver(struct console *con, const char *text, size_t len,
char *dropped_text)
{
@@ -2391,7 +2480,7 @@ static int __add_preferred_console(char *name, int idx, char *options,
return -E2BIG;
if (!brl_options)
preferred_console = i;
- strlcpy(c->name, name, sizeof(c->name));
+ strscpy(c->name, name, sizeof(c->name));
c->options = options;
set_user_specified(c, user_specified);
braille_set_options(c, brl_options);
@@ -2553,10 +2642,10 @@ static int console_cpu_notify(unsigned int cpu)
}
/**
- * console_lock - lock the console system for exclusive use.
+ * console_lock - block the console subsystem from printing
*
- * Acquires a lock which guarantees that the caller has
- * exclusive access to the console system and the console_drivers list.
+ * Acquires a lock which guarantees that no consoles will
+ * be in or enter their write() callback.
*
* Can sleep, returns nothing.
*/
@@ -2573,10 +2662,10 @@ void console_lock(void)
EXPORT_SYMBOL(console_lock);
/**
- * console_trylock - try to lock the console system for exclusive use.
+ * console_trylock - try to block the console subsystem from printing
*
- * Try to acquire a lock which guarantees that the caller has exclusive
- * access to the console system and the console_drivers list.
+ * Try to acquire a lock which guarantees that no consoles will
+ * be in or enter their write() callback.
*
* returns 1 on success, and 0 on failure to acquire the lock.
*/
@@ -2623,11 +2712,13 @@ static bool abandon_console_lock_in_panic(void)
* Check if the given console is currently capable and allowed to print
* records.
*
- * Requires the console_lock.
+ * Requires the console_srcu_read_lock.
*/
static inline bool console_is_usable(struct console *con)
{
- if (!(con->flags & CON_ENABLED))
+ short flags = console_srcu_read_flags(con);
+
+ if (!(flags & CON_ENABLED))
return false;
if (!con->write)
@@ -2638,8 +2729,7 @@ static inline bool console_is_usable(struct console *con)
* allocated. So unless they're explicitly marked as being able to
* cope (CON_ANYTIME) don't call them until this CPU is officially up.
*/
- if (!cpu_online(raw_smp_processor_id()) &&
- !(con->flags & CON_ANYTIME))
+ if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
return false;
return true;
@@ -2664,16 +2754,18 @@ static void __console_unlock(void)
* DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL.
*
* @handover will be set to true if a printk waiter has taken over the
- * console_lock, in which case the caller is no longer holding the
- * console_lock. Otherwise it is set to false.
+ * console_lock, in which case the caller is no longer holding both the
+ * console_lock and the SRCU read lock. Otherwise it is set to false.
+ *
+ * @cookie is the cookie from the SRCU read lock.
*
* Returns false if the given console has no next record to print, otherwise
* true.
*
- * Requires the console_lock.
+ * Requires the console_lock and the SRCU read lock.
*/
static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
- char *dropped_text, bool *handover)
+ char *dropped_text, bool *handover, int cookie)
{
static int panic_console_dropped;
struct printk_info info;
@@ -2733,7 +2825,7 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_
con->seq++;
- *handover = console_lock_spinning_disable_and_check();
+ *handover = console_lock_spinning_disable_and_check(cookie);
printk_safe_exit_irqrestore(flags);
skip:
return true;
@@ -2770,6 +2862,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
bool any_usable = false;
struct console *con;
bool any_progress;
+ int cookie;
*next_seq = 0;
*handover = false;
@@ -2777,23 +2870,29 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
do {
any_progress = false;
- for_each_console(con) {
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
bool progress;
if (!console_is_usable(con))
continue;
any_usable = true;
- if (con->flags & CON_EXTENDED) {
+ if (console_srcu_read_flags(con) & CON_EXTENDED) {
/* Extended consoles do not print "dropped messages". */
progress = console_emit_next_record(con, &text[0],
&ext_text[0], NULL,
- handover);
+ handover, cookie);
} else {
progress = console_emit_next_record(con, &text[0],
NULL, &dropped_text[0],
- handover);
+ handover, cookie);
}
+
+ /*
+ * If a handover has occurred, the SRCU read lock
+ * is already released.
+ */
if (*handover)
return false;
@@ -2807,21 +2906,26 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
/* Allow panic_cpu to take over the consoles safely. */
if (abandon_console_lock_in_panic())
- return false;
+ goto abandon;
if (do_cond_resched)
cond_resched();
}
+ console_srcu_read_unlock(cookie);
} while (any_progress);
return any_usable;
+
+abandon:
+ console_srcu_read_unlock(cookie);
+ return false;
}
/**
- * console_unlock - unlock the console system
+ * console_unlock - unblock the console subsystem from printing
*
- * Releases the console_lock which the caller holds on the console system
- * and the console driver list.
+ * Releases the console_lock which the caller holds to block printing of
+ * the console subsystem.
*
* While the console_lock was held, console output may have been buffered
* by printk(). If this is the case, console_unlock(); emits
@@ -2899,10 +3003,14 @@ EXPORT_SYMBOL(console_conditional_schedule);
void console_unblank(void)
{
struct console *c;
+ int cookie;
/*
- * console_unblank can no longer be called in interrupt context unless
- * oops_in_progress is set to 1..
+ * Stop console printing because the unblank() callback may
+ * assume the console is not within its write() callback.
+ *
+ * If @oops_in_progress is set, this may be an atomic context.
+ * In that case, attempt a trylock as best-effort.
*/
if (oops_in_progress) {
if (down_trylock_console_sem() != 0)
@@ -2912,9 +3020,14 @@ void console_unblank(void)
console_locked = 1;
console_may_schedule = 0;
- for_each_console(c)
- if ((c->flags & CON_ENABLED) && c->unblank)
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
+ if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank)
c->unblank();
+ }
+ console_srcu_read_unlock(cookie);
+
console_unlock();
if (!oops_in_progress)
@@ -2941,11 +3054,21 @@ void console_flush_on_panic(enum con_flush_mode mode)
if (mode == CONSOLE_REPLAY_ALL) {
struct console *c;
+ int cookie;
u64 seq;
seq = prb_first_valid_seq(prb);
- for_each_console(c)
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
+ /*
+ * If the above console_trylock() failed, this is an
+ * unsynchronized assignment. But in that case, the
+ * kernel is in "hope and pray" mode anyway.
+ */
c->seq = seq;
+ }
+ console_srcu_read_unlock(cookie);
}
console_unlock();
}
@@ -2957,15 +3080,25 @@ struct tty_driver *console_device(int *index)
{
struct console *c;
struct tty_driver *driver = NULL;
+ int cookie;
+ /*
+ * Take console_lock to serialize device() callback with
+ * other console operations. For example, fg_console is
+ * modified under console_lock when switching vt.
+ */
console_lock();
- for_each_console(c) {
+
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
if (!c->device)
continue;
driver = c->device(c, index);
if (driver)
break;
}
+ console_srcu_read_unlock(cookie);
+
console_unlock();
return driver;
}
@@ -2978,17 +3111,25 @@ struct tty_driver *console_device(int *index)
void console_stop(struct console *console)
{
__pr_flush(console, 1000, true);
- console_lock();
- console->flags &= ~CON_ENABLED;
- console_unlock();
+ console_list_lock();
+ console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
+ console_list_unlock();
+
+ /*
+ * Ensure that all SRCU list walks have completed. All contexts must
+ * be able to see that this console is disabled so that (for example)
+ * the caller can suspend the port without risk of another context
+ * using the port.
+ */
+ synchronize_srcu(&console_srcu);
}
EXPORT_SYMBOL(console_stop);
void console_start(struct console *console)
{
- console_lock();
- console->flags |= CON_ENABLED;
- console_unlock();
+ console_list_lock();
+ console_srcu_write_flags(console, console->flags | CON_ENABLED);
+ console_list_unlock();
__pr_flush(console, 1000, true);
}
EXPORT_SYMBOL(console_start);
@@ -3081,6 +3222,72 @@ static void try_enable_default_console(struct console *newcon)
(con->flags & CON_BOOT) ? "boot" : "", \
con->name, con->index, ##__VA_ARGS__)
+static void console_init_seq(struct console *newcon, bool bootcon_registered)
+{
+ struct console *con;
+ bool handover;
+
+ if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) {
+ /* Get a consistent copy of @syslog_seq. */
+ mutex_lock(&syslog_lock);
+ newcon->seq = syslog_seq;
+ mutex_unlock(&syslog_lock);
+ } else {
+ /* Begin with next message added to ringbuffer. */
+ newcon->seq = prb_next_seq(prb);
+
+ /*
+ * If any enabled boot consoles are due to be unregistered
+ * shortly, some may not be caught up and may be the same
+ * device as @newcon. Since it is not known which boot console
+ * is the same device, flush all consoles and, if necessary,
+ * start with the message of the enabled boot console that is
+ * the furthest behind.
+ */
+ if (bootcon_registered && !keep_bootcon) {
+ /*
+ * Hold the console_lock to stop console printing and
+ * guarantee safe access to console->seq.
+ */
+ console_lock();
+
+ /*
+ * Flush all consoles and set the console to start at
+ * the next unprinted sequence number.
+ */
+ if (!console_flush_all(true, &newcon->seq, &handover)) {
+ /*
+ * Flushing failed. Just choose the lowest
+ * sequence of the enabled boot consoles.
+ */
+
+ /*
+ * If there was a handover, this context no
+ * longer holds the console_lock.
+ */
+ if (handover)
+ console_lock();
+
+ newcon->seq = prb_next_seq(prb);
+ for_each_console(con) {
+ if ((con->flags & CON_BOOT) &&
+ (con->flags & CON_ENABLED) &&
+ con->seq < newcon->seq) {
+ newcon->seq = con->seq;
+ }
+ }
+ }
+
+ console_unlock();
+ }
+ }
+}
+
+#define console_first() \
+ hlist_entry(console_list.first, struct console, node)
+
+static int unregister_console_locked(struct console *console);
+
/*
* The console driver calls this routine during kernel initialization
* to register the console printing procedure with printk() and to
@@ -3103,28 +3310,29 @@ static void try_enable_default_console(struct console *newcon)
void register_console(struct console *newcon)
{
struct console *con;
- bool bootcon_enabled = false;
- bool realcon_enabled = false;
+ bool bootcon_registered = false;
+ bool realcon_registered = false;
int err;
+ console_list_lock();
+
for_each_console(con) {
if (WARN(con == newcon, "console '%s%d' already registered\n",
- con->name, con->index))
- return;
- }
+ con->name, con->index)) {
+ goto unlock;
+ }
- for_each_console(con) {
if (con->flags & CON_BOOT)
- bootcon_enabled = true;
+ bootcon_registered = true;
else
- realcon_enabled = true;
+ realcon_registered = true;
}
/* Do not register boot consoles when there already is a real one. */
- if (newcon->flags & CON_BOOT && realcon_enabled) {
+ if ((newcon->flags & CON_BOOT) && realcon_registered) {
pr_info("Too late to register bootconsole %s%d\n",
newcon->name, newcon->index);
- return;
+ goto unlock;
}
/*
@@ -3140,8 +3348,8 @@ void register_console(struct console *newcon)
* flag set and will be first in the list.
*/
if (preferred_console < 0) {
- if (!console_drivers || !console_drivers->device ||
- console_drivers->flags & CON_BOOT) {
+ if (hlist_empty(&console_list) || !console_first()->device ||
+ console_first()->flags & CON_BOOT) {
try_enable_default_console(newcon);
}
}
@@ -3155,7 +3363,7 @@ void register_console(struct console *newcon)
/* printk() messages are not printed to the Braille console. */
if (err || newcon->flags & CON_BRL)
- return;
+ goto unlock;
/*
* If we have a bootconsole, and are switching to a real console,
@@ -3163,39 +3371,38 @@ void register_console(struct console *newcon)
* the real console are the same physical device, it's annoying to
* see the beginning boot messages twice
*/
- if (bootcon_enabled &&
+ if (bootcon_registered &&
((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
newcon->flags &= ~CON_PRINTBUFFER;
}
+ newcon->dropped = 0;
+ console_init_seq(newcon, bootcon_registered);
+
/*
- * Put this console in the list - keep the
- * preferred driver at the head of the list.
+ * Put this console in the list - keep the
+ * preferred driver at the head of the list.
*/
- console_lock();
- if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) {
- newcon->next = console_drivers;
- console_drivers = newcon;
- if (newcon->next)
- newcon->next->flags &= ~CON_CONSDEV;
- /* Ensure this flag is always set for the head of the list */
+ if (hlist_empty(&console_list)) {
+ /* Ensure CON_CONSDEV is always set for the head. */
newcon->flags |= CON_CONSDEV;
- } else {
- newcon->next = console_drivers->next;
- console_drivers->next = newcon;
- }
+ hlist_add_head_rcu(&newcon->node, &console_list);
+
+ } else if (newcon->flags & CON_CONSDEV) {
+ /* Only the new head can have CON_CONSDEV set. */
+ console_srcu_write_flags(console_first(), console_first()->flags & ~CON_CONSDEV);
+ hlist_add_head_rcu(&newcon->node, &console_list);
- newcon->dropped = 0;
- if (newcon->flags & CON_PRINTBUFFER) {
- /* Get a consistent copy of @syslog_seq. */
- mutex_lock(&syslog_lock);
- newcon->seq = syslog_seq;
- mutex_unlock(&syslog_lock);
} else {
- /* Begin with next message. */
- newcon->seq = prb_next_seq(prb);
+ hlist_add_behind_rcu(&newcon->node, console_list.first);
}
- console_unlock();
+
+ /*
+ * No need to synchronize SRCU here! The caller does not rely
+ * on all contexts being able to see the new console before
+ * register_console() completes.
+ */
+
console_sysfs_notify();
/*
@@ -3206,21 +3413,28 @@ void register_console(struct console *newcon)
* went to the bootconsole (that they do not see on the real console)
*/
con_printk(KERN_INFO, newcon, "enabled\n");
- if (bootcon_enabled &&
+ if (bootcon_registered &&
((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) &&
!keep_bootcon) {
- for_each_console(con)
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(con, tmp, &console_list, node) {
if (con->flags & CON_BOOT)
- unregister_console(con);
+ unregister_console_locked(con);
+ }
}
+unlock:
+ console_list_unlock();
}
EXPORT_SYMBOL(register_console);
-int unregister_console(struct console *console)
+/* Must be called under console_list_lock(). */
+static int unregister_console_locked(struct console *console)
{
- struct console *con;
int res;
+ lockdep_assert_console_list_lock_held();
+
con_printk(KERN_INFO, console, "disabled\n");
res = _braille_unregister_console(console);
@@ -3229,48 +3443,94 @@ int unregister_console(struct console *console)
if (res > 0)
return 0;
- res = -ENODEV;
- console_lock();
- if (console_drivers == console) {
- console_drivers=console->next;
- res = 0;
- } else {
- for_each_console(con) {
- if (con->next == console) {
- con->next = console->next;
- res = 0;
- break;
- }
- }
- }
+ /* Disable it unconditionally */
+ console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
+
+ if (!console_is_registered_locked(console))
+ return -ENODEV;
- if (res)
- goto out_disable_unlock;
+ hlist_del_init_rcu(&console->node);
/*
+ * <HISTORICAL>
* If this isn't the last console and it has CON_CONSDEV set, we
* need to set it on the next preferred console.
+ * </HISTORICAL>
+ *
+ * The above makes no sense as there is no guarantee that the next
+ * console has any device attached. Oh well....
*/
- if (console_drivers != NULL && console->flags & CON_CONSDEV)
- console_drivers->flags |= CON_CONSDEV;
+ if (!hlist_empty(&console_list) && console->flags & CON_CONSDEV)
+ console_srcu_write_flags(console_first(), console_first()->flags | CON_CONSDEV);
+
+ /*
+ * Ensure that all SRCU list walks have completed. All contexts
+ * must not be able to see this console in the list so that any
+ * exit/cleanup routines can be performed safely.
+ */
+ synchronize_srcu(&console_srcu);
- console->flags &= ~CON_ENABLED;
- console_unlock();
console_sysfs_notify();
if (console->exit)
res = console->exit(console);
return res;
+}
-out_disable_unlock:
- console->flags &= ~CON_ENABLED;
- console_unlock();
+int unregister_console(struct console *console)
+{
+ int res;
+ console_list_lock();
+ res = unregister_console_locked(console);
+ console_list_unlock();
return res;
}
EXPORT_SYMBOL(unregister_console);
+/**
+ * console_force_preferred_locked - force a registered console preferred
+ * @con: The registered console to force preferred.
+ *
+ * Must be called under console_list_lock().
+ */
+void console_force_preferred_locked(struct console *con)
+{
+ struct console *cur_pref_con;
+
+ if (!console_is_registered_locked(con))
+ return;
+
+ cur_pref_con = console_first();
+
+ /* Already preferred? */
+ if (cur_pref_con == con)
+ return;
+
+ /*
+ * Delete, but do not re-initialize the entry. This allows the console
+ * to continue to appear registered (via any hlist_unhashed_lockless()
+ * checks), even though it was briefly removed from the console list.
+ */
+ hlist_del_rcu(&con->node);
+
+ /*
+ * Ensure that all SRCU list walks have completed so that the console
+ * can be added to the beginning of the console list and its forward
+ * list pointer can be re-initialized.
+ */
+ synchronize_srcu(&console_srcu);
+
+ con->flags |= CON_CONSDEV;
+ WARN_ON(!con->device);
+
+ /* Only the new head can have CON_CONSDEV set. */
+ console_srcu_write_flags(cur_pref_con, cur_pref_con->flags & ~CON_CONSDEV);
+ hlist_add_head_rcu(&con->node, &console_list);
+}
+EXPORT_SYMBOL(console_force_preferred_locked);
+
/*
* Initialize the console device. This is called *early*, so
* we can't necessarily depend on lots of kernel help here.
@@ -3317,10 +3577,12 @@ void __init console_init(void)
*/
static int __init printk_late_init(void)
{
+ struct hlist_node *tmp;
struct console *con;
int ret;
- for_each_console(con) {
+ console_list_lock();
+ hlist_for_each_entry_safe(con, tmp, &console_list, node) {
if (!(con->flags & CON_BOOT))
continue;
@@ -3337,9 +3599,11 @@ static int __init printk_late_init(void)
*/
pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n",
con->name, con->index);
- unregister_console(con);
+ unregister_console_locked(con);
}
}
+ console_list_unlock();
+
ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
console_cpu_notify);
WARN_ON(ret < 0);
@@ -3359,6 +3623,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
struct console *c;
u64 last_diff = 0;
u64 printk_seq;
+ int cookie;
u64 diff;
u64 seq;
@@ -3369,9 +3634,15 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
for (;;) {
diff = 0;
+ /*
+ * Hold the console_lock to guarantee safe access to
+ * console->seq and to prevent changes to @console_suspended
+ * until all consoles have been processed.
+ */
console_lock();
- for_each_console(c) {
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(c) {
if (con && con != c)
continue;
if (!console_is_usable(c))
@@ -3380,6 +3651,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
if (printk_seq < seq)
diff += seq - printk_seq;
}
+ console_srcu_read_unlock(cookie);
/*
* If consoles are suspended, it cannot be expected that they
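For reference, a minimal sketch (not part of the patch) of the SRCU reader-side pattern that the __pr_flush() hunk above switches to. It assumes only the console_srcu_read_lock()/for_each_console_srcu()/console_srcu_read_unlock() helpers introduced earlier in this series; console_is_usable() stays printk-internal and count_usable_consoles() is an illustrative name:

    /* Illustrative helper, not part of the patch. */
    static int count_usable_consoles(void)
    {
        struct console *con;
        int nr_usable = 0;
        int cookie;

        /* SRCU protects the list structure; per-console state may still change. */
        cookie = console_srcu_read_lock();
        for_each_console_srcu(con) {
            if (console_is_usable(con))
                nr_usable++;
        }
        console_srcu_read_unlock(cookie);

        return nr_usable;
    }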
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index 2b7b6ddab4f7..2dc4d5a1f1ff 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -203,7 +203,7 @@
* prb_rec_init_wr(&r, 5);
*
* // try to extend, but only if it does not exceed 32 bytes
- * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id()), 32) {
+ * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id(), 32)) {
* snprintf(&r.text_buf[r.info->text_len],
* r.text_buf_size - r.info->text_len, "hello");
*
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index d471d22a5e21..ab62074174c3 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -54,27 +54,25 @@ config RCU_EXPERT
Say N if you are unsure.
config SRCU
- bool
- help
- This option selects the sleepable version of RCU. This version
- permits arbitrary sleeping or blocking within RCU read-side critical
- sections.
+ def_bool y
config TINY_SRCU
bool
- default y if SRCU && TINY_RCU
+ default y if TINY_RCU
help
This option selects the single-CPU non-preemptible version of SRCU.
config TREE_SRCU
bool
- default y if SRCU && !TINY_RCU
+ default y if !TINY_RCU
help
This option selects the full-fledged version of SRCU.
+config NEED_SRCU_NMI_SAFE
+ def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
+
config TASKS_RCU_GENERIC
def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
- select SRCU
help
This option enables generic infrastructure code supporting
task-based RCU implementations. Not for manual selection.
@@ -311,4 +309,12 @@ config TASKS_TRACE_RCU_READ_MB
Say N here if you hate read-side memory barriers.
Take the default if you are unsure.
+config RCU_LAZY
+ bool "RCU callback lazy invocation functionality"
+ depends on RCU_NOCB_CPU
+ default n
+ help
+ To save power, batch RCU callbacks and flush after delay, memory
+ pressure, or callback list growing too big.
+
endmenu # "RCU Subsystem"
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 1b0c41d490f0..232e29fe3e5e 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -27,7 +27,6 @@ config RCU_SCALE_TEST
tristate "performance tests for RCU"
depends on DEBUG_KERNEL
select TORTURE_TEST
- select SRCU
default n
help
This option provides a kernel module that runs performance
@@ -43,7 +42,6 @@ config RCU_TORTURE_TEST
tristate "torture tests for RCU"
depends on DEBUG_KERNEL
select TORTURE_TEST
- select SRCU
default n
help
This option provides a kernel module that runs torture tests
@@ -59,7 +57,6 @@ config RCU_REF_SCALE_TEST
tristate "Scalability tests for read-side synchronization (RCU and others)"
depends on DEBUG_KERNEL
select TORTURE_TEST
- select SRCU
default n
help
This option provides a kernel module that runs performance tests
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index be5979da07f5..c5aa934de59b 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -286,7 +286,7 @@ void rcu_test_sync_prims(void);
*/
extern void resched_cpu(int cpu);
-#if defined(CONFIG_SRCU) || !defined(CONFIG_TINY_RCU)
+#if !defined(CONFIG_TINY_RCU)
#include <linux/rcu_node_tree.h>
@@ -375,6 +375,10 @@ extern void rcu_init_geometry(void);
(cpu) <= rnp->grphi; \
(cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - (rnp->grplo), (mask)))
+#endif /* !defined(CONFIG_TINY_RCU) */
+
+#if !defined(CONFIG_TINY_RCU) || defined(CONFIG_TASKS_RCU_GENERIC)
+
/*
* Wrappers for the rcu_node::lock acquire and release.
*
@@ -437,7 +441,7 @@ do { \
#define raw_lockdep_assert_held_rcu_node(p) \
lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
-#endif /* #if defined(CONFIG_SRCU) || !defined(CONFIG_TINY_RCU) */
+#endif // #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_TASKS_RCU_GENERIC)
#ifdef CONFIG_TINY_RCU
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
@@ -474,6 +478,14 @@ enum rcutorture_type {
INVALID_RCU_FLAVOR
};
+#if defined(CONFIG_RCU_LAZY)
+unsigned long rcu_lazy_get_jiffies_till_flush(void);
+void rcu_lazy_set_jiffies_till_flush(unsigned long j);
+#else
+static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; }
+static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { }
+#endif
+
#if defined(CONFIG_TREE_RCU)
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
unsigned long *gp_seq);
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 3ef02d4a8108..91fb5905a008 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -95,6 +95,7 @@ torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
+torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");
static char *scale_type = "rcu";
module_param(scale_type, charp, 0444);
@@ -175,7 +176,7 @@ static struct rcu_scale_ops rcu_ops = {
.get_gp_seq = rcu_get_gp_seq,
.gp_diff = rcu_seq_diff,
.exp_completed = rcu_exp_batches_completed,
- .async = call_rcu,
+ .async = call_rcu_hurry,
.gp_barrier = rcu_barrier,
.sync = synchronize_rcu,
.exp_sync = synchronize_rcu_expedited,
@@ -659,6 +660,14 @@ struct kfree_obj {
struct rcu_head rh;
};
+/* Used if doing RCU-kfree'ing via call_rcu(). */
+static void kfree_call_rcu(struct rcu_head *rh)
+{
+ struct kfree_obj *obj = container_of(rh, struct kfree_obj, rh);
+
+ kfree(obj);
+}
+
static int
kfree_scale_thread(void *arg)
{
@@ -696,6 +705,11 @@ kfree_scale_thread(void *arg)
if (!alloc_ptr)
return -ENOMEM;
+ if (kfree_by_call_rcu) {
+ call_rcu(&(alloc_ptr->rh), kfree_call_rcu);
+ continue;
+ }
+
// By default kfree_rcu_test_single and kfree_rcu_test_double are
// initialized to false. If both have the same value (false or true)
// both are randomly tested, otherwise only the one with value true
@@ -767,11 +781,58 @@ kfree_scale_shutdown(void *arg)
return -EINVAL;
}
+// Used if doing RCU-kfree'ing via call_rcu().
+static unsigned long jiffies_at_lazy_cb;
+static struct rcu_head lazy_test1_rh;
+static int rcu_lazy_test1_cb_called;
+static void call_rcu_lazy_test1(struct rcu_head *rh)
+{
+ jiffies_at_lazy_cb = jiffies;
+ WRITE_ONCE(rcu_lazy_test1_cb_called, 1);
+}
+
static int __init
kfree_scale_init(void)
{
- long i;
int firsterr = 0;
+ long i;
+ unsigned long jif_start;
+ unsigned long orig_jif;
+
+ // Also, do a quick self-test to ensure laziness is as much as
+ // expected.
+ if (kfree_by_call_rcu && !IS_ENABLED(CONFIG_RCU_LAZY)) {
+ pr_alert("CONFIG_RCU_LAZY is disabled, falling back to kfree_rcu() for delayed RCU kfree'ing\n");
+ kfree_by_call_rcu = 0;
+ }
+
+ if (kfree_by_call_rcu) {
+ /* do a test to check the timeout. */
+ orig_jif = rcu_lazy_get_jiffies_till_flush();
+
+ rcu_lazy_set_jiffies_till_flush(2 * HZ);
+ rcu_barrier();
+
+ jif_start = jiffies;
+ jiffies_at_lazy_cb = 0;
+ call_rcu(&lazy_test1_rh, call_rcu_lazy_test1);
+
+ smp_cond_load_relaxed(&rcu_lazy_test1_cb_called, VAL == 1);
+
+ rcu_lazy_set_jiffies_till_flush(orig_jif);
+
+ if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) {
+ pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n");
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+
+ if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start > 3 * HZ)) {
+ pr_alert("ERROR: call_rcu() CBs are being too lazy!\n");
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+ }
kfree_nrealthreads = compute_real(kfree_nthreads);
/* Start up the kthreads. */
@@ -784,7 +845,9 @@ kfree_scale_init(void)
schedule_timeout_uninterruptible(1);
}
- pr_alert("kfree object size=%zu\n", kfree_mult * sizeof(struct kfree_obj));
+ pr_alert("kfree object size=%zu, kfree_by_call_rcu=%d\n",
+ kfree_mult * sizeof(struct kfree_obj),
+ kfree_by_call_rcu);
kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
GFP_KERNEL);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 503c2aa845a4..634df26a2c27 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -357,6 +357,10 @@ struct rcu_torture_ops {
bool (*poll_gp_state_exp)(unsigned long oldstate);
void (*cond_sync_exp)(unsigned long oldstate);
void (*cond_sync_exp_full)(struct rcu_gp_oldstate *rgosp);
+ unsigned long (*get_comp_state)(void);
+ void (*get_comp_state_full)(struct rcu_gp_oldstate *rgosp);
+ bool (*same_gp_state)(unsigned long oldstate1, unsigned long oldstate2);
+ bool (*same_gp_state_full)(struct rcu_gp_oldstate *rgosp1, struct rcu_gp_oldstate *rgosp2);
unsigned long (*get_gp_state)(void);
void (*get_gp_state_full)(struct rcu_gp_oldstate *rgosp);
unsigned long (*get_gp_completed)(void);
@@ -510,7 +514,7 @@ static unsigned long rcu_no_completed(void)
static void rcu_torture_deferred_free(struct rcu_torture *p)
{
- call_rcu(&p->rtort_rcu, rcu_torture_cb);
+ call_rcu_hurry(&p->rtort_rcu, rcu_torture_cb);
}
static void rcu_sync_torture_init(void)
@@ -535,6 +539,10 @@ static struct rcu_torture_ops rcu_ops = {
.deferred_free = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.exp_sync = synchronize_rcu_expedited,
+ .same_gp_state = same_state_synchronize_rcu,
+ .same_gp_state_full = same_state_synchronize_rcu_full,
+ .get_comp_state = get_completed_synchronize_rcu,
+ .get_comp_state_full = get_completed_synchronize_rcu_full,
.get_gp_state = get_state_synchronize_rcu,
.get_gp_state_full = get_state_synchronize_rcu_full,
.get_gp_completed = get_completed_synchronize_rcu,
@@ -551,7 +559,7 @@ static struct rcu_torture_ops rcu_ops = {
.start_gp_poll_exp_full = start_poll_synchronize_rcu_expedited_full,
.poll_gp_state_exp = poll_state_synchronize_rcu,
.cond_sync_exp = cond_synchronize_rcu_expedited,
- .call = call_rcu,
+ .call = call_rcu_hurry,
.cb_barrier = rcu_barrier,
.fqs = rcu_force_quiescent_state,
.stats = NULL,
@@ -615,10 +623,14 @@ static struct rcu_torture_ops rcu_busted_ops = {
DEFINE_STATIC_SRCU(srcu_ctl);
static struct srcu_struct srcu_ctld;
static struct srcu_struct *srcu_ctlp = &srcu_ctl;
+static struct rcu_torture_ops srcud_ops;
static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
{
- return srcu_read_lock(srcu_ctlp);
+ if (cur_ops == &srcud_ops)
+ return srcu_read_lock_nmisafe(srcu_ctlp);
+ else
+ return srcu_read_lock(srcu_ctlp);
}
static void
@@ -642,7 +654,10 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
{
- srcu_read_unlock(srcu_ctlp, idx);
+ if (cur_ops == &srcud_ops)
+ srcu_read_unlock_nmisafe(srcu_ctlp, idx);
+ else
+ srcu_read_unlock(srcu_ctlp, idx);
}
static int torture_srcu_read_lock_held(void)
@@ -848,7 +863,7 @@ static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
static void synchronize_rcu_mult_test(void)
{
- synchronize_rcu_mult(call_rcu_tasks, call_rcu);
+ synchronize_rcu_mult(call_rcu_tasks, call_rcu_hurry);
}
static struct rcu_torture_ops tasks_ops = {
@@ -1258,13 +1273,15 @@ static void rcu_torture_write_types(void)
} else if (gp_normal && !cur_ops->deferred_free) {
pr_alert("%s: gp_normal without primitives.\n", __func__);
}
- if (gp_poll1 && cur_ops->start_gp_poll && cur_ops->poll_gp_state) {
+ if (gp_poll1 && cur_ops->get_comp_state && cur_ops->same_gp_state &&
+ cur_ops->start_gp_poll && cur_ops->poll_gp_state) {
synctype[nsynctypes++] = RTWS_POLL_GET;
pr_info("%s: Testing polling GPs.\n", __func__);
} else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) {
pr_alert("%s: gp_poll without primitives.\n", __func__);
}
- if (gp_poll_full1 && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) {
+ if (gp_poll_full1 && cur_ops->get_comp_state_full && cur_ops->same_gp_state_full
+ && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) {
synctype[nsynctypes++] = RTWS_POLL_GET_FULL;
pr_info("%s: Testing polling full-state GPs.\n", __func__);
} else if (gp_poll_full && (!cur_ops->start_gp_poll_full || !cur_ops->poll_gp_state_full)) {
@@ -1339,14 +1356,18 @@ rcu_torture_writer(void *arg)
struct rcu_gp_oldstate cookie_full;
int expediting = 0;
unsigned long gp_snap;
+ unsigned long gp_snap1;
struct rcu_gp_oldstate gp_snap_full;
+ struct rcu_gp_oldstate gp_snap1_full;
int i;
int idx;
int oldnice = task_nice(current);
+ struct rcu_gp_oldstate rgo[NUM_ACTIVE_RCU_POLL_FULL_OLDSTATE];
struct rcu_torture *rp;
struct rcu_torture *old_rp;
static DEFINE_TORTURE_RANDOM(rand);
bool stutter_waited;
+ unsigned long ulo[NUM_ACTIVE_RCU_POLL_OLDSTATE];
VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
if (!can_expedite)
@@ -1463,20 +1484,43 @@ rcu_torture_writer(void *arg)
break;
case RTWS_POLL_GET:
rcu_torture_writer_state = RTWS_POLL_GET;
+ for (i = 0; i < ARRAY_SIZE(ulo); i++)
+ ulo[i] = cur_ops->get_comp_state();
gp_snap = cur_ops->start_gp_poll();
rcu_torture_writer_state = RTWS_POLL_WAIT;
- while (!cur_ops->poll_gp_state(gp_snap))
+ while (!cur_ops->poll_gp_state(gp_snap)) {
+ gp_snap1 = cur_ops->get_gp_state();
+ for (i = 0; i < ARRAY_SIZE(ulo); i++)
+ if (cur_ops->poll_gp_state(ulo[i]) ||
+ cur_ops->same_gp_state(ulo[i], gp_snap1)) {
+ ulo[i] = gp_snap1;
+ break;
+ }
+ WARN_ON_ONCE(i >= ARRAY_SIZE(ulo));
torture_hrtimeout_jiffies(torture_random(&rand) % 16,
&rand);
+ }
rcu_torture_pipe_update(old_rp);
break;
case RTWS_POLL_GET_FULL:
rcu_torture_writer_state = RTWS_POLL_GET_FULL;
+ for (i = 0; i < ARRAY_SIZE(rgo); i++)
+ cur_ops->get_comp_state_full(&rgo[i]);
cur_ops->start_gp_poll_full(&gp_snap_full);
rcu_torture_writer_state = RTWS_POLL_WAIT_FULL;
- while (!cur_ops->poll_gp_state_full(&gp_snap_full))
+ while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
+ cur_ops->get_gp_state_full(&gp_snap1_full);
+ for (i = 0; i < ARRAY_SIZE(rgo); i++)
+ if (cur_ops->poll_gp_state_full(&rgo[i]) ||
+ cur_ops->same_gp_state_full(&rgo[i],
+ &gp_snap1_full)) {
+ rgo[i] = gp_snap1_full;
+ break;
+ }
+ WARN_ON_ONCE(i >= ARRAY_SIZE(rgo));
torture_hrtimeout_jiffies(torture_random(&rand) % 16,
&rand);
+ }
rcu_torture_pipe_update(old_rp);
break;
case RTWS_POLL_GET_EXP:
@@ -3388,13 +3432,13 @@ static void rcu_test_debug_objects(void)
/* Try to queue the rh2 pair of callbacks for the same grace period. */
preempt_disable(); /* Prevent preemption from interrupting test. */
rcu_read_lock(); /* Make it impossible to finish a grace period. */
- call_rcu(&rh1, rcu_torture_leak_cb); /* Start grace period. */
+ call_rcu_hurry(&rh1, rcu_torture_leak_cb); /* Start grace period. */
local_irq_disable(); /* Make it harder to start a new grace period. */
- call_rcu(&rh2, rcu_torture_leak_cb);
- call_rcu(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
+ call_rcu_hurry(&rh2, rcu_torture_leak_cb);
+ call_rcu_hurry(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
if (rhp) {
- call_rcu(rhp, rcu_torture_leak_cb);
- call_rcu(rhp, rcu_torture_err_cb); /* Another duplicate callback. */
+ call_rcu_hurry(rhp, rcu_torture_leak_cb);
+ call_rcu_hurry(rhp, rcu_torture_err_cb); /* Another duplicate callback. */
}
local_irq_enable();
rcu_read_unlock();
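As background for the new get_comp_state/same_gp_state ops, a minimal sketch of the polled grace-period primitives they exercise (these are existing RCU APIs; the torture-specific bookkeeping above is omitted):

    /* Illustrative example, assuming process context where sleeping is allowed. */
    static void poll_gp_example(void)
    {
        unsigned long cookie;

        /* A pre-completed cookie always polls as "grace period done". */
        cookie = get_completed_synchronize_rcu();
        WARN_ON_ONCE(!poll_state_synchronize_rcu(cookie));

        /* Start a grace period, then wait for it by polling. */
        cookie = start_poll_synchronize_rcu();
        while (!poll_state_synchronize_rcu(cookie))
            schedule_timeout_uninterruptible(1);
    }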
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 33adafdad261..b12fb0cec44d 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -197,6 +197,16 @@ void synchronize_srcu(struct srcu_struct *ssp)
{
struct rcu_synchronize rs;
+ RCU_LOCKDEP_WARN(lockdep_is_held(ssp) ||
+ lock_is_held(&rcu_bh_lock_map) ||
+ lock_is_held(&rcu_lock_map) ||
+ lock_is_held(&rcu_sched_lock_map),
+ "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
+
+ if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
+ return;
+
+ might_sleep();
init_rcu_head_on_stack(&rs.head);
init_completion(&rs.completion);
call_srcu(ssp, &rs.head, wakeme_after_rcu);
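The RCU_LOCKDEP_WARN() added above flags self-deadlock patterns such as the following hypothetical one (my_srcu and broken_example() are illustrative names; sketch only):

    DEFINE_SRCU(my_srcu);

    /* Hypothetical anti-pattern that the new lockdep check reports. */
    static void broken_example(void)
    {
        int idx;

        idx = srcu_read_lock(&my_srcu);
        /*
         * Never do this: waiting for a grace period of the same
         * srcu_struct from within its read-side critical section
         * can never complete, and lockdep now complains about it.
         */
        synchronize_srcu(&my_srcu);
        srcu_read_unlock(&my_srcu, idx);
    }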
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 1c304fec89c0..ca4b5dcec675 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -417,7 +417,7 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
for_each_possible_cpu(cpu) {
struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
- sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
+ sum += atomic_long_read(&cpuc->srcu_lock_count[idx]);
}
return sum;
}
@@ -429,13 +429,18 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx)
static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx)
{
int cpu;
+ unsigned long mask = 0;
unsigned long sum = 0;
for_each_possible_cpu(cpu) {
struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
- sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
+ sum += atomic_long_read(&cpuc->srcu_unlock_count[idx]);
+ if (IS_ENABLED(CONFIG_PROVE_RCU))
+ mask = mask | READ_ONCE(cpuc->srcu_nmi_safety);
}
+ WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask >> 1)),
+ "Mixed NMI-safe readers for srcu_struct at %ps.\n", ssp);
return sum;
}
@@ -503,10 +508,10 @@ static bool srcu_readers_active(struct srcu_struct *ssp)
for_each_possible_cpu(cpu) {
struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu);
- sum += READ_ONCE(cpuc->srcu_lock_count[0]);
- sum += READ_ONCE(cpuc->srcu_lock_count[1]);
- sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
- sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
+ sum += atomic_long_read(&cpuc->srcu_lock_count[0]);
+ sum += atomic_long_read(&cpuc->srcu_lock_count[1]);
+ sum -= atomic_long_read(&cpuc->srcu_unlock_count[0]);
+ sum -= atomic_long_read(&cpuc->srcu_unlock_count[1]);
}
return sum;
}
@@ -626,6 +631,29 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+#ifdef CONFIG_PROVE_RCU
+/*
+ * Check for consistent NMI safety.
+ */
+void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe)
+{
+ int nmi_safe_mask = 1 << nmi_safe;
+ int old_nmi_safe_mask;
+ struct srcu_data *sdp;
+
+ /* NMI-unsafe use in NMI is a bad sign */
+ WARN_ON_ONCE(!nmi_safe && in_nmi());
+ sdp = raw_cpu_ptr(ssp->sda);
+ old_nmi_safe_mask = READ_ONCE(sdp->srcu_nmi_safety);
+ if (!old_nmi_safe_mask) {
+ WRITE_ONCE(sdp->srcu_nmi_safety, nmi_safe_mask);
+ return;
+ }
+ WARN_ONCE(old_nmi_safe_mask != nmi_safe_mask, "CPU %d old state %d new state %d\n", sdp->cpu, old_nmi_safe_mask, nmi_safe_mask);
+}
+EXPORT_SYMBOL_GPL(srcu_check_nmi_safety);
+#endif /* CONFIG_PROVE_RCU */
+
/*
* Counts the new reader in the appropriate per-CPU element of the
* srcu_struct.
@@ -636,7 +664,7 @@ int __srcu_read_lock(struct srcu_struct *ssp)
int idx;
idx = READ_ONCE(ssp->srcu_idx) & 0x1;
- this_cpu_inc(ssp->sda->srcu_lock_count[idx]);
+ this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);
smp_mb(); /* B */ /* Avoid leaking the critical section. */
return idx;
}
@@ -650,10 +678,45 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock);
void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
{
smp_mb(); /* C */ /* Avoid leaking the critical section. */
- this_cpu_inc(ssp->sda->srcu_unlock_count[idx]);
+ this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter);
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
+#ifdef CONFIG_NEED_SRCU_NMI_SAFE
+
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct, but in an NMI-safe manner using RMW atomics.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+{
+ int idx;
+ struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+ idx = READ_ONCE(ssp->srcu_idx) & 0x1;
+ atomic_long_inc(&sdp->srcu_lock_count[idx]);
+ smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */
+ return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
+
+/*
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct. Note that this may well be a different
+ * CPU than that which was incremented by the corresponding srcu_read_lock().
+ */
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+{
+ struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+ smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */
+ atomic_long_inc(&sdp->srcu_unlock_count[idx]);
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
+
+#endif // CONFIG_NEED_SRCU_NMI_SAFE
+
/*
* Start an SRCU grace period.
*/
@@ -1090,7 +1153,12 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
int ss_state;
check_init_srcu_struct(ssp);
- idx = srcu_read_lock(ssp);
+ /*
+ * While starting a new grace period, make sure we are in an
+ * SRCU read-side critical section so that the grace-period
+ * sequence number cannot wrap around in the meantime.
+ */
+ idx = __srcu_read_lock_nmisafe(ssp);
ss_state = smp_load_acquire(&ssp->srcu_size_state);
if (ss_state < SRCU_SIZE_WAIT_CALL)
sdp = per_cpu_ptr(ssp->sda, 0);
@@ -1123,7 +1191,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
srcu_funnel_gp_start(ssp, sdp, s, do_norm);
else if (needexp)
srcu_funnel_exp_start(ssp, sdp_mynode, s);
- srcu_read_unlock(ssp, idx);
+ __srcu_read_unlock_nmisafe(ssp, idx);
return s;
}
@@ -1427,13 +1495,13 @@ void srcu_barrier(struct srcu_struct *ssp)
/* Initial count prevents reaching zero until all CBs are posted. */
atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
- idx = srcu_read_lock(ssp);
+ idx = __srcu_read_lock_nmisafe(ssp);
if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
else
for_each_possible_cpu(cpu)
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
- srcu_read_unlock(ssp, idx);
+ __srcu_read_unlock_nmisafe(ssp, idx);
/* Remove the initial count, at which point reaching zero can happen. */
if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
@@ -1687,8 +1755,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
struct srcu_data *sdp;
sdp = per_cpu_ptr(ssp->sda, cpu);
- u0 = data_race(sdp->srcu_unlock_count[!idx]);
- u1 = data_race(sdp->srcu_unlock_count[idx]);
+ u0 = data_race(atomic_long_read(&sdp->srcu_unlock_count[!idx]));
+ u1 = data_race(atomic_long_read(&sdp->srcu_unlock_count[idx]));
/*
* Make sure that a lock is always counted if the corresponding
@@ -1696,8 +1764,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
*/
smp_rmb();
- l0 = data_race(sdp->srcu_lock_count[!idx]);
- l1 = data_race(sdp->srcu_lock_count[idx]);
+ l0 = data_race(atomic_long_read(&sdp->srcu_lock_count[!idx]));
+ l1 = data_race(atomic_long_read(&sdp->srcu_lock_count[idx]));
c0 = l0 - u0;
c1 = l1 - u1;
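For context, a sketch of the NMI-safe reader API that these atomic counters implement. srcu_read_lock_nmisafe()/srcu_read_unlock_nmisafe() are the public wrappers (the rcutorture hunks above use them); nmi_ssp and the handler are illustrative names. Note that with CONFIG_PROVE_RCU a given srcu_struct must be used either NMI-safely or not, never both:

    DEFINE_SRCU(nmi_ssp);

    /* Illustrative handler that may run in NMI context. */
    static void demo_nmi_handler(void)
    {
        int idx;

        /* Uses atomic_long_inc() internally, so this is NMI-safe. */
        idx = srcu_read_lock_nmisafe(&nmi_ssp);
        /* ... read data published under nmi_ssp ... */
        srcu_read_unlock_nmisafe(&nmi_ssp, idx);
    }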
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 5cefc702158f..e550f97779b8 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -44,7 +44,7 @@ static void rcu_sync_func(struct rcu_head *rhp);
static void rcu_sync_call(struct rcu_sync *rsp)
{
- call_rcu(&rsp->cb_head, rcu_sync_func);
+ call_rcu_hurry(&rsp->cb_head, rcu_sync_func);
}
/**
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index f5bf6fb430da..b0b885e071fa 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -728,7 +728,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
if (rtsi > 0 && !reported && time_after(j, lastinfo + rtsi)) {
lastinfo = j;
rtsi = rtsi * rcu_task_stall_info_mult;
- pr_info("%s: %s grace period %lu is %lu jiffies old.\n",
+ pr_info("%s: %s grace period number %lu (since boot) is %lu jiffies old.\n",
__func__, rtp->kname, rtp->tasks_gp_seq, j - rtp->gp_start);
}
}
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index a33a8d4942c3..72913ce21258 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -44,7 +44,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
void rcu_barrier(void)
{
- wait_rcu_gp(call_rcu);
+ wait_rcu_gp(call_rcu_hurry);
}
EXPORT_SYMBOL(rcu_barrier);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 93416afebd59..d04f2192f02c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -301,12 +301,6 @@ static bool rcu_dynticks_in_eqs(int snap)
return !(snap & RCU_DYNTICKS_IDX);
}
-/* Return true if the specified CPU is currently idle from an RCU viewpoint. */
-bool rcu_is_idle_cpu(int cpu)
-{
- return rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
-}
-
/*
* Return true if the CPU corresponding to the specified rcu_data
* structure has spent some time in an extended quiescent state since
@@ -2108,7 +2102,7 @@ int rcutree_dying_cpu(unsigned int cpu)
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
return 0;
- blkd = !!(rnp->qsmask & rdp->grpmask);
+ blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
return 0;
@@ -2418,7 +2412,7 @@ void rcu_force_quiescent_state(void)
struct rcu_node *rnp_old = NULL;
/* Funnel through hierarchy to reduce memory contention. */
- rnp = __this_cpu_read(rcu_data.mynode);
+ rnp = raw_cpu_read(rcu_data.mynode);
for (; rnp != NULL; rnp = rnp->parent) {
ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
!raw_spin_trylock(&rnp->fqslock);
@@ -2730,47 +2724,8 @@ static void check_cb_ovld(struct rcu_data *rdp)
raw_spin_unlock_rcu_node(rnp);
}
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed. However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.
- *
- * RCU read-side critical sections are delimited by rcu_read_lock()
- * and rcu_read_unlock(), and may be nested. In addition, but only in
- * v5.0 and later, regions of code across which interrupts, preemption,
- * or softirqs have been disabled also serve as RCU read-side critical
- * sections. This includes hardware interrupt handlers, softirq handlers,
- * and NMI handlers.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section. On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu(). It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section. Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- *
- * Implementation of these memory-ordering guarantees is described here:
- * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
- */
-void call_rcu(struct rcu_head *head, rcu_callback_t func)
+static void
+__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
{
static atomic_t doublefrees;
unsigned long flags;
@@ -2811,7 +2766,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
}
check_cb_ovld(rdp);
- if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
+ if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy))
return; // Enqueued onto ->nocb_bypass, so just leave.
// If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
rcu_segcblist_enqueue(&rdp->cblist, head);
@@ -2833,8 +2788,84 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
local_irq_restore(flags);
}
}
-EXPORT_SYMBOL_GPL(call_rcu);
+#ifdef CONFIG_RCU_LAZY
+/**
+ * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
+ * flush all lazy callbacks (including the new one) to the main ->cblist while
+ * doing so.
+ *
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.
+ *
+ * Use this API instead of call_rcu() if you don't want the callback to be
+ * invoked after very long periods of time, which can happen on systems without
+ * memory pressure and on systems which are lightly loaded or mostly idle.
+ * This function will cause callbacks to be invoked sooner than later at the
+ * expense of extra power. Other than that, this function is identical to, and
+ * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory
+ * ordering and other functionality.
+ */
+void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
+{
+ return __call_rcu_common(head, func, false);
+}
+EXPORT_SYMBOL_GPL(call_rcu_hurry);
+#endif
+
+/**
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * By default the callbacks are 'lazy' and are kept hidden from the main
+ * ->cblist to prevent starting of grace periods too soon.
+ * If you desire grace periods to start very soon, use call_rcu_hurry().
+ *
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed. However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.
+ *
+ * RCU read-side critical sections are delimited by rcu_read_lock()
+ * and rcu_read_unlock(), and may be nested. In addition, but only in
+ * v5.0 and later, regions of code across which interrupts, preemption,
+ * or softirqs have been disabled also serve as RCU read-side critical
+ * sections. This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section. On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu(). It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section. Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
+ *
+ * Implementation of these memory-ordering guarantees is described here:
+ * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ */
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
+{
+ return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
+}
+EXPORT_SYMBOL_GPL(call_rcu);
/* Maximum number of jiffies to wait before draining a batch. */
#define KFREE_DRAIN_JIFFIES (5 * HZ)
@@ -3509,7 +3540,7 @@ void synchronize_rcu(void)
if (rcu_gp_is_expedited())
synchronize_rcu_expedited();
else
- wait_rcu_gp(call_rcu);
+ wait_rcu_gp(call_rcu_hurry);
return;
}
@@ -3896,6 +3927,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
{
unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
+ bool wake_nocb = false;
+ bool was_alldone = false;
lockdep_assert_held(&rcu_state.barrier_lock);
if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
@@ -3904,7 +3937,14 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
rdp->barrier_head.func = rcu_barrier_callback;
debug_rcu_head_queue(&rdp->barrier_head);
rcu_nocb_lock(rdp);
- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
+ /*
+ * Flush bypass and wakeup rcuog if we add callbacks to an empty regular
+ * queue. This way we don't wait for bypass timer that can reach seconds
+ * if it's fully lazy.
+ */
+ was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
+ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
+ wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
atomic_inc(&rcu_state.barrier_cpu_count);
} else {
@@ -3912,6 +3952,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
}
rcu_nocb_unlock(rdp);
+ if (wake_nocb)
+ wake_nocb_gp(rdp, false);
smp_store_release(&rdp->barrier_seq_snap, gseq);
}
@@ -4278,8 +4320,6 @@ void rcu_report_dead(unsigned int cpu)
// Do any dangling deferred wakeups.
do_nocb_deferred_wakeup(rdp);
- /* QS for any half-done expedited grace period. */
- rcu_report_exp_rdp(rdp);
rcu_preempt_deferred_qs(current);
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
@@ -4327,7 +4367,7 @@ void rcutree_migrate_callbacks(int cpu)
my_rdp = this_cpu_ptr(&rcu_data);
my_rnp = my_rdp->mynode;
rcu_nocb_lock(my_rdp); /* irqs already disabled. */
- WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
+ WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false));
raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
/* Leverage recent GPs and set GP for new callbacks. */
needwake = rcu_advance_cbs(my_rnp, rdp) ||
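To summarize the split above, a hedged sketch of how a caller chooses between the default (possibly lazy) call_rcu() and the new call_rcu_hurry(); struct foo and its helpers are illustrative, not from the patch:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    /* Illustrative example type. */
    struct foo {
        struct rcu_head rh;
        /* ... payload ... */
    };

    static void foo_free_cb(struct rcu_head *rh)
    {
        kfree(container_of(rh, struct foo, rh));
    }

    static void foo_release(struct foo *fp, bool latency_sensitive)
    {
        if (latency_sensitive)
            /* Flushes lazy callbacks and gets a grace period going soon. */
            call_rcu_hurry(&fp->rh, foo_free_cb);
        else
            /* Default: may be batched for power savings under CONFIG_RCU_LAZY. */
            call_rcu(&fp->rh, foo_free_cb);
    }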
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index d4a97e40ea9c..fcb5d696eb17 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -263,14 +263,16 @@ struct rcu_data {
unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */
+ long lazy_len; /* Length of buffered lazy callbacks. */
int cpu;
};
/* Values for nocb_defer_wakeup field in struct rcu_data. */
#define RCU_NOCB_WAKE_NOT 0
#define RCU_NOCB_WAKE_BYPASS 1
-#define RCU_NOCB_WAKE 2
-#define RCU_NOCB_WAKE_FORCE 3
+#define RCU_NOCB_WAKE_LAZY 2
+#define RCU_NOCB_WAKE 3
+#define RCU_NOCB_WAKE_FORCE 4
#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
/* For jiffies_till_first_fqs and */
@@ -439,10 +441,12 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
static void rcu_init_one_nocb(struct rcu_node *rnp);
+static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- unsigned long j);
+ unsigned long j, bool lazy);
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- bool *was_alldone, unsigned long flags);
+ bool *was_alldone, unsigned long flags,
+ bool lazy);
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
unsigned long flags);
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 18e9b4cd78ef..ed6c3cce28f2 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -937,7 +937,7 @@ void synchronize_rcu_expedited(void)
/* If expedited grace periods are prohibited, fall back to normal. */
if (rcu_gp_is_normal()) {
- wait_rcu_gp(call_rcu);
+ wait_rcu_gp(call_rcu_hurry);
return;
}
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 0a5f0ef41484..9e1c8caec5ce 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -257,6 +257,31 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
}
/*
+ * LAZY_FLUSH_JIFFIES decides the maximum amount of time that
+ * can elapse before lazy callbacks are flushed. Lazy callbacks
+ * could be flushed much earlier for a number of other reasons
+ * however, LAZY_FLUSH_JIFFIES will ensure no lazy callbacks are
+ * left unsubmitted to RCU after those many jiffies.
+ */
+#define LAZY_FLUSH_JIFFIES (10 * HZ)
+static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES;
+
+#ifdef CONFIG_RCU_LAZY
+// To be called only from test code.
+void rcu_lazy_set_jiffies_till_flush(unsigned long jif)
+{
+ jiffies_till_flush = jif;
+}
+EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush);
+
+unsigned long rcu_lazy_get_jiffies_till_flush(void)
+{
+ return jiffies_till_flush;
+}
+EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush);
+#endif
+
+/*
* Arrange to wake the GP kthread for this NOCB group at some future
* time when it is safe to do so.
*/
@@ -269,10 +294,14 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
/*
- * Bypass wakeup overrides previous deferments. In case
- * of callback storm, no need to wake up too early.
+ * Bypass wakeup overrides previous deferments. In case of
+ * callback storms, no need to wake up too early.
*/
- if (waketype == RCU_NOCB_WAKE_BYPASS) {
+ if (waketype == RCU_NOCB_WAKE_LAZY &&
+ rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
+ mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush);
+ WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
+ } else if (waketype == RCU_NOCB_WAKE_BYPASS) {
mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
} else {
@@ -293,12 +322,16 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
* proves to be initially empty, just return false because the no-CB GP
* kthread may need to be awakened in this case.
*
+ * Return true if there was something to be flushed and it succeeded, otherwise
+ * false.
+ *
* Note that this function always returns true if rhp is NULL.
*/
-static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- unsigned long j)
+static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in,
+ unsigned long j, bool lazy)
{
struct rcu_cblist rcl;
+ struct rcu_head *rhp = rhp_in;
WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));
rcu_lockdep_assert_cblist_protected(rdp);
@@ -310,7 +343,20 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
/* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
if (rhp)
rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
+
+ /*
+ * If the new CB requested was a lazy one, queue it onto the main
+ * ->cblist so that we can take advantage of the grace-period that will
+ * happen regardless. But queue it onto the bypass list first so that
+ * the lazy CB is ordered with the existing CBs in the bypass list.
+ */
+ if (lazy && rhp) {
+ rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
+ rhp = NULL;
+ }
rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
+ WRITE_ONCE(rdp->lazy_len, 0);
+
rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
WRITE_ONCE(rdp->nocb_bypass_first, j);
rcu_nocb_bypass_unlock(rdp);
@@ -326,13 +372,13 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
* Note that this function always returns true if rhp is NULL.
*/
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- unsigned long j)
+ unsigned long j, bool lazy)
{
if (!rcu_rdp_is_offloaded(rdp))
return true;
rcu_lockdep_assert_cblist_protected(rdp);
rcu_nocb_bypass_lock(rdp);
- return rcu_nocb_do_flush_bypass(rdp, rhp, j);
+ return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy);
}
/*
@@ -345,7 +391,7 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
if (!rcu_rdp_is_offloaded(rdp) ||
!rcu_nocb_bypass_trylock(rdp))
return;
- WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j));
+ WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false));
}
/*
@@ -367,12 +413,14 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
* there is only one CPU in operation.
*/
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- bool *was_alldone, unsigned long flags)
+ bool *was_alldone, unsigned long flags,
+ bool lazy)
{
unsigned long c;
unsigned long cur_gp_seq;
unsigned long j = jiffies;
long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
+ bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len));
lockdep_assert_irqs_disabled();
@@ -417,24 +465,29 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
// If there hasn't yet been all that many ->cblist enqueues
// this jiffy, tell the caller to enqueue onto ->cblist. But flush
// ->nocb_bypass first.
- if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) {
+ // Lazy CBs throttle this back and do immediate bypass queuing.
+ if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) {
rcu_nocb_lock(rdp);
*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
if (*was_alldone)
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("FirstQ"));
- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));
+
+ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false));
WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
return false; // Caller must enqueue the callback.
}
// If ->nocb_bypass has been used too long or is too full,
// flush ->nocb_bypass to ->cblist.
- if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
+ if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
+ (ncbs && bypass_is_lazy &&
+ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) ||
ncbs >= qhimark) {
rcu_nocb_lock(rdp);
- if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
- *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+ *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
+
+ if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) {
if (*was_alldone)
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("FirstQ"));
@@ -447,7 +500,12 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
rcu_advance_cbs_nowake(rdp->mynode, rdp);
rdp->nocb_gp_adv_time = j;
}
- rcu_nocb_unlock_irqrestore(rdp, flags);
+
+ // The flush succeeded and we moved CBs into the regular list.
+ // Don't wait for the wake up timer as it may be too far ahead.
+ // Wake up the GP thread now instead, if the cblist was empty.
+ __call_rcu_nocb_wake(rdp, *was_alldone, flags);
+
return true; // Callback already enqueued.
}
@@ -457,13 +515,24 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
+
+ if (lazy)
+ WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1);
+
if (!ncbs) {
WRITE_ONCE(rdp->nocb_bypass_first, j);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
}
rcu_nocb_bypass_unlock(rdp);
smp_mb(); /* Order enqueue before wake. */
- if (ncbs) {
+ // A wake up of the grace period kthread or timer adjustment
+ // needs to be done only if:
+ // 1. Bypass list was fully empty before (this is the first
+ // bypass list entry), or:
+ // 2. Both of these conditions are met:
+ // a. The bypass list previously had only lazy CBs, and:
+ // b. The new CB is non-lazy.
+ if (ncbs && (!bypass_is_lazy || lazy)) {
local_irq_restore(flags);
} else {
// No-CBs GP kthread might be indefinitely asleep, if so, wake.
@@ -491,8 +560,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
unsigned long flags)
__releases(rdp->nocb_lock)
{
+ long bypass_len;
unsigned long cur_gp_seq;
unsigned long j;
+ long lazy_len;
long len;
struct task_struct *t;
@@ -506,9 +577,16 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
}
// Need to actually do a wakeup.
len = rcu_segcblist_n_cbs(&rdp->cblist);
+ bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass);
+ lazy_len = READ_ONCE(rdp->lazy_len);
if (was_alldone) {
rdp->qlen_last_fqs_check = len;
- if (!irqs_disabled_flags(flags)) {
+ // Only lazy CBs in bypass list
+ if (lazy_len && bypass_len == lazy_len) {
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
+ TPS("WakeLazy"));
+ } else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */
rcu_nocb_unlock_irqrestore(rdp, flags);
wake_nocb_gp(rdp, false);
@@ -599,12 +677,12 @@ static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu)
static void nocb_gp_wait(struct rcu_data *my_rdp)
{
bool bypass = false;
- long bypass_ncbs;
int __maybe_unused cpu = my_rdp->cpu;
unsigned long cur_gp_seq;
unsigned long flags;
bool gotcbs = false;
unsigned long j = jiffies;
+ bool lazy = false;
bool needwait_gp = false; // This prevents actual uninitialized use.
bool needwake;
bool needwake_gp;
@@ -634,24 +712,43 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
* won't be ignored for long.
*/
list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) {
+ long bypass_ncbs;
+ bool flush_bypass = false;
+ long lazy_ncbs;
+
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
rcu_nocb_lock_irqsave(rdp, flags);
lockdep_assert_held(&rdp->nocb_lock);
bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
- if (bypass_ncbs &&
+ lazy_ncbs = READ_ONCE(rdp->lazy_len);
+
+ if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
+ (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) ||
+ bypass_ncbs > 2 * qhimark)) {
+ flush_bypass = true;
+ } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
(time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
bypass_ncbs > 2 * qhimark)) {
- // Bypass full or old, so flush it.
- (void)rcu_nocb_try_flush_bypass(rdp, j);
- bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
+ flush_bypass = true;
} else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
rcu_nocb_unlock_irqrestore(rdp, flags);
continue; /* No callbacks here, try next. */
}
+
+ if (flush_bypass) {
+ // Bypass full or old, so flush it.
+ (void)rcu_nocb_try_flush_bypass(rdp, j);
+ bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
+ lazy_ncbs = READ_ONCE(rdp->lazy_len);
+ }
+
if (bypass_ncbs) {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
- TPS("Bypass"));
- bypass = true;
+ bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass"));
+ if (bypass_ncbs == lazy_ncbs)
+ lazy = true;
+ else
+ bypass = true;
}
rnp = rdp->mynode;
@@ -699,12 +796,20 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
my_rdp->nocb_gp_gp = needwait_gp;
my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
- if (bypass && !rcu_nocb_poll) {
- // At least one child with non-empty ->nocb_bypass, so set
- // timer in order to avoid stranding its callbacks.
- wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
- TPS("WakeBypassIsDeferred"));
+ // At least one child with non-empty ->nocb_bypass, so set
+ // timer in order to avoid stranding its callbacks.
+ if (!rcu_nocb_poll) {
+ // If bypass list only has lazy CBs. Add a deferred lazy wake up.
+ if (lazy && !bypass) {
+ wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY,
+ TPS("WakeLazyIsDeferred"));
+ // Otherwise add a deferred bypass wake up.
+ } else if (bypass) {
+ wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
+ TPS("WakeBypassIsDeferred"));
+ }
}
+
if (rcu_nocb_poll) {
/* Polling, so trace if first poll in the series. */
if (gotcbs)
@@ -1030,7 +1135,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
* return false, which means that future calls to rcu_nocb_try_bypass()
* will refuse to put anything into the bypass.
*/
- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
+ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
/*
* Start with invoking rcu_core() early. This way if the current thread
* happens to preempt an ongoing call to rcu_core() in the middle,
@@ -1207,47 +1312,87 @@ int rcu_nocb_cpu_offload(int cpu)
}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);
-void __init rcu_init_nohz(void)
+static unsigned long
+lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
int cpu;
- bool need_rcu_nocb_mask = false;
- bool offload_all = false;
- struct rcu_data *rdp;
+ unsigned long count = 0;
-#if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL)
- if (!rcu_state.nocb_is_setup) {
- need_rcu_nocb_mask = true;
- offload_all = true;
+ /* Snapshot count of all CPUs */
+ for_each_possible_cpu(cpu) {
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+ count += READ_ONCE(rdp->lazy_len);
}
-#endif /* #if defined(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) */
-#if defined(CONFIG_NO_HZ_FULL)
- if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) {
- need_rcu_nocb_mask = true;
- offload_all = false; /* NO_HZ_FULL has its own mask. */
+ return count ? count : SHRINK_EMPTY;
+}
+
+static unsigned long
+lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu;
+ unsigned long flags;
+ unsigned long count = 0;
+
+ /* Snapshot count of all CPUs */
+ for_each_possible_cpu(cpu) {
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ int _count = READ_ONCE(rdp->lazy_len);
+
+ if (_count == 0)
+ continue;
+ rcu_nocb_lock_irqsave(rdp, flags);
+ WRITE_ONCE(rdp->lazy_len, 0);
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ wake_nocb_gp(rdp, false);
+ sc->nr_to_scan -= _count;
+ count += _count;
+ if (sc->nr_to_scan <= 0)
+ break;
}
-#endif /* #if defined(CONFIG_NO_HZ_FULL) */
+ return count ? count : SHRINK_STOP;
+}
+
+static struct shrinker lazy_rcu_shrinker = {
+ .count_objects = lazy_rcu_shrink_count,
+ .scan_objects = lazy_rcu_shrink_scan,
+ .batch = 0,
+ .seeks = DEFAULT_SEEKS,
+};
+
+void __init rcu_init_nohz(void)
+{
+ int cpu;
+ struct rcu_data *rdp;
+ const struct cpumask *cpumask = NULL;
+
+#if defined(CONFIG_NO_HZ_FULL)
+ if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask))
+ cpumask = tick_nohz_full_mask;
+#endif
- if (need_rcu_nocb_mask) {
+ if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) &&
+ !rcu_state.nocb_is_setup && !cpumask)
+ cpumask = cpu_possible_mask;
+
+ if (cpumask) {
if (!cpumask_available(rcu_nocb_mask)) {
if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
return;
}
}
+
+ cpumask_or(rcu_nocb_mask, rcu_nocb_mask, cpumask);
rcu_state.nocb_is_setup = true;
}
if (!rcu_state.nocb_is_setup)
return;
-#if defined(CONFIG_NO_HZ_FULL)
- if (tick_nohz_full_running)
- cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
-#endif /* #if defined(CONFIG_NO_HZ_FULL) */
-
- if (offload_all)
- cpumask_setall(rcu_nocb_mask);
+ if (register_shrinker(&lazy_rcu_shrinker, "rcu-lazy"))
+ pr_err("Failed to register lazy_rcu shrinker!\n");
if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
@@ -1284,6 +1429,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
raw_spin_lock_init(&rdp->nocb_gp_lock);
timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
rcu_cblist_init(&rdp->nocb_bypass);
+ WRITE_ONCE(rdp->lazy_len, 0);
mutex_init(&rdp->nocb_gp_kthread_mutex);
}
@@ -1564,14 +1710,19 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
{
}
+static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
+{
+ return false;
+}
+
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- unsigned long j)
+ unsigned long j, bool lazy)
{
return true;
}
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
- bool *was_alldone, unsigned long flags)
+ bool *was_alldone, unsigned long flags, bool lazy)
{
return false;
}
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index e3142ee35fc6..7b0fe741a088 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1221,11 +1221,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
* We don't include outgoingcpu in the affinity set, use -1 if there is
* no outgoing CPU. If there are no CPUs left in the affinity set,
* this function allows the kthread to execute on any CPU.
+ *
+ * Any future concurrent calls are serialized via ->boost_kthread_mutex.
*/
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
struct task_struct *t = rnp->boost_kthread_task;
- unsigned long mask = rcu_rnp_online_cpus(rnp);
+ unsigned long mask;
cpumask_var_t cm;
int cpu;
@@ -1234,6 +1236,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
return;
mutex_lock(&rnp->boost_kthread_mutex);
+ mask = rcu_rnp_online_cpus(rnp);
for_each_leaf_node_possible_cpu(rnp, cpu)
if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
cpu != outgoingcpu)
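The hunk above closes a small window: the online-CPU snapshot is now taken only after ->boost_kthread_mutex is held, so concurrent callers, which that mutex serializes, cannot act on a stale mask. A minimal sketch of the pattern, with a hypothetical struct and field names:

	/* Sketch only; 'grp', 'lock' and 'online_mask' are hypothetical. */
	static void update_affinity(struct grp *grp, int outgoingcpu)
	{
		unsigned long mask;

		mutex_lock(&grp->lock);
		mask = READ_ONCE(grp->online_mask);	/* snapshot under the lock */
		/* rebuild the kthread's cpumask from 'mask', skipping outgoingcpu */
		mutex_unlock(&grp->lock);
	}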
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 738842c4886b..f5e6a2f95a2a 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -224,7 +224,7 @@ void rcu_test_sync_prims(void)
synchronize_rcu_expedited();
}
-#if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU)
+#if !defined(CONFIG_TINY_RCU)
/*
* Switch to run-time mode once RCU has fully initialized.
@@ -239,7 +239,7 @@ static int __init rcu_set_runtime_mode(void)
}
core_initcall(rcu_set_runtime_mode);
-#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */
+#endif /* #if !defined(CONFIG_TINY_RCU) */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
@@ -559,10 +559,8 @@ static void early_boot_test_call_rcu(void)
struct early_boot_kfree_rcu *rhp;
call_rcu(&head, test_callback);
- if (IS_ENABLED(CONFIG_SRCU)) {
- early_srcu_cookie = start_poll_synchronize_srcu(&early_srcu);
- call_srcu(&early_srcu, &shead, test_callback);
- }
+ early_srcu_cookie = start_poll_synchronize_srcu(&early_srcu);
+ call_srcu(&early_srcu, &shead, test_callback);
rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
if (!WARN_ON_ONCE(!rhp))
kfree_rcu(rhp, rh);
@@ -585,11 +583,9 @@ static int rcu_verify_early_boot_tests(void)
if (rcu_self_test) {
early_boot_test_counter++;
rcu_barrier();
- if (IS_ENABLED(CONFIG_SRCU)) {
- early_boot_test_counter++;
- srcu_barrier(&early_srcu);
- WARN_ON_ONCE(!poll_state_synchronize_srcu(&early_srcu, early_srcu_cookie));
- }
+ early_boot_test_counter++;
+ srcu_barrier(&early_srcu);
+ WARN_ON_ONCE(!poll_state_synchronize_srcu(&early_srcu, early_srcu_cookie));
}
if (rcu_self_test_counter != early_boot_test_counter) {
WARN_ON(1);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 188c305aeb8b..c6d9dec11b74 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -267,13 +267,14 @@ int proc_dostring(struct ctl_table *table, int write,
ppos);
}
-static size_t proc_skip_spaces(char **buf)
+static void proc_skip_spaces(char **buf, size_t *size)
{
- size_t ret;
- char *tmp = skip_spaces(*buf);
- ret = tmp - *buf;
- *buf = tmp;
- return ret;
+ while (*size) {
+ if (!isspace(**buf))
+ break;
+ (*size)--;
+ (*buf)++;
+ }
}
static void proc_skip_char(char **buf, size_t *size, const char v)
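For reference, a short sketch of the new calling convention: the remaining byte count is decremented in place, so the skip is bounded by the buffer length and callers no longer subtract a return value ('buffer' and 'count' below are stand-ins for the caller's state):

	size_t left = count;		/* bytes remaining in the input buffer */
	char *p = buffer;

	proc_skip_spaces(&p, &left);	/* was: left -= proc_skip_spaces(&p); */
	if (!left)
		return -EINVAL;		/* nothing but whitespace was supplied */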
@@ -342,13 +343,12 @@ static int proc_get_long(char **buf, size_t *size,
unsigned long *val, bool *neg,
const char *perm_tr, unsigned perm_tr_len, char *tr)
{
- int len;
char *p, tmp[TMPBUFLEN];
+ ssize_t len = *size;
- if (!*size)
+ if (len <= 0)
return -EINVAL;
- len = *size;
if (len > TMPBUFLEN - 1)
len = TMPBUFLEN - 1;
@@ -521,7 +521,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
bool neg;
if (write) {
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
if (!left)
break;
@@ -548,7 +548,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
if (!write && !first && left && !err)
proc_put_char(&buffer, &left, '\n');
if (write && !err && left)
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
if (write && first)
return err ? : -EINVAL;
*lenp -= left;
@@ -590,7 +590,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data,
if (left > PAGE_SIZE - 1)
left = PAGE_SIZE - 1;
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
if (!left) {
err = -EINVAL;
goto out_free;
@@ -610,7 +610,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data,
}
if (!err && left)
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
out_free:
if (err)
@@ -1075,7 +1075,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
if (write) {
bool neg;
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
if (!left)
break;
@@ -1104,7 +1104,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
if (!write && !first && left && !err)
proc_put_char(&buffer, &left, '\n');
if (write && !err)
- left -= proc_skip_spaces(&p);
+ proc_skip_spaces(&p, &left);
if (write && first)
return err ? : -EINVAL;
*lenp -= left;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a7fe0e115272..5cfc95a52bc3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2180,10 +2180,12 @@ void tracing_reset_online_cpus(struct array_buffer *buf)
}
/* Must have trace_types_lock held */
-void tracing_reset_all_online_cpus(void)
+void tracing_reset_all_online_cpus_unlocked(void)
{
struct trace_array *tr;
+ lockdep_assert_held(&trace_types_lock);
+
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
if (!tr->clear_trace)
continue;
@@ -2195,6 +2197,13 @@ void tracing_reset_all_online_cpus(void)
}
}
+void tracing_reset_all_online_cpus(void)
+{
+ mutex_lock(&trace_types_lock);
+ tracing_reset_all_online_cpus_unlocked();
+ mutex_unlock(&trace_types_lock);
+}
+
/*
* The tgid_map array maps from pid to tgid; i.e. the value stored at index i
* is the tgid last observed corresponding to pid=i.
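A short usage sketch of the two variants introduced above, assuming a tracing-core caller: the _unlocked form asserts that trace_types_lock is already held, while the plain form takes and releases the lock itself.

	/* Caller does not hold trace_types_lock: */
	tracing_reset_all_online_cpus();

	/* Caller already holds trace_types_lock (e.g. the module notifier path): */
	mutex_lock(&trace_types_lock);
	/* ... other work that needs the lock ... */
	tracing_reset_all_online_cpus_unlocked();
	mutex_unlock(&trace_types_lock);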
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 54ee5711c729..d42e24507152 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -580,6 +580,7 @@ int tracing_is_enabled(void);
void tracing_reset_online_cpus(struct array_buffer *buf);
void tracing_reset_current(int cpu);
void tracing_reset_all_online_cpus(void);
+void tracing_reset_all_online_cpus_unlocked(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
int tracing_open_generic_tr(struct inode *inode, struct file *filp);
bool tracing_is_disabled(void);
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index 154996684fb5..4376887e0d8a 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -118,6 +118,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type
if (ret)
break;
}
+ tracing_reset_all_online_cpus();
mutex_unlock(&event_mutex);
out:
argv_free(argv);
@@ -214,6 +215,7 @@ int dyn_events_release_all(struct dyn_event_operations *type)
break;
}
out:
+ tracing_reset_all_online_cpus();
mutex_unlock(&event_mutex);
return ret;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0356cae0cf74..f71ea6e79b3c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2880,7 +2880,10 @@ static int probe_remove_event_call(struct trace_event_call *call)
* TRACE_REG_UNREGISTER.
*/
if (file->flags & EVENT_FILE_FL_ENABLED)
- return -EBUSY;
+ goto busy;
+
+ if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
+ tr->clear_trace = true;
/*
* The do_for_each_event_file_safe() is
* a double loop. After finding the call for this
@@ -2893,6 +2896,12 @@ static int probe_remove_event_call(struct trace_event_call *call)
__trace_remove_event_call(call);
return 0;
+ busy:
+ /* No need to clear the trace now */
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ tr->clear_trace = false;
+ }
+ return -EBUSY;
}
/* Remove an event_call */
@@ -2972,7 +2981,7 @@ static void trace_module_remove_events(struct module *mod)
* over from this module may be passed to the new module events and
* unexpected results may occur.
*/
- tracing_reset_all_online_cpus();
+ tracing_reset_all_online_cpus_unlocked();
}
static int trace_module_notify(struct notifier_block *self,
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 48465f7e97b4..1c82478e8dff 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -983,7 +983,7 @@ static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data,
* A trigger can define one or more variables. If any one of them is
* currently referenced by any other trigger, this function will
* determine that.
-
+ *
* Typically used to determine whether or not a trigger can be removed
* - if there are any references to a trigger's variables, it cannot.
*
@@ -3226,7 +3226,7 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data,
* events. However, for convenience, users are allowed to directly
* specify an event field in an action, which will be automatically
* converted into a variable on their behalf.
-
+ *
* This function creates a field variable with the name var_name on
* the hist trigger currently being defined on the target event. If
* subsys_name and event_name are specified, this function simply
@@ -5143,6 +5143,9 @@ static void event_hist_trigger(struct event_trigger_data *data,
void *key = NULL;
unsigned int i;
+ if (unlikely(!rbe))
+ return;
+
memset(compound_key, 0, hist_data->key_size);
for_each_hist_key_field(i, hist_data) {
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 29fbfb27c2b2..c3b582d19b62 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1425,7 +1425,6 @@ int synth_event_delete(const char *event_name)
mutex_unlock(&event_mutex);
if (mod) {
- mutex_lock(&trace_types_lock);
/*
* It is safest to reset the ring buffer if the module
* being unloaded registered any events that were
@@ -1437,7 +1436,6 @@ int synth_event_delete(const char *event_name)
* occur.
*/
tracing_reset_all_online_cpus();
- mutex_unlock(&trace_types_lock);
}
return ret;
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index ae78c2d53c8a..539b08ae7020 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -1100,8 +1100,10 @@ static int user_event_create(const char *raw_command)
group = current_user_event_group();
- if (!group)
+ if (!group) {
+ kfree(name);
return -ENOENT;
+ }
mutex_lock(&group->reg_mutex);
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 78d536d3ff3d..4300c5dc4e5d 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -917,7 +917,7 @@ void osnoise_trace_irq_entry(int id)
void osnoise_trace_irq_exit(int id, const char *desc)
{
struct osnoise_variables *osn_var = this_cpu_osn_var();
- int duration;
+ s64 duration;
if (!osn_var->sampling)
return;
@@ -1048,7 +1048,7 @@ static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
{
struct osnoise_variables *osn_var = this_cpu_osn_var();
- int duration;
+ s64 duration;
if (!osn_var->sampling)
return;
@@ -1144,7 +1144,7 @@ thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
static void
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
- int duration;
+ s64 duration;
if (!osn_var->sampling)
return;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7cd5f5e7e0a1..07895deca271 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1771,7 +1771,7 @@ bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
rwork->wq = wq;
- call_rcu(&rwork->rcu, rcu_work_rcufn);
+ call_rcu_hurry(&rwork->rcu, rcu_work_rcufn);
return true;
}
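Context for the call_rcu_hurry() conversions in this patch, as a hedged note: with lazy RCU callbacks enabled, plain call_rcu() may be batched and its grace period delayed to save power, so paths whose completion is waited on switch to call_rcu_hurry(). The object and callback in the first line are hypothetical; the second mirrors the hunk above.

	/* Plain memory reclaim: a lazy, batched callback is fine. */
	call_rcu(&obj->rcu, free_obj_cb);

	/* Work that someone may flush or wait on soon: do not let it linger. */
	call_rcu_hurry(&rwork->rcu, rcu_work_rcufn);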
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a1005415f0f4..3638b3424be5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -399,6 +399,7 @@ config FRAME_WARN
default 2048 if GCC_PLUGIN_LATENT_ENTROPY
default 2048 if PARISC
default 1536 if (!64BIT && XTENSA)
+ default 1280 if KASAN && !64BIT
default 1024 if !64BIT
default 2048 if 64BIT
help
@@ -1874,8 +1875,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT
If unsure, say N.
config FUNCTION_ERROR_INJECTION
- def_bool y
+ bool "Fault-injections of functions"
depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
+ help
+ Add fault injections into various functions that are annotated with
+ ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return
+	  value of these functions. This is useful to test error paths of code.
+
+	  If unsure, say N.
config FAULT_INJECTION
bool "Fault-injection framework"
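The new help text refers to functions annotated with ALLOW_ERROR_INJECTION(); a minimal sketch of how a function opts in follows (the function and device struct are hypothetical, the macro and ERRNO type come from <linux/error-injection.h>):

	#include <linux/error-injection.h>

	static int example_init_hw(struct example_dev *dev)
	{
		/* ... may legitimately fail with a negative errno ... */
		return 0;
	}
	/* Fault injection (and BPF) may now force an errno return here. */
	ALLOW_ERROR_INJECTION(example_init_hw, ERRNO);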
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index ca09b1cf8ee9..836f70393e22 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -37,7 +37,7 @@ menuconfig KASAN
(HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
HAVE_ARCH_KASAN_HW_TAGS
- depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
+ depends on (SLUB && SYSFS && !SLUB_TINY) || (SLAB && !DEBUG_SLAB)
select STACKDEPOT_ALWAYS_INIT
help
Enables KASAN (Kernel Address Sanitizer) - a dynamic memory safety
diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
index 47a693c45864..375575a5a0e3 100644
--- a/lib/Kconfig.kcsan
+++ b/lib/Kconfig.kcsan
@@ -125,7 +125,7 @@ config KCSAN_SKIP_WATCH
default 4000
help
The number of per-CPU memory operations to skip, before another
- watchpoint is set up, i.e. one in KCSAN_WATCH_SKIP per-CPU
+ watchpoint is set up, i.e. one in KCSAN_SKIP_WATCH per-CPU
memory operations are used to set up a watchpoint. A smaller value
results in more aggressive race detection, whereas a larger value
improves system performance at the cost of missing some races.
@@ -135,8 +135,8 @@ config KCSAN_SKIP_WATCH_RANDOMIZE
default y
help
If instruction skip count should be randomized, where the maximum is
- KCSAN_WATCH_SKIP. If false, the chosen value is always
- KCSAN_WATCH_SKIP.
+ KCSAN_SKIP_WATCH. If false, the chosen value is always
+ KCSAN_SKIP_WATCH.
config KCSAN_INTERRUPT_WATCHER
bool "Interruptible watchers" if !KCSAN_STRICT
diff --git a/lib/is_signed_type_kunit.c b/lib/is_signed_type_kunit.c
index 207207522925..0a7f6ae62839 100644
--- a/lib/is_signed_type_kunit.c
+++ b/lib/is_signed_type_kunit.c
@@ -21,11 +21,7 @@ static void is_signed_type_test(struct kunit *test)
KUNIT_EXPECT_EQ(test, is_signed_type(bool), false);
KUNIT_EXPECT_EQ(test, is_signed_type(signed char), true);
KUNIT_EXPECT_EQ(test, is_signed_type(unsigned char), false);
-#ifdef __CHAR_UNSIGNED__
KUNIT_EXPECT_EQ(test, is_signed_type(char), false);
-#else
- KUNIT_EXPECT_EQ(test, is_signed_type(char), true);
-#endif
KUNIT_EXPECT_EQ(test, is_signed_type(int), true);
KUNIT_EXPECT_EQ(test, is_signed_type(unsigned int), false);
KUNIT_EXPECT_EQ(test, is_signed_type(long), true);
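This test change appears to accompany the tree-wide switch to building with -funsigned-char, after which plain 'char' is unsigned regardless of the architecture ABI, so the conditional case can go. A small illustration of what the assertion now relies on:

	char c = 0xf0;				/* with -funsigned-char this is 240 */
	BUILD_BUG_ON(is_signed_type(char));	/* char is never signed tree-wide */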
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index e5c5315da274..668f6aa6a75d 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -230,7 +230,8 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
percpu_ref_noop_confirm_switch;
percpu_ref_get(ref); /* put after confirmation */
- call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu);
+ call_rcu_hurry(&ref->data->rcu,
+ percpu_ref_switch_to_atomic_rcu);
}
static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index 7a0564d7cb7a..bdf358d520b4 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -9,10 +9,25 @@
static struct kunit_resource resource;
static int slab_errors;
+/*
+ * Wrapper around kmem_cache_create() that drops the 'align' and 'ctor'
+ * parameters and sets the SLAB_SKIP_KFENCE flag, so no object comes from
+ * the kfence pool, where the access could be caught by both our test and
+ * the kfence sanity check.
+ */
+static struct kmem_cache *test_kmem_cache_create(const char *name,
+ unsigned int size, slab_flags_t flags)
+{
+ struct kmem_cache *s = kmem_cache_create(name, size, 0,
+ (flags | SLAB_NO_USER_FLAGS), NULL);
+ s->flags |= SLAB_SKIP_KFENCE;
+ return s;
+}
+
static void test_clobber_zone(struct kunit *test)
{
- struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_alloc", 64, 0,
- SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_alloc", 64,
+ SLAB_RED_ZONE);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kasan_disable_current();
@@ -29,8 +44,8 @@ static void test_clobber_zone(struct kunit *test)
#ifndef CONFIG_KASAN
static void test_next_pointer(struct kunit *test)
{
- struct kmem_cache *s = kmem_cache_create("TestSlub_next_ptr_free", 64, 0,
- SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_next_ptr_free",
+ 64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
unsigned long tmp;
unsigned long *ptr_addr;
@@ -74,8 +89,8 @@ static void test_next_pointer(struct kunit *test)
static void test_first_word(struct kunit *test)
{
- struct kmem_cache *s = kmem_cache_create("TestSlub_1th_word_free", 64, 0,
- SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_1th_word_free",
+ 64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kmem_cache_free(s, p);
@@ -89,8 +104,8 @@ static void test_first_word(struct kunit *test)
static void test_clobber_50th_byte(struct kunit *test)
{
- struct kmem_cache *s = kmem_cache_create("TestSlub_50th_word_free", 64, 0,
- SLAB_POISON|SLAB_NO_USER_FLAGS, NULL);
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_50th_word_free",
+ 64, SLAB_POISON);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kmem_cache_free(s, p);
@@ -105,8 +120,8 @@ static void test_clobber_50th_byte(struct kunit *test)
static void test_clobber_redzone_free(struct kunit *test)
{
- struct kmem_cache *s = kmem_cache_create("TestSlub_RZ_free", 64, 0,
- SLAB_RED_ZONE|SLAB_NO_USER_FLAGS, NULL);
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_free", 64,
+ SLAB_RED_ZONE);
u8 *p = kmem_cache_alloc(s, GFP_KERNEL);
kasan_disable_current();
@@ -120,6 +135,27 @@ static void test_clobber_redzone_free(struct kunit *test)
kmem_cache_destroy(s);
}
+static void test_kmalloc_redzone_access(struct kunit *test)
+{
+ struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32,
+ SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
+ u8 *p = kmalloc_trace(s, GFP_KERNEL, 18);
+
+ kasan_disable_current();
+
+ /* Suppress the -Warray-bounds warning */
+ OPTIMIZER_HIDE_VAR(p);
+ p[18] = 0xab;
+ p[19] = 0xab;
+
+ validate_slab_cache(s);
+ KUNIT_EXPECT_EQ(test, 2, slab_errors);
+
+ kasan_enable_current();
+ kmem_cache_free(s, p);
+ kmem_cache_destroy(s);
+}
+
static int test_init(struct kunit *test)
{
slab_errors = 0;
@@ -139,6 +175,7 @@ static struct kunit_case test_cases[] = {
#endif
KUNIT_CASE(test_clobber_redzone_free),
+ KUNIT_CASE(test_kmalloc_redzone_access),
{}
};
diff --git a/lib/test_printf.c b/lib/test_printf.c
index 4bd15a593fbd..d6a5d4b5f884 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -179,18 +179,6 @@ test_number(void)
* behaviour.
*/
test("00|0|0|0|0", "%.2d|%.1d|%.0d|%.*d|%1.0d", 0, 0, 0, 0, 0, 0);
-#ifndef __CHAR_UNSIGNED__
- {
- /*
- * Passing a 'char' to a %02x specifier doesn't do
- * what was presumably the intention when char is
- * signed and the value is negative. One must either &
- * with 0xff or cast to u8.
- */
- char val = -16;
- test("0xfffffff0|0xf0|0xf0", "%#02x|%#02x|%#02x", val, val & 0xff, (u8)val);
- }
-#endif
}
static void __init
@@ -704,31 +692,29 @@ flags(void)
static void __init fwnode_pointer(void)
{
- const struct software_node softnodes[] = {
- { .name = "first", },
- { .name = "second", .parent = &softnodes[0], },
- { .name = "third", .parent = &softnodes[1], },
- { NULL /* Guardian */ }
- };
- const char * const full_name = "first/second/third";
+ const struct software_node first = { .name = "first" };
+ const struct software_node second = { .name = "second", .parent = &first };
+ const struct software_node third = { .name = "third", .parent = &second };
+ const struct software_node *group[] = { &first, &second, &third, NULL };
const char * const full_name_second = "first/second";
+ const char * const full_name_third = "first/second/third";
const char * const second_name = "second";
const char * const third_name = "third";
int rval;
- rval = software_node_register_nodes(softnodes);
+ rval = software_node_register_node_group(group);
if (rval) {
pr_warn("cannot register softnodes; rval %d\n", rval);
return;
}
- test(full_name_second, "%pfw", software_node_fwnode(&softnodes[1]));
- test(full_name, "%pfw", software_node_fwnode(&softnodes[2]));
- test(full_name, "%pfwf", software_node_fwnode(&softnodes[2]));
- test(second_name, "%pfwP", software_node_fwnode(&softnodes[1]));
- test(third_name, "%pfwP", software_node_fwnode(&softnodes[2]));
+ test(full_name_second, "%pfw", software_node_fwnode(&second));
+ test(full_name_third, "%pfw", software_node_fwnode(&third));
+ test(full_name_third, "%pfwf", software_node_fwnode(&third));
+ test(second_name, "%pfwP", software_node_fwnode(&second));
+ test(third_name, "%pfwP", software_node_fwnode(&third));
- software_node_unregister_nodes(softnodes);
+ software_node_unregister_node_group(group);
}
static void __init fourcc_pointer(void)
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 24f37bab8bc1..5b0611c00956 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -866,7 +866,7 @@ char *restricted_pointer(char *buf, char *end, const void *ptr,
* kptr_restrict==1 cannot be used in IRQ context
* because its test for CAP_SYSLOG would be meaningless.
*/
- if (in_irq() || in_serving_softirq() || in_nmi()) {
+ if (in_hardirq() || in_serving_softirq() || in_nmi()) {
if (spec.field_width == -1)
spec.field_width = 2 * sizeof(ptr);
return error_string(buf, end, "pK-error", spec);
diff --git a/mm/Kconfig b/mm/Kconfig
index 57e1d8c5b505..623d95659ff9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -219,17 +219,43 @@ config SLUB
and has enhanced diagnostics. SLUB is the default choice for
a slab allocator.
-config SLOB
+config SLOB_DEPRECATED
depends on EXPERT
- bool "SLOB (Simple Allocator)"
+ bool "SLOB (Simple Allocator - DEPRECATED)"
depends on !PREEMPT_RT
help
+	  Deprecated and scheduled for removal in a few cycles. SLUB is
+	  recommended as a replacement. CONFIG_SLUB_TINY can be considered
+ on systems with 16MB or less RAM.
+
+ If you need SLOB to stay, please contact [email protected] and
+	  people listed in the SLAB ALLOCATOR section of the MAINTAINERS file,
+ with your use case.
+
SLOB replaces the stock allocator with a drastically simpler
allocator. SLOB is generally more space efficient but
does not perform as well on large systems.
endchoice
+config SLOB
+ bool
+ default y
+ depends on SLOB_DEPRECATED
+
+config SLUB_TINY
+ bool "Configure SLUB for minimal memory footprint"
+ depends on SLUB && EXPERT
+ select SLAB_MERGE_DEFAULT
+ help
+ Configures the SLUB allocator in a way to achieve minimal memory
+ footprint, sacrificing scalability, debugging and other features.
+	  This is intended only for the smallest systems that previously used
+	  the SLOB allocator and is not recommended for systems with more than
+ 16MB RAM.
+
+ If unsure, say N.
+
config SLAB_MERGE_DEFAULT
bool "Allow slab caches to be merged"
default y
@@ -247,7 +273,7 @@ config SLAB_MERGE_DEFAULT
config SLAB_FREELIST_RANDOM
bool "Randomize slab freelist"
- depends on SLAB || SLUB
+ depends on SLAB || (SLUB && !SLUB_TINY)
help
Randomizes the freelist order used on creating new pages. This
security feature reduces the predictability of the kernel slab
@@ -255,7 +281,7 @@ config SLAB_FREELIST_RANDOM
config SLAB_FREELIST_HARDENED
bool "Harden slab freelist metadata"
- depends on SLAB || SLUB
+ depends on SLAB || (SLUB && !SLUB_TINY)
help
Many kernel heap attacks try to target slab cache metadata and
other infrastructure. This options makes minor performance
@@ -267,7 +293,7 @@ config SLAB_FREELIST_HARDENED
config SLUB_STATS
default n
bool "Enable SLUB performance statistics"
- depends on SLUB && SYSFS
+ depends on SLUB && SYSFS && !SLUB_TINY
help
SLUB statistics are useful to debug SLUBs allocation behavior in
order find ways to optimize the allocator. This should never be
@@ -279,7 +305,7 @@ config SLUB_STATS
config SLUB_CPU_PARTIAL
default y
- depends on SLUB && SMP
+ depends on SLUB && SMP && !SLUB_TINY
bool "SLUB per cpu partial cache"
help
Per cpu partial caches accelerate objects allocation and freeing
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index ce8dded36de9..fca699ad1fb0 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -56,7 +56,7 @@ config DEBUG_SLAB
config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
- depends on SLUB && SYSFS
+ depends on SLUB && SYSFS && !SLUB_TINY
select STACKDEPOT if STACKTRACE_SUPPORT
help
SLUB has extensive debug support features. Disabling these can
diff --git a/mm/compaction.c b/mm/compaction.c
index c51f7f545afe..1f6da31dd9a5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -985,28 +985,28 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
}
/*
+ * Be careful not to clear PageLRU until after we're
+ * sure the page is not being freed elsewhere -- the
+ * page release code relies on it.
+ */
+ if (unlikely(!get_page_unless_zero(page)))
+ goto isolate_fail;
+
+ /*
* Migration will fail if an anonymous page is pinned in memory,
* so avoid taking lru_lock and isolating it unnecessarily in an
* admittedly racy check.
*/
mapping = page_mapping(page);
- if (!mapping && page_count(page) > page_mapcount(page))
- goto isolate_fail;
+ if (!mapping && (page_count(page) - 1) > total_mapcount(page))
+ goto isolate_fail_put;
/*
* Only allow to migrate anonymous pages in GFP_NOFS context
* because those do not depend on fs locks.
*/
if (!(cc->gfp_mask & __GFP_FS) && mapping)
- goto isolate_fail;
-
- /*
- * Be careful not to clear PageLRU until after we're
- * sure the page is not being freed elsewhere -- the
- * page release code relies on it.
- */
- if (unlikely(!get_page_unless_zero(page)))
- goto isolate_fail;
+ goto isolate_fail_put;
/* Only take pages on LRU: a check now makes later tests safe */
if (!PageLRU(page))
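The reordering above follows the usual pin-then-check pattern; a generic, hedged sketch (the racy-check helper is hypothetical): take the reference first, then perform the racy checks, and make every later failure path drop the pin.

	if (unlikely(!get_page_unless_zero(page)))
		goto isolate_fail;		/* being freed; no reference taken */

	if (!page_passes_racy_checks(page))	/* hypothetical helper */
		goto isolate_fail_put;		/* must drop the reference we hold */

	/* ... proceed with isolation ... */

isolate_fail_put:
	put_page(page);
isolate_fail:
	return false;				/* illustrative failure return */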
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 5ce403378c20..07e5f1bdf025 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -2283,12 +2283,54 @@ static struct damos *damon_sysfs_mk_scheme(
&wmarks);
}
+static void damon_sysfs_update_scheme(struct damos *scheme,
+ struct damon_sysfs_scheme *sysfs_scheme)
+{
+ struct damon_sysfs_access_pattern *access_pattern =
+ sysfs_scheme->access_pattern;
+ struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
+ struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
+ struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
+
+ scheme->pattern.min_sz_region = access_pattern->sz->min;
+ scheme->pattern.max_sz_region = access_pattern->sz->max;
+ scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min;
+ scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max;
+ scheme->pattern.min_age_region = access_pattern->age->min;
+ scheme->pattern.max_age_region = access_pattern->age->max;
+
+ scheme->action = sysfs_scheme->action;
+
+ scheme->quota.ms = sysfs_quotas->ms;
+ scheme->quota.sz = sysfs_quotas->sz;
+ scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms;
+ scheme->quota.weight_sz = sysfs_weights->sz;
+ scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses;
+ scheme->quota.weight_age = sysfs_weights->age;
+
+ scheme->wmarks.metric = sysfs_wmarks->metric;
+ scheme->wmarks.interval = sysfs_wmarks->interval_us;
+ scheme->wmarks.high = sysfs_wmarks->high;
+ scheme->wmarks.mid = sysfs_wmarks->mid;
+ scheme->wmarks.low = sysfs_wmarks->low;
+}
+
static int damon_sysfs_set_schemes(struct damon_ctx *ctx,
struct damon_sysfs_schemes *sysfs_schemes)
{
- int i;
+ struct damos *scheme, *next;
+ int i = 0;
+
+ damon_for_each_scheme_safe(scheme, next, ctx) {
+ if (i < sysfs_schemes->nr)
+ damon_sysfs_update_scheme(scheme,
+ sysfs_schemes->schemes_arr[i]);
+ else
+ damon_destroy_scheme(scheme);
+ i++;
+ }
- for (i = 0; i < sysfs_schemes->nr; i++) {
+ for (; i < sysfs_schemes->nr; i++) {
struct damos *scheme, *next;
scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]);
diff --git a/mm/gup.c b/mm/gup.c
index fe195d47de74..3b7bc2c1fd44 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2852,7 +2852,7 @@ static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned lo
next = pud_addr_end(addr, end);
if (unlikely(!pud_present(pud)))
return 0;
- if (unlikely(pud_huge(pud))) {
+ if (unlikely(pud_huge(pud) || pud_devmap(pud))) {
if (!gup_huge_pud(pud, pudp, addr, next, flags,
pages, nr))
return 0;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f1385c3b6c96..e36ca75311a5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5206,17 +5206,22 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
__unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
- /*
- * Unlock and free the vma lock before releasing i_mmap_rwsem. When
- * the vma_lock is freed, this makes the vma ineligible for pmd
- * sharing. And, i_mmap_rwsem is required to set up pmd sharing.
- * This is important as page tables for this unmapped range will
- * be asynchrously deleted. If the page tables are shared, there
- * will be issues when accessed by someone else.
- */
- __hugetlb_vma_unlock_write_free(vma);
-
- i_mmap_unlock_write(vma->vm_file->f_mapping);
+ if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */
+ /*
+ * Unlock and free the vma lock before releasing i_mmap_rwsem.
+ * When the vma_lock is freed, this makes the vma ineligible
+ * for pmd sharing. And, i_mmap_rwsem is required to set up
+ * pmd sharing. This is important as page tables for this
+		 * unmapped range will be asynchronously deleted. If the page
+ * tables are shared, there will be issues when accessed by
+ * someone else.
+ */
+ __hugetlb_vma_unlock_write_free(vma);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
}
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index d8b5590f9484..b076f597a378 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -450,15 +450,22 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
__memset(alloc_meta, 0, sizeof(*alloc_meta));
}
-size_t kasan_metadata_size(struct kmem_cache *cache)
+size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object)
{
+ struct kasan_cache *info = &cache->kasan_info;
+
if (!kasan_requires_meta())
return 0;
- return (cache->kasan_info.alloc_meta_offset ?
- sizeof(struct kasan_alloc_meta) : 0) +
- ((cache->kasan_info.free_meta_offset &&
- cache->kasan_info.free_meta_offset != KASAN_NO_FREE_META) ?
- sizeof(struct kasan_free_meta) : 0);
+
+ if (in_object)
+ return (info->free_meta_offset ?
+ 0 : sizeof(struct kasan_free_meta));
+ else
+ return (info->alloc_meta_offset ?
+ sizeof(struct kasan_alloc_meta) : 0) +
+ ((info->free_meta_offset &&
+ info->free_meta_offset != KASAN_NO_FREE_META) ?
+ sizeof(struct kasan_free_meta) : 0);
}
static void __kasan_record_aux_stack(void *addr, bool can_alloc)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a8d5ef2a77d2..3703a56571c1 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1051,6 +1051,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
_pmd = pmdp_collapse_flush(vma, address, pmd);
spin_unlock(pmd_ptl);
mmu_notifier_invalidate_range_end(&range);
+ tlb_remove_table_sync_one();
spin_lock(pte_ptl);
result = __collapse_huge_page_isolate(vma, address, pte, cc,
@@ -1379,16 +1380,43 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr,
return SCAN_SUCCEED;
}
+/*
+ * A note about locking:
+ * Trying to take the page table spinlocks would be useless here because those
+ * are only used to synchronize:
+ *
+ * - modifying terminal entries (ones that point to a data page, not to another
+ * page table)
+ * - installing *new* non-terminal entries
+ *
+ * Instead, we need roughly the same kind of protection as free_pgtables() or
+ * mm_take_all_locks() (but only for a single VMA):
+ * The mmap lock together with this VMA's rmap locks covers all paths towards
+ * the page table entries we're messing with here, except for hardware page
+ * table walks and lockless_pages_from_mm().
+ */
static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
- spinlock_t *ptl;
pmd_t pmd;
+ struct mmu_notifier_range range;
mmap_assert_write_locked(mm);
- ptl = pmd_lock(vma->vm_mm, pmdp);
+ if (vma->vm_file)
+ lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem);
+ /*
+ * All anon_vmas attached to the VMA have the same root and are
+ * therefore locked by the same lock.
+ */
+ if (vma->anon_vma)
+ lockdep_assert_held_write(&vma->anon_vma->root->rwsem);
+
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, addr,
+ addr + HPAGE_PMD_SIZE);
+ mmu_notifier_invalidate_range_start(&range);
pmd = pmdp_collapse_flush(vma, addr, pmdp);
- spin_unlock(ptl);
+ tlb_remove_table_sync_one();
+ mmu_notifier_invalidate_range_end(&range);
mm_dec_nr_ptes(mm);
page_table_check_pte_clear_range(mm, addr, pmd);
pte_free(mm, pmd_pgtable(pmd));
@@ -1439,6 +1467,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false))
return SCAN_VMA_CHECK;
+ /*
+ * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings
+ * that got written to. Without this, we'd have to also lock the
+ * anon_vma if one exists.
+ */
+ if (vma->anon_vma)
+ return SCAN_VMA_CHECK;
+
/* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
if (userfaultfd_wp(vma))
return SCAN_PTE_UFFD_WP;
@@ -1472,6 +1508,20 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
goto drop_hpage;
}
+ /*
+ * We need to lock the mapping so that from here on, only GUP-fast and
+ * hardware page walks can access the parts of the page tables that
+ * we're operating on.
+ * See collapse_and_free_pmd().
+ */
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+
+ /*
+ * This spinlock should be unnecessary: Nobody else should be accessing
+	 * the page tables under spinlock protection here; only
+ * lockless_pages_from_mm() and the hardware page walker can access page
+ * tables while all the high-level locks are held in write mode.
+ */
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
result = SCAN_FAIL;
@@ -1526,6 +1576,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
/* step 4: remove pte entries */
collapse_and_free_pmd(mm, vma, haddr, pmd);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+
maybe_install_pmd:
/* step 5: install pmd entry */
result = install_pmd
@@ -1539,6 +1591,7 @@ drop_hpage:
abort:
pte_unmap_unlock(start_pte, ptl);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
goto drop_hpage;
}
@@ -1595,7 +1648,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* An alternative would be drop the check, but check that page
* table is clear before calling pmdp_collapse_flush() under
* ptl. It has higher chance to recover THP for the VMA, but
- * has higher cost too.
+ * has higher cost too. It would also probably require locking
+ * the anon_vma.
*/
if (vma->anon_vma) {
result = SCAN_PAGE_ANON;
diff --git a/mm/madvise.c b/mm/madvise.c
index c7105ec6d08c..b913ba6efc10 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -772,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
* Application no longer needs these pages. If the pages are dirty,
* it's OK to just throw them away. The app will be more careful about
* data it wants to keep. Be sure to free swap resources too. The
- * zap_page_range call sets things up for shrink_active_list to actually free
- * these pages later if no one else has touched them in the meantime,
+ * zap_page_range_single call sets things up for shrink_active_list to actually
+ * free these pages later if no one else has touched them in the meantime,
* although we could add these pages to a global reuse list for
* shrink_active_list to pick up before reclaiming other pages.
*
@@ -790,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- zap_page_range(vma, start, end - start);
+ zap_page_range_single(vma, start, end - start, NULL);
return 0;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a1a35c12635e..266a1ab05434 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4832,6 +4832,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
unsigned int efd, cfd;
struct fd efile;
struct fd cfile;
+ struct dentry *cdentry;
const char *name;
char *endp;
int ret;
@@ -4886,6 +4887,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
goto out_put_cfile;
/*
+ * The control file must be a regular cgroup1 file. As a regular cgroup
+ * file can't be renamed, it's safe to access its name afterwards.
+ */
+ cdentry = cfile.file->f_path.dentry;
+ if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) {
+ ret = -EINVAL;
+ goto out_put_cfile;
+ }
+
+ /*
* Determine the event callbacks and set them in @event. This used
* to be done via struct cftype but cgroup core no longer knows
* about these events. The following is crude but the whole thing
@@ -4893,7 +4904,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
*
* DO NOT ADD NEW FILES.
*/
- name = cfile.file->f_path.dentry->d_name.name;
+ name = cdentry->d_name.name;
if (!strcmp(name, "memory.usage_in_bytes")) {
event->register_event = mem_cgroup_usage_register_event;
@@ -4917,7 +4928,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
* automatically removed on cgroup destruction but the removal is
* asynchronous, so take an extra ref on @css.
*/
- cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent,
+ cfile_css = css_tryget_online_from_dir(cdentry->d_parent,
&memory_cgrp_subsys);
ret = -EINVAL;
if (IS_ERR(cfile_css))
diff --git a/mm/memory.c b/mm/memory.c
index 8a6d5c823f91..8c8420934d60 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1341,15 +1341,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
return ret;
}
-/*
- * Parameter block passed down to zap_pte_range in exceptional cases.
- */
-struct zap_details {
- struct folio *single_folio; /* Locked folio to be unmapped */
- bool even_cows; /* Zap COWed private pages too? */
- zap_flags_t zap_flags; /* Extra flags for zapping */
-};
-
/* Whether we should zap all COWed (private) pages too */
static inline bool should_zap_cows(struct zap_details *details)
{
@@ -1720,7 +1711,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
{
struct mmu_notifier_range range;
struct zap_details details = {
- .zap_flags = ZAP_FLAG_DROP_MARKER,
+ .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP,
/* Careful - we need to zap private pages too! */
.even_cows = true,
};
@@ -1774,19 +1765,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
*
* The range must fit into one VMA.
*/
-static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
+void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *details)
{
+ const unsigned long end = address + size;
struct mmu_notifier_range range;
struct mmu_gather tlb;
lru_add_drain();
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
- address, address + size);
+ address, end);
+ if (is_vm_hugetlb_page(vma))
+ adjust_range_if_pmd_sharing_possible(vma, &range.start,
+ &range.end);
tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm);
mmu_notifier_invalidate_range_start(&range);
- unmap_single_vma(&tlb, vma, address, range.end, details);
+ /*
+ * unmap 'address-end' not 'range.start-range.end' as range
+ * could have been expanded for hugetlb pmd sharing.
+ */
+ unmap_single_vma(&tlb, vma, address, end, details);
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index dff333593a8a..1ee0719bd7a1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -74,13 +74,22 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode)
if (unlikely(!get_page_unless_zero(page)))
goto out;
+ if (unlikely(PageSlab(page)))
+ goto out_putpage;
+ /* Pairs with smp_wmb() in slab freeing, e.g. SLUB's __free_slab() */
+ smp_rmb();
/*
- * Check PageMovable before holding a PG_lock because page's owner
- * assumes anybody doesn't touch PG_lock of newly allocated page
- * so unconditionally grabbing the lock ruins page's owner side.
+ * Check movable flag before taking the page lock because
+	 * we use non-atomic bitops on newly allocated page flags, so
+	 * unconditionally grabbing the lock would corrupt the owner's updates.
*/
if (unlikely(!__PageMovable(page)))
goto out_putpage;
+ /* Pairs with smp_wmb() in slab allocation, e.g. SLUB's alloc_slab_page() */
+ smp_rmb();
+ if (unlikely(PageSlab(page)))
+ goto out_putpage;
+
/*
* As movable pages are not isolated from LRU lists, concurrent
* compaction threads can race against page migration functions
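The PageSlab checks above pair with the smp_wmb() calls added to the slab allocator later in this patch; below is a hedged sketch of the ordering, not the exact code. The writer makes the slab flag visible before reusing the mapping field, and the reader re-checks the flag around its mapping-based test with read barriers in between.

	/* Writer (slab allocation): publish the flag before touching mapping. */
	__folio_set_slab(folio);
	smp_wmb();
	/* ... later writes to fields overlapping folio->mapping ... */

	/* Reader (isolation): */
	if (PageSlab(page))
		goto out;
	smp_rmb();
	if (!__PageMovable(page))	/* inspects page->mapping bits */
		goto out;
	smp_rmb();
	if (PageSlab(page))
		goto out;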
diff --git a/mm/mmap.c b/mm/mmap.c
index 74a84eb33b90..54abd46e6007 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -226,8 +226,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
/* Search one past newbrk */
mas_set(&mas, newbrk);
brkvma = mas_find(&mas, oldbrk);
- BUG_ON(brkvma == NULL);
- if (brkvma->vm_start >= oldbrk)
+ if (!brkvma || brkvma->vm_start >= oldbrk)
goto out; /* mapping intersects with an existing non-brk vma. */
/*
* mm->brk must be protected by write mmap_lock.
@@ -1779,9 +1778,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
*/
pgoff = 0;
get_area = shmem_get_unmapped_area;
- } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
- /* Ensures that larger anonymous mappings are THP aligned. */
- get_area = thp_get_unmapped_area;
}
addr = get_area(file, addr, len, pgoff, flags);
@@ -2949,9 +2945,9 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
* Expand the existing vma if possible; Note that singular lists do not
* occur after forking, so the expand will only happen on new VMAs.
*/
- if (vma &&
- (!vma->anon_vma || list_is_singular(&vma->anon_vma_chain)) &&
- ((vma->vm_flags & ~VM_SOFTDIRTY) == flags)) {
+ if (vma && vma->vm_end == addr && !vma_policy(vma) &&
+ can_vma_merge_after(vma, flags, NULL, NULL,
+ addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) {
mas_set_range(mas, vma->vm_start, addr + len - 1);
if (mas_preallocate(mas, vma, GFP_KERNEL))
return -ENOMEM;
@@ -3038,11 +3034,6 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
goto munmap_failed;
vma = mas_prev(&mas, 0);
- if (!vma || vma->vm_end != addr || vma_policy(vma) ||
- !can_vma_merge_after(vma, flags, NULL, NULL,
- addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL))
- vma = NULL;
-
ret = do_brk_flags(&mas, vma, addr, len, flags);
populate = ((mm->def_flags & VM_LOCKED) != 0);
mmap_write_unlock(mm);
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index add4244e5790..3a2c3f8cad2f 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -153,7 +153,7 @@ static void tlb_remove_table_smp_sync(void *arg)
/* Simply deliver the interrupt */
}
-static void tlb_remove_table_sync_one(void)
+void tlb_remove_table_sync_one(void)
{
/*
* This isn't an RCU grace period and hence the page-tables cannot be
@@ -177,8 +177,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
-static void tlb_remove_table_sync_one(void) { }
-
static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
__tlb_remove_table_free(batch);
diff --git a/mm/shmem.c b/mm/shmem.c
index c1d8b8a1aa3b..82911fefc2d5 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -948,6 +948,15 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
index++;
}
+ /*
+ * When undoing a failed fallocate, we want none of the partial folio
+ * zeroing and splitting below, but shall want to truncate the whole
+ * folio when !uptodate indicates that it was added by this fallocate,
+ * even when [lstart, lend] covers only a part of the folio.
+ */
+ if (unfalloc)
+ goto whole_folios;
+
same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT);
if (folio) {
@@ -973,6 +982,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
folio_put(folio);
}
+whole_folios:
+
index = start;
while (index < end) {
cond_resched();
diff --git a/mm/slab.c b/mm/slab.c
index 59c8e28f7b6a..7a269db050ee 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -234,7 +234,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
parent->shared = NULL;
parent->alien = NULL;
parent->colour_next = 0;
- spin_lock_init(&parent->list_lock);
+ raw_spin_lock_init(&parent->list_lock);
parent->free_objects = 0;
parent->free_touched = 0;
}
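A hedged note on this conversion: raw_spinlock_t keeps spinning even on PREEMPT_RT, where plain spinlock_t becomes a sleeping lock, so it suits locks taken in contexts that must not sleep. Declaration and use look roughly like this ('node_lists' is an illustrative stand-in for kmem_cache_node):

	#include <linux/spinlock.h>

	struct node_lists {
		raw_spinlock_t list_lock;	/* spins even on PREEMPT_RT */
		/* ... per-node slab lists ... */
	};

	raw_spin_lock_init(&n->list_lock);

	raw_spin_lock_irq(&n->list_lock);
	/* ... manipulate the per-node lists with interrupts off ... */
	raw_spin_unlock_irq(&n->list_lock);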
@@ -559,9 +559,9 @@ static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
slab_node = slab_nid(slab);
n = get_node(cachep, slab_node);
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, slab_node, &list);
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
}
@@ -684,7 +684,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
struct kmem_cache_node *n = get_node(cachep, node);
if (ac->avail) {
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
/*
* Stuff objects into the remote nodes shared array first.
* That way we could avoid the overhead of putting the objects
@@ -695,7 +695,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
free_block(cachep, ac->entry, ac->avail, node, list);
ac->avail = 0;
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
}
}
@@ -768,9 +768,9 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
slabs_destroy(cachep, &list);
} else {
n = get_node(cachep, slab_node);
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
free_block(cachep, &objp, 1, slab_node, &list);
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
}
return 1;
@@ -811,10 +811,10 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
*/
n = get_node(cachep, node);
if (n) {
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
cachep->num;
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
return 0;
}
@@ -893,7 +893,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
goto fail;
n = get_node(cachep, node);
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
if (n->shared && force_change) {
free_block(cachep, n->shared->entry,
n->shared->avail, node, &list);
@@ -911,7 +911,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
new_alien = NULL;
}
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
/*
@@ -950,7 +950,7 @@ static void cpuup_canceled(long cpu)
if (!n)
continue;
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
/* Free limit for this kmem_cache_node */
n->free_limit -= cachep->batchcount;
@@ -961,7 +961,7 @@ static void cpuup_canceled(long cpu)
nc->avail = 0;
if (!cpumask_empty(mask)) {
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
goto free_slab;
}
@@ -975,7 +975,7 @@ static void cpuup_canceled(long cpu)
alien = n->alien;
n->alien = NULL;
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
kfree(shared);
if (alien) {
@@ -1159,7 +1159,7 @@ static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
- spin_lock_init(&ptr->list_lock);
+ raw_spin_lock_init(&ptr->list_lock);
MAKE_ALL_LISTS(cachep, ptr, nodeid);
cachep->node[nodeid] = ptr;
@@ -1330,11 +1330,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
for_each_kmem_cache_node(cachep, node, n) {
unsigned long total_slabs, free_slabs, free_objs;
- spin_lock_irqsave(&n->list_lock, flags);
+ raw_spin_lock_irqsave(&n->list_lock, flags);
total_slabs = n->total_slabs;
free_slabs = n->free_slabs;
free_objs = n->free_objects;
- spin_unlock_irqrestore(&n->list_lock, flags);
+ raw_spin_unlock_irqrestore(&n->list_lock, flags);
pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
node, total_slabs - free_slabs, total_slabs,
@@ -1370,6 +1370,8 @@ static struct slab *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
account_slab(slab, cachep->gfporder, cachep, flags);
__folio_set_slab(folio);
+ /* Make the flag visible before any changes to folio->mapping */
+ smp_wmb();
/* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
if (sk_memalloc_socks() && page_is_pfmemalloc(folio_page(folio, 0)))
slab_set_pfmemalloc(slab);
@@ -1387,9 +1389,11 @@ static void kmem_freepages(struct kmem_cache *cachep, struct slab *slab)
BUG_ON(!folio_test_slab(folio));
__slab_clear_pfmemalloc(slab);
- __folio_clear_slab(folio);
page_mapcount_reset(folio_page(folio, 0));
folio->mapping = NULL;
+ /* Make the mapping reset visible before clearing the flag */
+ smp_wmb();
+ __folio_clear_slab(folio);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += 1 << order;
@@ -2096,7 +2100,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
+ assert_raw_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
#endif
}
@@ -2104,7 +2108,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&get_node(cachep, node)->list_lock);
+ assert_raw_spin_locked(&get_node(cachep, node)->list_lock);
#endif
}
@@ -2144,9 +2148,9 @@ static void do_drain(void *arg)
check_irq_off();
ac = cpu_cache_get(cachep);
n = get_node(cachep, node);
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
free_block(cachep, ac->entry, ac->avail, node, &list);
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
ac->avail = 0;
slabs_destroy(cachep, &list);
}
@@ -2164,9 +2168,9 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
drain_alien_cache(cachep, n->alien);
for_each_kmem_cache_node(cachep, node, n) {
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
drain_array_locked(cachep, n->shared, node, true, &list);
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
}
@@ -2188,10 +2192,10 @@ static int drain_freelist(struct kmem_cache *cache,
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
p = n->slabs_free.prev;
if (p == &n->slabs_free) {
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
goto out;
}
@@ -2204,7 +2208,7 @@ static int drain_freelist(struct kmem_cache *cache,
* to the cache.
*/
n->free_objects -= cache->num;
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
slab_destroy(cache, slab);
nr_freed++;
}
@@ -2629,7 +2633,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
INIT_LIST_HEAD(&slab->slab_list);
n = get_node(cachep, slab_nid(slab));
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
n->total_slabs++;
if (!slab->active) {
list_add_tail(&slab->slab_list, &n->slabs_free);
@@ -2639,7 +2643,7 @@ static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num - slab->active;
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
}
@@ -2805,7 +2809,7 @@ static struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
{
struct slab *slab;
- assert_spin_locked(&n->list_lock);
+ assert_raw_spin_locked(&n->list_lock);
slab = list_first_entry_or_null(&n->slabs_partial, struct slab,
slab_list);
if (!slab) {
@@ -2832,10 +2836,10 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
if (!gfp_pfmemalloc_allowed(flags))
return NULL;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
slab = get_first_slab(n, true);
if (!slab) {
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
return NULL;
}
@@ -2844,7 +2848,7 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
fixup_slab_list(cachep, n, slab, &list);
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
return obj;
@@ -2903,7 +2907,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
if (!n->free_objects && (!shared || !shared->avail))
goto direct_grow;
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
shared = READ_ONCE(n->shared);
/* See if we can refill from the shared array */
@@ -2927,7 +2931,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
must_grow:
n->free_objects -= ac->avail;
alloc_done:
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
direct_grow:
@@ -3147,7 +3151,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
BUG_ON(!n);
check_irq_off();
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
slab = get_first_slab(n, false);
if (!slab)
goto must_grow;
@@ -3165,12 +3169,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
fixup_slab_list(cachep, n, slab, &list);
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
return obj;
must_grow:
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
if (slab) {
/* This slab isn't counted yet so don't update free_objects */
@@ -3254,7 +3258,8 @@ slab_alloc_node(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags,
init = slab_want_init_on_alloc(flags, cachep);
out:
- slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init);
+ slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init,
+ cachep->object_size);
return objp;
}
@@ -3325,7 +3330,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
check_irq_off();
n = get_node(cachep, node);
- spin_lock(&n->list_lock);
+ raw_spin_lock(&n->list_lock);
if (n->shared) {
struct array_cache *shared_array = n->shared;
int max = shared_array->limit - shared_array->avail;
@@ -3354,7 +3359,7 @@ free_done:
STATS_SET_FREEABLE(cachep, i);
}
#endif
- spin_unlock(&n->list_lock);
+ raw_spin_unlock(&n->list_lock);
ac->avail -= batchcount;
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
slabs_destroy(cachep, &list);
@@ -3446,16 +3451,6 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
return ret;
}
-/**
- * kmem_cache_alloc - Allocate an object
- * @cachep: The cache to allocate from.
- * @flags: See kmalloc().
- *
- * Allocate an object from this cache. The flags are only relevant
- * if the cache has no available objects.
- *
- * Return: pointer to the new object or %NULL in case of error
- */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
return __kmem_cache_alloc_lru(cachep, NULL, flags);
@@ -3507,13 +3502,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
* Done outside of the IRQ disabled section.
*/
slab_post_alloc_hook(s, objcg, flags, size, p,
- slab_want_init_on_alloc(flags, s));
+ slab_want_init_on_alloc(flags, s), s->object_size);
/* FIXME: Trace call missing. Christoph would like a bulk variant */
return size;
error:
local_irq_enable();
cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
- slab_post_alloc_hook(s, objcg, flags, i, p, false);
+ slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
kmem_cache_free_bulk(s, i, p);
return 0;
}
@@ -3721,9 +3716,9 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
node = cpu_to_mem(cpu);
n = get_node(cachep, node);
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
free_block(cachep, ac->entry, ac->avail, node, &list);
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
}
free_percpu(prev);
@@ -3815,9 +3810,9 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
return;
}
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
drain_array_locked(cachep, ac, node, false, &list);
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
}
@@ -3901,7 +3896,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
for_each_kmem_cache_node(cachep, node, n) {
check_irq_on();
- spin_lock_irq(&n->list_lock);
+ raw_spin_lock_irq(&n->list_lock);
total_slabs += n->total_slabs;
free_slabs += n->free_slabs;
@@ -3910,7 +3905,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
if (n->shared)
shared_avail += n->shared->avail;
- spin_unlock_irq(&n->list_lock);
+ raw_spin_unlock_irq(&n->list_lock);
}
num_objs = total_slabs * cachep->num;
active_slabs = total_slabs - free_slabs;
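The mm/slab.c hunks above convert every taker of the per-node list_lock from the spin_lock*() calls to the raw_spin_lock*() variants, presumably so the lock stays a true spinning lock on PREEMPT_RT. A minimal kernel-style sketch of the resulting pattern, using an illustrative demo_node in place of the real struct kmem_cache_node:

#include <linux/spinlock.h>
#include <linux/list.h>

struct demo_node {
	raw_spinlock_t list_lock;	/* was spinlock_t before these hunks */
	struct list_head slabs_free;
	unsigned long free_objects;
};

/* Illustrative only: raw_spinlock_t never turns into a sleeping lock on
 * PREEMPT_RT, so it can keep protecting the node lists from the
 * IRQ-disabled paths seen in the hunks above. */
static void demo_drain_free_list(struct demo_node *n)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&n->list_lock, flags);
	INIT_LIST_HEAD(&n->slabs_free);
	n->free_objects = 0;
	raw_spin_unlock_irqrestore(&n->list_lock, flags);
}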
diff --git a/mm/slab.h b/mm/slab.h
index 0202a8c2f0d2..7cc432969945 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -11,37 +11,43 @@ struct slab {
#if defined(CONFIG_SLAB)
+ struct kmem_cache *slab_cache;
union {
- struct list_head slab_list;
+ struct {
+ struct list_head slab_list;
+ void *freelist; /* array of free object indexes */
+ void *s_mem; /* first object */
+ };
struct rcu_head rcu_head;
};
- struct kmem_cache *slab_cache;
- void *freelist; /* array of free object indexes */
- void *s_mem; /* first object */
unsigned int active;
#elif defined(CONFIG_SLUB)
- union {
- struct list_head slab_list;
- struct rcu_head rcu_head;
-#ifdef CONFIG_SLUB_CPU_PARTIAL
- struct {
- struct slab *next;
- int slabs; /* Nr of slabs left */
- };
-#endif
- };
struct kmem_cache *slab_cache;
- /* Double-word boundary */
- void *freelist; /* first free object */
union {
- unsigned long counters;
struct {
- unsigned inuse:16;
- unsigned objects:15;
- unsigned frozen:1;
+ union {
+ struct list_head slab_list;
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ struct {
+ struct slab *next;
+ int slabs; /* Nr of slabs left */
+ };
+#endif
+ };
+ /* Double-word boundary */
+ void *freelist; /* first free object */
+ union {
+ unsigned long counters;
+ struct {
+ unsigned inuse:16;
+ unsigned objects:15;
+ unsigned frozen:1;
+ };
+ };
};
+ struct rcu_head rcu_head;
};
unsigned int __unused;
@@ -66,9 +72,10 @@ struct slab {
#define SLAB_MATCH(pg, sl) \
static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, __page_flags);
-SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
#ifndef CONFIG_SLOB
-SLAB_MATCH(rcu_head, rcu_head);
+SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */
+#else
+SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
#endif
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
@@ -76,6 +83,9 @@ SLAB_MATCH(memcg_data, memcg_data);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && defined(CONFIG_SLUB)
+static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 2*sizeof(void *)));
+#endif
/**
* folio_slab - Converts from folio to slab.
@@ -207,8 +217,6 @@ struct kmem_cache {
unsigned int size; /* The aligned/padded/added on size */
unsigned int align; /* Alignment as calculated */
slab_flags_t flags; /* Active flags on the slab */
- unsigned int useroffset;/* Usercopy region offset */
- unsigned int usersize; /* Usercopy region size */
const char *name; /* Slab name for sysfs */
int refcount; /* Use counter */
void (*ctor)(void *); /* Called on object slot creation */
@@ -336,7 +344,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
SLAB_ACCOUNT)
#elif defined(CONFIG_SLUB)
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
- SLAB_TEMPORARY | SLAB_ACCOUNT | SLAB_NO_USER_FLAGS)
+ SLAB_TEMPORARY | SLAB_ACCOUNT | \
+ SLAB_NO_USER_FLAGS | SLAB_KMALLOC)
#else
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
#endif
@@ -356,6 +365,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | \
SLAB_ACCOUNT | \
+ SLAB_KMALLOC | \
SLAB_NO_USER_FLAGS)
bool __kmem_cache_empty(struct kmem_cache *);
@@ -720,13 +730,27 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
static inline void slab_post_alloc_hook(struct kmem_cache *s,
struct obj_cgroup *objcg, gfp_t flags,
- size_t size, void **p, bool init)
+ size_t size, void **p, bool init,
+ unsigned int orig_size)
{
+ unsigned int zero_size = s->object_size;
size_t i;
flags &= gfp_allowed_mask;
/*
+ * For a kmalloc object, the allocated memory size (object_size) is likely
+ * larger than the requested size (orig_size). If the redzone check is
+ * enabled for the extra space, don't zero it, as it will be redzoned
+ * soon. The redzone operation on this extra space can be seen as a
+ * replacement for the current poisoning under certain debug options, and
+ * won't break other sanity checks.
+ */
+ if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) &&
+ (s->flags & SLAB_KMALLOC))
+ zero_size = orig_size;
+
+ /*
* As memory initialization might be integrated into KASAN,
* kasan_slab_alloc and initialization memset must be
* kept together to avoid discrepancies in behavior.
@@ -736,7 +760,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
for (i = 0; i < size; i++) {
p[i] = kasan_slab_alloc(s, p[i], flags, init);
if (p[i] && init && !kasan_has_integrated_init())
- memset(p[i], 0, s->object_size);
+ memset(p[i], 0, zero_size);
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, flags);
kmsan_slab_alloc(s, p[i], flags);
@@ -750,9 +774,8 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
* The slab lists for all objects.
*/
struct kmem_cache_node {
- spinlock_t list_lock;
-
#ifdef CONFIG_SLAB
+ raw_spinlock_t list_lock;
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
@@ -768,6 +791,7 @@ struct kmem_cache_node {
#endif
#ifdef CONFIG_SLUB
+ spinlock_t list_lock;
unsigned long nr_partial;
struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
@@ -871,4 +895,8 @@ void __check_heap_object(const void *ptr, unsigned long n,
}
#endif
+#ifdef CONFIG_SLUB_DEBUG
+void skip_orig_size_check(struct kmem_cache *s, const void *object);
+#endif
+
#endif /* MM_SLAB_H */
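The slab_post_alloc_hook() hunk above zeroes only the originally requested bytes of a kmalloc object when kmalloc redzoning is enabled, leaving the [orig_size, object_size) tail for the redzone. A small userspace model of that decision, with simplified stand-ins for the kernel flags and helpers:

#include <stdio.h>
#include <string.h>

/* Simplified stand-ins for the kernel's slab debug flags. */
#define SLAB_STORE_USER	0x1u
#define SLAB_RED_ZONE	0x2u
#define SLAB_KMALLOC	0x4u

struct cache_model {
	unsigned int object_size;
	unsigned int flags;
};

/* Mirrors the zero_size selection in slab_post_alloc_hook() above. */
static unsigned int zero_size_for(const struct cache_model *s,
				  unsigned int orig_size)
{
	if ((s->flags & (SLAB_STORE_USER | SLAB_RED_ZONE)) &&
	    (s->flags & SLAB_KMALLOC))
		return orig_size;	/* leave the tail for the redzone */
	return s->object_size;
}

int main(void)
{
	struct cache_model kmalloc_96 = {
		.object_size = 96,
		.flags = SLAB_RED_ZONE | SLAB_KMALLOC,
	};
	unsigned char obj[96];

	memset(obj, 0xa5, sizeof(obj));			/* pretend poison */
	memset(obj, 0, zero_size_for(&kmalloc_96, 70));	/* kzalloc(70)-style */
	printf("zeroed %u of %u bytes\n",
	       zero_size_for(&kmalloc_96, 70), kmalloc_96.object_size);
	return 0;
}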
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 0042fb2730d1..3e49bb830060 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -143,8 +143,10 @@ int slab_unmergeable(struct kmem_cache *s)
if (s->ctor)
return 1;
+#ifdef CONFIG_HARDENED_USERCOPY
if (s->usersize)
return 1;
+#endif
/*
* We may have set a slab to be unmergeable during bootstrap.
@@ -223,8 +225,10 @@ static struct kmem_cache *create_cache(const char *name,
s->size = s->object_size = object_size;
s->align = align;
s->ctor = ctor;
+#ifdef CONFIG_HARDENED_USERCOPY
s->useroffset = useroffset;
s->usersize = usersize;
+#endif
err = __kmem_cache_create(s, flags);
if (err)
@@ -317,7 +321,8 @@ kmem_cache_create_usercopy(const char *name,
flags &= CACHE_CREATE_MASK;
/* Fail closed on bad usersize or useroffset values. */
- if (WARN_ON(!usersize && useroffset) ||
+ if (!IS_ENABLED(CONFIG_HARDENED_USERCOPY) ||
+ WARN_ON(!usersize && useroffset) ||
WARN_ON(size < usersize || size - usersize < useroffset))
usersize = useroffset = 0;
@@ -595,8 +600,8 @@ void kmem_dump_obj(void *object)
ptroffset = ((char *)object - (char *)kp.kp_objp) - kp.kp_data_offset;
pr_cont(" pointer offset %lu", ptroffset);
}
- if (kp.kp_slab_cache && kp.kp_slab_cache->usersize)
- pr_cont(" size %u", kp.kp_slab_cache->usersize);
+ if (kp.kp_slab_cache && kp.kp_slab_cache->object_size)
+ pr_cont(" size %u", kp.kp_slab_cache->object_size);
if (kp.kp_ret)
pr_cont(" allocated at %pS\n", kp.kp_ret);
else
@@ -640,8 +645,10 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
align = max(align, size);
s->align = calculate_alignment(flags, align, size);
+#ifdef CONFIG_HARDENED_USERCOPY
s->useroffset = useroffset;
s->usersize = usersize;
+#endif
err = __kmem_cache_create(s, flags);
@@ -766,10 +773,16 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
#define KMALLOC_CGROUP_NAME(sz)
#endif
+#ifndef CONFIG_SLUB_TINY
+#define KMALLOC_RCL_NAME(sz) .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #sz,
+#else
+#define KMALLOC_RCL_NAME(sz)
+#endif
+
#define INIT_KMALLOC_INFO(__size, __short_size) \
{ \
.name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \
- .name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size, \
+ KMALLOC_RCL_NAME(__short_size) \
KMALLOC_CGROUP_NAME(__short_size) \
KMALLOC_DMA_NAME(__short_size) \
.size = __size, \
@@ -855,7 +868,7 @@ void __init setup_kmalloc_cache_index_table(void)
static void __init
new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
{
- if (type == KMALLOC_RECLAIM) {
+ if ((KMALLOC_RECLAIM != KMALLOC_NORMAL) && (type == KMALLOC_RECLAIM)) {
flags |= SLAB_RECLAIM_ACCOUNT;
} else if (IS_ENABLED(CONFIG_MEMCG_KMEM) && (type == KMALLOC_CGROUP)) {
if (mem_cgroup_kmem_disabled()) {
@@ -1037,6 +1050,10 @@ size_t __ksize(const void *object)
return folio_size(folio);
}
+#ifdef CONFIG_SLUB_DEBUG
+ skip_orig_size_check(folio_slab(folio)->slab_cache, object);
+#endif
+
return slab_ksize(folio_slab(folio)->slab_cache);
}
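The kmalloc_info changes above stop naming separate "kmalloc-rcl-*" caches when KMALLOC_RECLAIM aliases KMALLOC_NORMAL (presumably the CONFIG_SLUB_TINY case), and new_kmalloc_cache() only adds SLAB_RECLAIM_ACCOUNT when the two types really differ. A compile-time toy showing how such an enum alias makes both the designated initializer and the runtime check collapse; the names here are illustrative, not the kernel's kmalloc_info[] table:

#include <stdio.h>

/* Define TINY to mimic a SLUB_TINY-style build. */
enum ktype {
	KMALLOC_NORMAL,
#ifdef TINY
	KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
	KMALLOC_RECLAIM,
#endif
	NR_KMALLOC_TYPES
};

#ifdef TINY
#define RCL_NAME(sz)
#else
#define RCL_NAME(sz) [KMALLOC_RECLAIM] = "kmalloc-rcl-" #sz,
#endif

static const char *const names_32[NR_KMALLOC_TYPES] = {
	[KMALLOC_NORMAL] = "kmalloc-32",
	RCL_NAME(32)
};

int main(void)
{
	/* Same shape as the (KMALLOC_RECLAIM != KMALLOC_NORMAL) guard above. */
	if (KMALLOC_RECLAIM != KMALLOC_NORMAL)
		printf("reclaim caches get their own name: %s\n",
		       names_32[KMALLOC_RECLAIM]);
	else
		printf("reclaim allocations share %s\n",
		       names_32[KMALLOC_NORMAL]);
	return 0;
}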
diff --git a/mm/slub.c b/mm/slub.c
index 157527d7101b..891df05a4d45 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -187,6 +187,12 @@ do { \
#define USE_LOCKLESS_FAST_PATH() (false)
#endif
+#ifndef CONFIG_SLUB_TINY
+#define __fastpath_inline __always_inline
+#else
+#define __fastpath_inline
+#endif
+
#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
@@ -241,6 +247,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG
+#ifndef CONFIG_SLUB_TINY
/*
* Minimum number of partial slabs. These will be left on the partial
* lists even if they are empty. kmem_cache_shrink may reclaim them.
@@ -253,6 +260,10 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
* sort the partial list by the number of objects in use.
*/
#define MAX_PARTIAL 10
+#else
+#define MIN_PARTIAL 0
+#define MAX_PARTIAL 0
+#endif
#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
SLAB_POISON | SLAB_STORE_USER)
@@ -298,7 +309,7 @@ struct track {
enum track_item { TRACK_ALLOC, TRACK_FREE };
-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
#else
@@ -332,10 +343,12 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
*/
static nodemask_t slab_nodes;
+#ifndef CONFIG_SLUB_TINY
/*
* Workqueue used for flush_cpu_slab().
*/
static struct workqueue_struct *flushwq;
+#endif
/********************************************************************
* Core slab cache functions
@@ -381,10 +394,12 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object)
return freelist_dereference(s, object + s->offset);
}
+#ifndef CONFIG_SLUB_TINY
static void prefetch_freepointer(const struct kmem_cache *s, void *object)
{
prefetchw(object + s->offset);
}
+#endif
/*
* When running under KMSAN, get_freepointer_safe() may return an uninitialized
@@ -829,6 +844,17 @@ static inline void set_orig_size(struct kmem_cache *s,
if (!slub_debug_orig_size(s))
return;
+#ifdef CONFIG_KASAN_GENERIC
+ /*
+ * KASAN can save its free metadata in the object's data area at
+ * offset 0. If that metadata is larger than 'orig_size', it will
+ * overlap the data redzone in [orig_size+1, object_size], so the
+ * check should be skipped.
+ */
+ if (kasan_metadata_size(s, true) > orig_size)
+ orig_size = s->object_size;
+#endif
+
p += get_info_end(s);
p += sizeof(struct track) * 2;
@@ -848,6 +874,11 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
return *(unsigned int *)p;
}
+void skip_orig_size_check(struct kmem_cache *s, const void *object)
+{
+ set_orig_size(s, (void *)object, s->object_size);
+}
+
static void slab_bug(struct kmem_cache *s, char *fmt, ...)
{
struct va_format vaf;
@@ -910,7 +941,7 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
if (slub_debug_orig_size(s))
off += sizeof(unsigned int);
- off += kasan_metadata_size(s);
+ off += kasan_metadata_size(s, false);
if (off != size_from_object(s))
/* Beginning of the filler is the free pointer */
@@ -966,17 +997,28 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab,
static void init_object(struct kmem_cache *s, void *object, u8 val)
{
u8 *p = kasan_reset_tag(object);
+ unsigned int poison_size = s->object_size;
- if (s->flags & SLAB_RED_ZONE)
+ if (s->flags & SLAB_RED_ZONE) {
memset(p - s->red_left_pad, val, s->red_left_pad);
+ if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
+ /*
+ * Redzone the extra space kmalloc allocated beyond the
+ * requested size, and limit the poison size to the
+ * original request size accordingly.
+ */
+ poison_size = get_orig_size(s, object);
+ }
+ }
+
if (s->flags & __OBJECT_POISON) {
- memset(p, POISON_FREE, s->object_size - 1);
- p[s->object_size - 1] = POISON_END;
+ memset(p, POISON_FREE, poison_size - 1);
+ p[poison_size - 1] = POISON_END;
}
if (s->flags & SLAB_RED_ZONE)
- memset(p + s->object_size, val, s->inuse - s->object_size);
+ memset(p + poison_size, val, s->inuse - poison_size);
}
static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
@@ -1070,7 +1112,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
off += sizeof(unsigned int);
}
- off += kasan_metadata_size(s);
+ off += kasan_metadata_size(s, false);
if (size_from_object(s) == off)
return 1;
@@ -1120,6 +1162,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
{
u8 *p = object;
u8 *endobject = object + s->object_size;
+ unsigned int orig_size;
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, slab, object, "Left Redzone",
@@ -1129,6 +1172,17 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
if (!check_bytes_and_report(s, slab, object, "Right Redzone",
endobject, val, s->inuse - s->object_size))
return 0;
+
+ if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
+ orig_size = get_orig_size(s, object);
+
+ if (s->object_size > orig_size &&
+ !check_bytes_and_report(s, slab, object,
+ "kmalloc Redzone", p + orig_size,
+ val, s->object_size - orig_size)) {
+ return 0;
+ }
+ }
} else {
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
check_bytes_and_report(s, slab, p, "Alignment padding",
@@ -1363,7 +1417,7 @@ static inline int alloc_consistency_checks(struct kmem_cache *s,
return 1;
}
-static noinline int alloc_debug_processing(struct kmem_cache *s,
+static noinline bool alloc_debug_processing(struct kmem_cache *s,
struct slab *slab, void *object, int orig_size)
{
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
@@ -1375,7 +1429,7 @@ static noinline int alloc_debug_processing(struct kmem_cache *s,
trace(s, slab, object, 1);
set_orig_size(s, object, orig_size);
init_object(s, object, SLUB_RED_ACTIVE);
- return 1;
+ return true;
bad:
if (folio_test_slab(slab_folio(slab))) {
@@ -1388,7 +1442,7 @@ bad:
slab->inuse = slab->objects;
slab->freelist = NULL;
}
- return 0;
+ return false;
}
static inline int free_consistency_checks(struct kmem_cache *s,
@@ -1641,17 +1695,17 @@ static inline void setup_object_debug(struct kmem_cache *s, void *object) {}
static inline
void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
-static inline int alloc_debug_processing(struct kmem_cache *s,
- struct slab *slab, void *object, int orig_size) { return 0; }
+static inline bool alloc_debug_processing(struct kmem_cache *s,
+ struct slab *slab, void *object, int orig_size) { return true; }
-static inline void free_debug_processing(
- struct kmem_cache *s, struct slab *slab,
- void *head, void *tail, int bulk_cnt,
- unsigned long addr) {}
+static inline bool free_debug_processing(struct kmem_cache *s,
+ struct slab *slab, void *head, void *tail, int *bulk_cnt,
+ unsigned long addr, depot_stack_handle_t handle) { return true; }
static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
static inline int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val) { return 1; }
+static inline depot_stack_handle_t set_track_prepare(void) { return 0; }
static inline void set_track(struct kmem_cache *s, void *object,
enum track_item alloc, unsigned long addr) {}
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
@@ -1676,11 +1730,13 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
+#ifndef CONFIG_SLUB_TINY
static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
void **freelist, void *nextfree)
{
return false;
}
+#endif
#endif /* CONFIG_SLUB_DEBUG */
/*
@@ -1800,6 +1856,8 @@ static inline struct slab *alloc_slab_page(gfp_t flags, int node,
slab = folio_slab(folio);
__folio_set_slab(folio);
+ /* Make the flag visible before any changes to folio->mapping */
+ smp_wmb();
if (page_is_pfmemalloc(folio_page(folio, 0)))
slab_set_pfmemalloc(slab);
@@ -1999,17 +2057,11 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab)
int order = folio_order(folio);
int pages = 1 << order;
- if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
- void *p;
-
- slab_pad_check(s, slab);
- for_each_object(p, s, slab_address(slab), slab->objects)
- check_object(s, slab, p, SLUB_RED_INACTIVE);
- }
-
__slab_clear_pfmemalloc(slab);
- __folio_clear_slab(folio);
folio->mapping = NULL;
+ /* Make the mapping reset visible before clearing the flag */
+ smp_wmb();
+ __folio_clear_slab(folio);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
unaccount_slab(slab, order, s);
@@ -2025,9 +2077,17 @@ static void rcu_free_slab(struct rcu_head *h)
static void free_slab(struct kmem_cache *s, struct slab *slab)
{
- if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
+ if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
+ void *p;
+
+ slab_pad_check(s, slab);
+ for_each_object(p, s, slab_address(slab), slab->objects)
+ check_object(s, slab, p, SLUB_RED_INACTIVE);
+ }
+
+ if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU))
call_rcu(&slab->rcu_head, rcu_free_slab);
- } else
+ else
__free_slab(s, slab);
}
@@ -2214,7 +2274,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
if (!pfmemalloc_match(slab, pc->flags))
continue;
- if (kmem_cache_debug(s)) {
+ if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
object = alloc_single_from_partial(s, n, slab,
pc->orig_size);
if (object)
@@ -2329,6 +2389,8 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context
return get_any_partial(s, pc);
}
+#ifndef CONFIG_SLUB_TINY
+
#ifdef CONFIG_PREEMPTION
/*
* Calculate the next globally unique transaction for disambiguation
@@ -2342,7 +2404,7 @@ static void *get_partial(struct kmem_cache *s, int node, struct partial_context
* different cpus.
*/
#define TID_STEP 1
-#endif
+#endif /* CONFIG_PREEMPTION */
static inline unsigned long next_tid(unsigned long tid)
{
@@ -2411,7 +2473,7 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
void *freelist)
{
- enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST };
+ enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
struct kmem_cache_node *n = get_node(s, slab_nid(slab));
int free_delta = 0;
enum slab_modes mode = M_NONE;
@@ -2487,14 +2549,6 @@ redo:
* acquire_slab() will see a slab that is frozen
*/
spin_lock_irqsave(&n->list_lock, flags);
- } else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
- mode = M_FULL;
- /*
- * This also ensures that the scanning of full
- * slabs from diagnostic functions will not see
- * any frozen slabs.
- */
- spin_lock_irqsave(&n->list_lock, flags);
} else {
mode = M_FULL_NOLIST;
}
@@ -2504,7 +2558,7 @@ redo:
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab")) {
- if (mode == M_PARTIAL || mode == M_FULL)
+ if (mode == M_PARTIAL)
spin_unlock_irqrestore(&n->list_lock, flags);
goto redo;
}
@@ -2518,10 +2572,6 @@ redo:
stat(s, DEACTIVATE_EMPTY);
discard_slab(s, slab);
stat(s, FREE_SLAB);
- } else if (mode == M_FULL) {
- add_full(s, n, slab);
- spin_unlock_irqrestore(&n->list_lock, flags);
- stat(s, DEACTIVATE_FULL);
} else if (mode == M_FULL_NOLIST) {
stat(s, DEACTIVATE_FULL);
}
@@ -2803,6 +2853,13 @@ static int slub_cpu_dead(unsigned int cpu)
return 0;
}
+#else /* CONFIG_SLUB_TINY */
+static inline void flush_all_cpus_locked(struct kmem_cache *s) { }
+static inline void flush_all(struct kmem_cache *s) { }
+static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { }
+static inline int slub_cpu_dead(unsigned int cpu) { return 0; }
+#endif /* CONFIG_SLUB_TINY */
+
/*
* Check if the objects in a per cpu structure fit numa
* locality expectations.
@@ -2828,38 +2885,28 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
}
/* Supports checking bulk free of a constructed freelist */
-static noinline void free_debug_processing(
- struct kmem_cache *s, struct slab *slab,
- void *head, void *tail, int bulk_cnt,
- unsigned long addr)
+static inline bool free_debug_processing(struct kmem_cache *s,
+ struct slab *slab, void *head, void *tail, int *bulk_cnt,
+ unsigned long addr, depot_stack_handle_t handle)
{
- struct kmem_cache_node *n = get_node(s, slab_nid(slab));
- struct slab *slab_free = NULL;
+ bool checks_ok = false;
void *object = head;
int cnt = 0;
- unsigned long flags;
- bool checks_ok = false;
- depot_stack_handle_t handle = 0;
-
- if (s->flags & SLAB_STORE_USER)
- handle = set_track_prepare();
-
- spin_lock_irqsave(&n->list_lock, flags);
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
if (!check_slab(s, slab))
goto out;
}
- if (slab->inuse < bulk_cnt) {
+ if (slab->inuse < *bulk_cnt) {
slab_err(s, slab, "Slab has %d allocated objects but %d are to be freed\n",
- slab->inuse, bulk_cnt);
+ slab->inuse, *bulk_cnt);
goto out;
}
next_object:
- if (++cnt > bulk_cnt)
+ if (++cnt > *bulk_cnt)
goto out_cnt;
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
@@ -2881,61 +2928,22 @@ next_object:
checks_ok = true;
out_cnt:
- if (cnt != bulk_cnt)
+ if (cnt != *bulk_cnt) {
slab_err(s, slab, "Bulk free expected %d objects but found %d\n",
- bulk_cnt, cnt);
-
-out:
- if (checks_ok) {
- void *prior = slab->freelist;
-
- /* Perform the actual freeing while we still hold the locks */
- slab->inuse -= cnt;
- set_freepointer(s, tail, prior);
- slab->freelist = head;
-
- /*
- * If the slab is empty, and node's partial list is full,
- * it should be discarded anyway no matter it's on full or
- * partial list.
- */
- if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
- slab_free = slab;
-
- if (!prior) {
- /* was on full list */
- remove_full(s, n, slab);
- if (!slab_free) {
- add_partial(n, slab, DEACTIVATE_TO_TAIL);
- stat(s, FREE_ADD_PARTIAL);
- }
- } else if (slab_free) {
- remove_partial(n, slab);
- stat(s, FREE_REMOVE_PARTIAL);
- }
+ *bulk_cnt, cnt);
+ *bulk_cnt = cnt;
}
- if (slab_free) {
- /*
- * Update the counters while still holding n->list_lock to
- * prevent spurious validation warnings
- */
- dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
- }
-
- spin_unlock_irqrestore(&n->list_lock, flags);
+out:
if (!checks_ok)
slab_fix(s, "Object at 0x%p not freed", object);
- if (slab_free) {
- stat(s, FREE_SLAB);
- free_slab(s, slab_free);
- }
+ return checks_ok;
}
#endif /* CONFIG_SLUB_DEBUG */
-#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
+#if defined(CONFIG_SLUB_DEBUG) || defined(SLAB_SUPPORTS_SYSFS)
static unsigned long count_partial(struct kmem_cache_node *n,
int (*get_count)(struct slab *))
{
@@ -2949,12 +2957,12 @@ static unsigned long count_partial(struct kmem_cache_node *n,
spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
-#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
+#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */
+#ifdef CONFIG_SLUB_DEBUG
static noinline void
slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
{
-#ifdef CONFIG_SLUB_DEBUG
static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
int node;
@@ -2985,8 +2993,11 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
node, nr_slabs, nr_objs, nr_free);
}
-#endif
}
+#else /* CONFIG_SLUB_DEBUG */
+static inline void
+slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { }
+#endif
static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
{
@@ -2996,6 +3007,7 @@ static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
return true;
}
+#ifndef CONFIG_SLUB_TINY
/*
* Check the slab->freelist and either transfer the freelist to the
* per cpu freelist or deactivate the slab.
@@ -3283,45 +3295,13 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
return p;
}
-/*
- * If the object has been wiped upon free, make sure it's fully initialized by
- * zeroing out freelist pointer.
- */
-static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
- void *obj)
-{
- if (unlikely(slab_want_init_on_free(s)) && obj)
- memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
- 0, sizeof(void *));
-}
-
-/*
- * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
- * have the fastpath folded into their functions. So no function call
- * overhead for requests that can be satisfied on the fastpath.
- *
- * The fastpath works by first checking if the lockless freelist can be used.
- * If not then __slab_alloc is called for slow processing.
- *
- * Otherwise we can simply pick the next object from the lockless free list.
- */
-static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
+static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
- void *object;
struct kmem_cache_cpu *c;
struct slab *slab;
unsigned long tid;
- struct obj_cgroup *objcg = NULL;
- bool init = false;
-
- s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
- if (!s)
- return NULL;
-
- object = kfence_alloc(s, orig_size, gfpflags);
- if (unlikely(object))
- goto out;
+ void *object;
redo:
/*
@@ -3391,22 +3371,95 @@ redo:
stat(s, ALLOC_FASTPATH);
}
+ return object;
+}
+#else /* CONFIG_SLUB_TINY */
+static void *__slab_alloc_node(struct kmem_cache *s,
+ gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
+{
+ struct partial_context pc;
+ struct slab *slab;
+ void *object;
+
+ pc.flags = gfpflags;
+ pc.slab = &slab;
+ pc.orig_size = orig_size;
+ object = get_partial(s, node, &pc);
+
+ if (object)
+ return object;
+
+ slab = new_slab(s, gfpflags, node);
+ if (unlikely(!slab)) {
+ slab_out_of_memory(s, gfpflags, node);
+ return NULL;
+ }
+
+ object = alloc_single_from_new_slab(s, slab, orig_size);
+
+ return object;
+}
+#endif /* CONFIG_SLUB_TINY */
+
+/*
+ * If the object has been wiped upon free, make sure it's fully initialized by
+ * zeroing out freelist pointer.
+ */
+static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
+ void *obj)
+{
+ if (unlikely(slab_want_init_on_free(s)) && obj)
+ memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
+ 0, sizeof(void *));
+}
+
+/*
+ * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
+ * have the fastpath folded into their functions. So no function call
+ * overhead for requests that can be satisfied on the fastpath.
+ *
+ * The fastpath works by first checking if the lockless freelist can be used.
+ * If not then __slab_alloc is called for slow processing.
+ *
+ * Otherwise we can simply pick the next object from the lockless free list.
+ */
+static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
+{
+ void *object;
+ struct obj_cgroup *objcg = NULL;
+ bool init = false;
+
+ s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
+ if (!s)
+ return NULL;
+
+ object = kfence_alloc(s, orig_size, gfpflags);
+ if (unlikely(object))
+ goto out;
+
+ object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
+
maybe_wipe_obj_freeptr(s, object);
init = slab_want_init_on_alloc(gfpflags, s);
out:
- slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
+ /*
+ * When init equals 'true', as for the kzalloc() family, only
+ * @orig_size bytes might be zeroed instead of s->object_size.
+ */
+ slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
return object;
}
-static __always_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
+static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags, unsigned long addr, size_t orig_size)
{
return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
}
-static __always_inline
+static __fastpath_inline
void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags)
{
@@ -3448,6 +3501,67 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
+static noinline void free_to_partial_list(
+ struct kmem_cache *s, struct slab *slab,
+ void *head, void *tail, int bulk_cnt,
+ unsigned long addr)
+{
+ struct kmem_cache_node *n = get_node(s, slab_nid(slab));
+ struct slab *slab_free = NULL;
+ int cnt = bulk_cnt;
+ unsigned long flags;
+ depot_stack_handle_t handle = 0;
+
+ if (s->flags & SLAB_STORE_USER)
+ handle = set_track_prepare();
+
+ spin_lock_irqsave(&n->list_lock, flags);
+
+ if (free_debug_processing(s, slab, head, tail, &cnt, addr, handle)) {
+ void *prior = slab->freelist;
+
+ /* Perform the actual freeing while we still hold the locks */
+ slab->inuse -= cnt;
+ set_freepointer(s, tail, prior);
+ slab->freelist = head;
+
+ /*
+ * If the slab is empty, and the node's partial list is full,
+ * it should be discarded anyway, no matter whether it is on
+ * the full or the partial list.
+ */
+ if (slab->inuse == 0 && n->nr_partial >= s->min_partial)
+ slab_free = slab;
+
+ if (!prior) {
+ /* was on full list */
+ remove_full(s, n, slab);
+ if (!slab_free) {
+ add_partial(n, slab, DEACTIVATE_TO_TAIL);
+ stat(s, FREE_ADD_PARTIAL);
+ }
+ } else if (slab_free) {
+ remove_partial(n, slab);
+ stat(s, FREE_REMOVE_PARTIAL);
+ }
+ }
+
+ if (slab_free) {
+ /*
+ * Update the counters while still holding n->list_lock to
+ * prevent spurious validation warnings
+ */
+ dec_slabs_node(s, slab_nid(slab_free), slab_free->objects);
+ }
+
+ spin_unlock_irqrestore(&n->list_lock, flags);
+
+ if (slab_free) {
+ stat(s, FREE_SLAB);
+ free_slab(s, slab_free);
+ }
+}
+
/*
* Slow path handling. This may still be called frequently since objects
* have a longer lifetime than the cpu slabs in most processing loads.
@@ -3473,8 +3587,8 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
if (kfence_free(head))
return;
- if (kmem_cache_debug(s)) {
- free_debug_processing(s, slab, head, tail, cnt, addr);
+ if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
+ free_to_partial_list(s, slab, head, tail, cnt, addr);
return;
}
@@ -3574,6 +3688,7 @@ slab_empty:
discard_slab(s, slab);
}
+#ifndef CONFIG_SLUB_TINY
/*
* Fastpath with forced inlining to produce a kfree and kmem_cache_free that
* can perform fastpath freeing without additional function calls.
@@ -3648,8 +3763,18 @@ redo:
}
stat(s, FREE_FASTPATH);
}
+#else /* CONFIG_SLUB_TINY */
+static void do_slab_free(struct kmem_cache *s,
+ struct slab *slab, void *head, void *tail,
+ int cnt, unsigned long addr)
+{
+ void *tail_obj = tail ? : head;
+
+ __slab_free(s, slab, head, tail_obj, cnt, addr);
+}
+#endif /* CONFIG_SLUB_TINY */
-static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
+static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, void **p, int cnt,
unsigned long addr)
{
@@ -3782,18 +3907,13 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
-/* Note that interrupts must be enabled when calling this function. */
-int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
- void **p)
+#ifndef CONFIG_SLUB_TINY
+static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
+ size_t size, void **p, struct obj_cgroup *objcg)
{
struct kmem_cache_cpu *c;
int i;
- struct obj_cgroup *objcg = NULL;
- /* memcg and kmem_cache debug support */
- s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
- if (unlikely(!s))
- return false;
/*
* Drain objects in the per cpu slab, while disabling local
* IRQs, which protects against PREEMPT and interrupts
@@ -3847,19 +3967,72 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
local_unlock_irq(&s->cpu_slab->lock);
slub_put_cpu_ptr(s->cpu_slab);
- /*
- * memcg and kmem_cache debug support and memory initialization.
- * Done outside of the IRQ disabled fastpath loop.
- */
- slab_post_alloc_hook(s, objcg, flags, size, p,
- slab_want_init_on_alloc(flags, s));
return i;
+
error:
slub_put_cpu_ptr(s->cpu_slab);
- slab_post_alloc_hook(s, objcg, flags, i, p, false);
+ slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
+ kmem_cache_free_bulk(s, i, p);
+ return 0;
+
+}
+#else /* CONFIG_SLUB_TINY */
+static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags,
+ size_t size, void **p, struct obj_cgroup *objcg)
+{
+ int i;
+
+ for (i = 0; i < size; i++) {
+ void *object = kfence_alloc(s, s->object_size, flags);
+
+ if (unlikely(object)) {
+ p[i] = object;
+ continue;
+ }
+
+ p[i] = __slab_alloc_node(s, flags, NUMA_NO_NODE,
+ _RET_IP_, s->object_size);
+ if (unlikely(!p[i]))
+ goto error;
+
+ maybe_wipe_obj_freeptr(s, p[i]);
+ }
+
+ return i;
+
+error:
+ slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size);
kmem_cache_free_bulk(s, i, p);
return 0;
}
+#endif /* CONFIG_SLUB_TINY */
+
+/* Note that interrupts must be enabled when calling this function. */
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+ void **p)
+{
+ int i;
+ struct obj_cgroup *objcg = NULL;
+
+ if (!size)
+ return 0;
+
+ /* memcg and kmem_cache debug support */
+ s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
+ if (unlikely(!s))
+ return 0;
+
+ i = __kmem_cache_alloc_bulk(s, flags, size, p, objcg);
+
+ /*
+ * memcg and kmem_cache debug support and memory initialization.
+ * Done outside of the IRQ disabled fastpath loop.
+ */
+ if (i != 0)
+ slab_post_alloc_hook(s, objcg, flags, size, p,
+ slab_want_init_on_alloc(flags, s), s->object_size);
+ return i;
+}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);
@@ -3883,7 +4056,8 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk);
* take the list_lock.
*/
static unsigned int slub_min_order;
-static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
+static unsigned int slub_max_order =
+ IS_ENABLED(CONFIG_SLUB_TINY) ? 1 : PAGE_ALLOC_COSTLY_ORDER;
static unsigned int slub_min_objects;
/*
@@ -4014,10 +4188,12 @@ init_kmem_cache_node(struct kmem_cache_node *n)
#endif
}
+#ifndef CONFIG_SLUB_TINY
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
{
BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
- KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
+ NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH *
+ sizeof(struct kmem_cache_cpu));
/*
* Must align to double word boundary for the double cmpxchg
@@ -4033,6 +4209,12 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
return 1;
}
+#else
+static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
+{
+ return 1;
+}
+#endif /* CONFIG_SLUB_TINY */
static struct kmem_cache *kmem_cache_node;
@@ -4095,7 +4277,9 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
void __kmem_cache_release(struct kmem_cache *s)
{
cache_random_seq_destroy(s);
+#ifndef CONFIG_SLUB_TINY
free_percpu(s->cpu_slab);
+#endif
free_kmem_cache_nodes(s);
}
@@ -4202,7 +4386,8 @@ static int calculate_sizes(struct kmem_cache *s)
*/
s->inuse = size;
- if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
+ if (slub_debug_orig_size(s) ||
+ (flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
s->ctor) {
/*
@@ -4872,8 +5057,10 @@ void __init kmem_cache_init(void)
void __init kmem_cache_init_late(void)
{
+#ifndef CONFIG_SLUB_TINY
flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0);
WARN_ON(!flushwq);
+#endif
}
struct kmem_cache *
@@ -4924,7 +5111,7 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
return 0;
}
-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
static int count_inuse(struct slab *slab)
{
return slab->inuse;
@@ -5182,7 +5369,7 @@ static void process_slab(struct loc_track *t, struct kmem_cache *s,
#endif /* CONFIG_DEBUG_FS */
#endif /* CONFIG_SLUB_DEBUG */
-#ifdef CONFIG_SYSFS
+#ifdef SLAB_SUPPORTS_SYSFS
enum slab_stat_type {
SL_ALL, /* All slabs */
SL_PARTIAL, /* Only partially allocated slabs */
@@ -5502,11 +5689,13 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
SLAB_ATTR_RO(cache_dma);
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
static ssize_t usersize_show(struct kmem_cache *s, char *buf)
{
return sysfs_emit(buf, "%u\n", s->usersize);
}
SLAB_ATTR_RO(usersize);
+#endif
static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
{
@@ -5586,7 +5775,21 @@ static ssize_t failslab_show(struct kmem_cache *s, char *buf)
{
return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
}
-SLAB_ATTR_RO(failslab);
+
+static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
+ size_t length)
+{
+ if (s->refcount > 1)
+ return -EINVAL;
+
+ if (buf[0] == '1')
+ WRITE_ONCE(s->flags, s->flags | SLAB_FAILSLAB);
+ else
+ WRITE_ONCE(s->flags, s->flags & ~SLAB_FAILSLAB);
+
+ return length;
+}
+SLAB_ATTR(failslab);
#endif
static ssize_t shrink_show(struct kmem_cache *s, char *buf)
@@ -5803,7 +6006,9 @@ static struct attribute *slab_attrs[] = {
#ifdef CONFIG_FAILSLAB
&failslab_attr.attr,
#endif
+#ifdef CONFIG_HARDENED_USERCOPY
&usersize_attr.attr,
+#endif
#ifdef CONFIG_KFENCE
&skip_kfence_attr.attr,
#endif
@@ -5920,11 +6125,6 @@ static int sysfs_slab_add(struct kmem_cache *s)
struct kset *kset = cache_kset(s);
int unmergeable = slab_unmergeable(s);
- if (!kset) {
- kobject_init(&s->kobj, &slab_ktype);
- return 0;
- }
-
if (!unmergeable && disable_higher_order_debug &&
(slub_debug & DEBUG_METADATA_FLAGS))
unmergeable = 1;
@@ -6054,9 +6254,8 @@ static int __init slab_sysfs_init(void)
mutex_unlock(&slab_mutex);
return 0;
}
-
-__initcall(slab_sysfs_init);
-#endif /* CONFIG_SYSFS */
+late_initcall(slab_sysfs_init);
+#endif /* SLAB_SUPPORTS_SYSFS */
#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
static int slab_debugfs_show(struct seq_file *seq, void *v)
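The kmem_cache_alloc_bulk() rework above splits the allocation loop into __kmem_cache_alloc_bulk() (with a separate CONFIG_SLUB_TINY variant), bails out early for a zero-sized request, and runs the memcg/debug post-alloc hook in the wrapper. A hedged kernel-style sketch of a caller; the cache pointer and batch size are illustrative:

#include <linux/kernel.h>
#include <linux/slab.h>

static int demo_fill_batch(struct kmem_cache *cache)
{
	void *objs[16];
	int got;

	/* Returns either the full count or 0; a zero-sized request now
	 * short-circuits before the pre/post allocation hooks run. */
	got = kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(objs), objs);
	if (!got)
		return -ENOMEM;

	/* ... use objs[0..got-1] ... */

	kmem_cache_free_bulk(cache, got, objs);
	return 0;
}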
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 026199c047e0..8fcc5fa768c0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3987,7 +3987,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
goto next;
if (!pmd_trans_huge(pmd[i])) {
- if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
+ if (arch_has_hw_nonleaf_pmd_young() &&
get_cap(LRU_GEN_NONLEAF_YOUNG))
pmdp_test_and_clear_young(vma, addr, pmd + i);
goto next;
@@ -4085,14 +4085,14 @@ restart:
#endif
walk->mm_stats[MM_NONLEAF_TOTAL]++;
-#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
- if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
+ if (arch_has_hw_nonleaf_pmd_young() &&
+ get_cap(LRU_GEN_NONLEAF_YOUNG)) {
if (!pmd_young(val))
continue;
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
}
-#endif
+
if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
continue;
@@ -5392,7 +5392,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c
if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
caps |= BIT(LRU_GEN_MM_WALK);
- if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
+ if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
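The mm/vmscan.c hunks swap the compile-time IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) tests for a runtime arch_has_hw_nonleaf_pmd_young() call, so a kernel built with the option can still skip the non-leaf PMD young-bit handling on hardware that lacks it. The generic fallback is presumably a thin wrapper of roughly this shape (its real definition lives in the pgtable headers touched elsewhere in this patch, not in this section):

/* Presumed shape only; not copied from the patch. */
#ifndef arch_has_hw_nonleaf_pmd_young
static inline bool arch_has_hw_nonleaf_pmd_young(void)
{
	return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG);
}
#endif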
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index eeea0a6a75b6..07db2f436d44 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -862,8 +862,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
struct file *file;
p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
- if (!p)
+ if (!p) {
+ sock_release(csocket);
return -ENOMEM;
+ }
csocket->sk->sk_allocation = GFP_NOIO;
file = sock_alloc_file(csocket, 0, NULL);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 215af9b3b589..c57d643afb10 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -972,6 +972,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
hci_dev_lock(hdev);
hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type);
hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
if (!hcon)
return -ENOENT;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index dc65974f5adb..1c3c7ff5c3c6 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -737,7 +737,7 @@ static int __init bt_init(void)
err = bt_sysfs_init();
if (err < 0)
- return err;
+ goto cleanup_led;
err = sock_register(&bt_sock_family_ops);
if (err)
@@ -773,6 +773,8 @@ unregister_socket:
sock_unregister(PF_BLUETOOTH);
cleanup_sysfs:
bt_sysfs_cleanup();
+cleanup_led:
+ bt_leds_cleanup();
return err;
}
diff --git a/net/bluetooth/hci_codec.c b/net/bluetooth/hci_codec.c
index 38201532f58e..3cc135bb1d30 100644
--- a/net/bluetooth/hci_codec.c
+++ b/net/bluetooth/hci_codec.c
@@ -72,9 +72,8 @@ static void hci_read_codec_capabilities(struct hci_dev *hdev, __u8 transport,
continue;
}
- skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS,
- sizeof(*cmd), cmd,
- HCI_CMD_TIMEOUT);
+ skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS,
+ sizeof(*cmd), cmd, 0, HCI_CMD_TIMEOUT, NULL);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to read codec capabilities (%ld)",
PTR_ERR(skb));
@@ -127,8 +126,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev)
struct hci_op_read_local_codec_caps caps;
__u8 i;
- skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL,
- HCI_CMD_TIMEOUT);
+ skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL,
+ 0, HCI_CMD_TIMEOUT, NULL);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to read local supported codecs (%ld)",
@@ -158,7 +157,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev)
for (i = 0; i < std_codecs->num; i++) {
caps.id = std_codecs->codec[i];
caps.direction = 0x00;
- hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps);
+ hci_read_codec_capabilities(hdev,
+ LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps);
}
skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num)
@@ -178,7 +178,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev)
caps.cid = vnd_codecs->codec[i].cid;
caps.vid = vnd_codecs->codec[i].vid;
caps.direction = 0x00;
- hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps);
+ hci_read_codec_capabilities(hdev,
+ LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps);
}
error:
@@ -194,8 +195,8 @@ void hci_read_supported_codecs_v2(struct hci_dev *hdev)
struct hci_op_read_local_codec_caps caps;
__u8 i;
- skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL,
- HCI_CMD_TIMEOUT);
+ skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL,
+ 0, HCI_CMD_TIMEOUT, NULL);
if (IS_ERR(skb)) {
bt_dev_err(hdev, "Failed to read local supported codecs (%ld)",
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0540555b3704..d97fac4f7130 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2764,7 +2764,8 @@ int hci_register_suspend_notifier(struct hci_dev *hdev)
{
int ret = 0;
- if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
+ if (!hdev->suspend_notifier.notifier_call &&
+ !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
ret = register_pm_notifier(&hdev->suspend_notifier);
}
@@ -2776,8 +2777,11 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev)
{
int ret = 0;
- if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks))
+ if (hdev->suspend_notifier.notifier_call) {
ret = unregister_pm_notifier(&hdev->suspend_notifier);
+ if (!ret)
+ hdev->suspend_notifier.notifier_call = NULL;
+ }
return ret;
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 5a0296a4352e..f7e006a36382 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -269,7 +269,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
const void *param)
{
- bt_dev_err(req->hdev, "HCI_REQ-0x%4.4x", opcode);
+ bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode);
hci_req_add_ev(req, opcode, plen, param, 0);
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 76c3107c9f91..1fc693122a47 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -12,6 +12,7 @@
#include <net/bluetooth/mgmt.h>
#include "hci_request.h"
+#include "hci_codec.h"
#include "hci_debugfs.h"
#include "smp.h"
#include "eir.h"
@@ -3780,7 +3781,8 @@ static int hci_read_page_scan_activity_sync(struct hci_dev *hdev)
static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev)
{
if (!(hdev->commands[18] & 0x04) ||
- !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING))
+ !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) ||
+ test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING,
@@ -4238,11 +4240,12 @@ static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev)
/* Read local codec list if the HCI command is supported */
static int hci_read_local_codecs_sync(struct hci_dev *hdev)
{
- if (!(hdev->commands[29] & 0x20))
- return 0;
+ if (hdev->commands[45] & 0x04)
+ hci_read_supported_codecs_v2(hdev);
+ else if (hdev->commands[29] & 0x20)
+ hci_read_supported_codecs(hdev);
- return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL,
- HCI_CMD_TIMEOUT);
+ return 0;
}
/* Read local pairing options if the HCI command is supported */
@@ -4298,7 +4301,8 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev)
bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED);
if (!(hdev->commands[18] & 0x08) ||
- !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING))
+ !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) ||
+ test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
return 0;
if (enabled == hdev->err_data_reporting)
@@ -4457,6 +4461,9 @@ static const struct {
HCI_QUIRK_BROKEN(STORED_LINK_KEY,
"HCI Delete Stored Link Key command is advertised, "
"but not supported."),
+ HCI_QUIRK_BROKEN(ERR_DATA_REPORTING,
+ "HCI Read Default Erroneous Data Reporting command is "
+ "advertised, but not supported."),
HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER,
"HCI Read Transmit Power Level command is advertised, "
"but not supported."),
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index f825857db6d0..26db929b97c4 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -879,6 +879,7 @@ static int iso_listen_bis(struct sock *sk)
iso_pi(sk)->bc_sid);
hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
return err;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 9c24947aa41e..9fdede5fe71c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4453,7 +4453,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
chan->ident = cmd->ident;
l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp);
- chan->num_conf_rsp++;
+ if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP)
+ chan->num_conf_rsp++;
/* Reset config buffer. */
chan->conf_len = 0;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 27dcdcc0b808..c69168f11e44 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -677,7 +677,7 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
static int can_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- if (unlikely(dev->type != ARPHRD_CAN || (!can_is_can_skb(skb)))) {
+ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_can_skb(skb))) {
pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n",
dev->type, skb->len);
@@ -692,7 +692,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canfd_skb(skb)))) {
+ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canfd_skb(skb))) {
pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n",
dev->type, skb->len);
@@ -707,7 +707,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
static int canxl_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
- if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canxl_skb(skb)))) {
+ if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canxl_skb(skb))) {
pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n",
dev->type, skb->len);
diff --git a/net/core/dst.c b/net/core/dst.c
index bc9c9be4e080..a4e738d321ba 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -174,7 +174,7 @@ void dst_release(struct dst_entry *dst)
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
if (!newrefcnt)
- call_rcu(&dst->rcu_head, dst_destroy_rcu);
+ call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index 846588c0070a..53a206d11685 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -49,7 +49,8 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
return NULL;
}
- pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN);
+ if (pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN))
+ return NULL;
dsa_default_offload_fwd_mark(skb);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 38fa19c1e2d5..429250298ac4 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -21,7 +21,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
if (!skb->dev)
return NULL;
- pskb_trim_rcsum(skb, skb->len - len);
+ if (pskb_trim_rcsum(skb, skb->len - len))
+ return NULL;
dsa_default_offload_fwd_mark(skb);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 83e4136516b0..1a85125bda6d 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -665,7 +665,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
* padding and trailer we need to account for the fact that
* skb->data points to skb_mac_header(skb) + ETH_HLEN.
*/
- pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN);
+ if (pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN))
+ return NULL;
/* Trap-to-host frame, no timestamp trailer */
} else {
*source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index a50429a62f74..56bb27d67a2e 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -351,17 +351,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev,
struct hsr_node *node_src)
{
bool was_multicast_frame;
- int res;
+ int res, recv_len;
was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST);
hsr_addr_subst_source(node_src, skb);
skb_pull(skb, ETH_HLEN);
+ recv_len = skb->len;
res = netif_rx(skb);
if (res == NET_RX_DROP) {
dev->stats.rx_dropped++;
} else {
dev->stats.rx_packets++;
- dev->stats.rx_bytes += skb->len;
+ dev->stats.rx_bytes += recv_len;
if (was_multicast_frame)
dev->stats.multicast++;
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e8b9a9202fec..b0acf6e19aed 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -234,13 +234,20 @@ static void inet_free_ifa(struct in_ifaddr *ifa)
call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
+static void in_dev_free_rcu(struct rcu_head *head)
+{
+ struct in_device *idev = container_of(head, struct in_device, rcu_head);
+
+ kfree(rcu_dereference_protected(idev->mc_hash, 1));
+ kfree(idev);
+}
+
void in_dev_finish_destroy(struct in_device *idev)
{
struct net_device *dev = idev->dev;
WARN_ON(idev->ifa_list);
WARN_ON(idev->mc_list);
- kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
@@ -248,7 +255,7 @@ void in_dev_finish_destroy(struct in_device *idev)
if (!idev->dead)
pr_err("Freeing alive in_device %p\n", idev);
else
- kfree(idev);
+ call_rcu(&idev->rcu_head, in_dev_free_rcu);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
@@ -298,12 +305,6 @@ out_kfree:
goto out;
}
-static void in_dev_rcu_put(struct rcu_head *head)
-{
- struct in_device *idev = container_of(head, struct in_device, rcu_head);
- in_dev_put(idev);
-}
-
static void inetdev_destroy(struct in_device *in_dev)
{
struct net_device *dev;
@@ -328,7 +329,7 @@ static void inetdev_destroy(struct in_device *in_dev)
neigh_parms_release(&arp_tbl, in_dev->arp_parms);
arp_ifdown(dev);
- call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
+ in_dev_put(in_dev);
}
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f361d3d56be2..b5736ef16ed2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -841,6 +841,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
return -EINVAL;
}
+ if (!cfg->fc_table)
+ cfg->fc_table = RT_TABLE_MAIN;
+
return 0;
errout:
return err;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f721c308248b..ce9ff3c62e84 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -423,6 +423,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
nfi->fib_prefsrc == fi->fib_prefsrc &&
nfi->fib_priority == fi->fib_priority &&
nfi->fib_type == fi->fib_type &&
+ nfi->fib_tb_id == fi->fib_tb_id &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
!((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
@@ -888,9 +889,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
return 1;
}
- /* cannot match on nexthop object attributes */
- if (fi->nh)
- return 1;
+ if (fi->nh) {
+ if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp)
+ return 1;
+ return 0;
+ }
if (cfg->fc_oif || cfg->fc_gw_family) {
struct fib_nh *nh;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f866d6282b2b..cae9f1a4e059 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1492,24 +1492,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct ip_tunnel_parm *p = &t->parms;
__be16 o_flags = p->o_flags;
- if (t->erspan_ver <= 2) {
- if (t->erspan_ver != 0 && !t->collect_md)
- o_flags |= TUNNEL_KEY;
-
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
- goto nla_put_failure;
-
- if (t->erspan_ver == 1) {
- if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
- goto nla_put_failure;
- } else if (t->erspan_ver == 2) {
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
- goto nla_put_failure;
- if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
- goto nla_put_failure;
- }
- }
-
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
gre_tnl_flags_to_gre_flags(p->i_flags)) ||
@@ -1550,6 +1532,34 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ if (t->erspan_ver <= 2) {
+ if (t->erspan_ver != 0 && !t->collect_md)
+ t->parms.o_flags |= TUNNEL_KEY;
+
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+ goto nla_put_failure;
+
+ if (t->erspan_ver == 1) {
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+ goto nla_put_failure;
+ } else if (t->erspan_ver == 2) {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+ goto nla_put_failure;
+ }
+ }
+
+ return ipgre_fill_info(skb, dev);
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static void erspan_setup(struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1628,7 +1638,7 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = {
.changelink = erspan_changelink,
.dellink = ip_tunnel_dellink,
.get_size = ipgre_get_size,
- .fill_info = ipgre_fill_info,
+ .fill_info = erspan_fill_info,
.get_link_net = ip_tunnel_get_link_net,
};
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index bde333b24837..04b4ec07bb06 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -49,6 +49,11 @@
#include <net/transp_v6.h>
#endif
+#define ping_portaddr_for_each_entry(__sk, node, list) \
+ hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
+#define ping_portaddr_for_each_entry_rcu(__sk, node, list) \
+ hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
+
struct ping_table {
struct hlist_nulls_head hash[PING_HTABLE_SIZE];
spinlock_t lock;
@@ -192,7 +197,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
return NULL;
}
- ping_portaddr_for_each_entry(sk, hnode, hslot) {
+ ping_portaddr_for_each_entry_rcu(sk, hnode, hslot) {
isk = inet_sk(sk);
pr_debug("iterate\n");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e19507614f64..60fd91bb5171 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -920,6 +920,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (err < 0)
goto fail;
+ /* We prevent @rt from being freed. */
+ rcu_read_lock();
+
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
@@ -943,6 +946,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGOKS);
+ rcu_read_unlock();
return 0;
}
@@ -950,6 +954,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
+ rcu_read_unlock();
return err;
slow_path_clean:
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index 2e66598fac79..e8ebd343e2bf 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -452,6 +452,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw,
(status->encoding == RX_ENC_HE && streams > 8)))
return 0;
+ if (idx >= MCS_GROUP_RATES)
+ return 0;
+
duration = airtime_mcs_groups[group].duration[idx];
duration <<= airtime_mcs_groups[group].shift;
*overhead = 36 + (streams << 2);
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 500ed1b81250..7e2065e72915 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -662,6 +662,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
sdata->dev = ndev;
sdata->wpan_dev.wpan_phy = local->hw.phy;
sdata->local = local;
+ INIT_LIST_HEAD(&sdata->wpan_dev.list);
/* setup type-dependent data */
ret = ieee802154_setup_sdata(sdata, type);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b6dc6e260334..1dbc62537259 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2354,12 +2354,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
goto out;
}
- /* if we are invoked by the msk cleanup code, the subflow is
- * already orphaned
- */
- if (ssk->sk_socket)
- sock_orphan(ssk);
-
+ sock_orphan(ssk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2940,7 +2935,11 @@ cleanup:
if (ssk == msk->first)
subflow->fail_tout = 0;
- sock_orphan(ssk);
+ /* detach from the parent socket, but allow data_ready to
+ * push incoming data into the mptcp stack, to properly ack it
+ */
+ ssk->sk_socket = NULL;
+ ssk->sk_wq = NULL;
unlock_sock_fast(ssk, slow);
}
sock_orphan(sk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 02a54d59697b..2159b5f9988f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1745,16 +1745,16 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk)
for (msk = head; msk; msk = next) {
struct sock *sk = (struct sock *)msk;
- bool slow, do_cancel_work;
+ bool do_cancel_work;
sock_hold(sk);
- slow = lock_sock_fast_nested(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
next = msk->dl_next;
msk->first = NULL;
msk->dl_next = NULL;
do_cancel_work = __mptcp_close(sk, 0);
- unlock_sock_fast(sk, slow);
+ release_sock(sk);
if (do_cancel_work)
mptcp_cancel_work(sk);
sock_put(sk);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2692139ce417..23b3fedd619a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -891,7 +891,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone = nf_ct_zone(ct);
if (!nf_ct_ext_valid_pre(ct->ext)) {
- NF_CT_STAT_INC(net, insert_failed);
+ NF_CT_STAT_INC_ATOMIC(net, insert_failed);
return -ETIMEDOUT;
}
@@ -938,7 +938,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
if (!nf_ct_ext_valid_post(ct->ext)) {
nf_ct_kill(ct);
- NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC_ATOMIC(net, drop);
return -ETIMEDOUT;
}
@@ -1275,7 +1275,7 @@ chaintoolong:
*/
if (!nf_ct_ext_valid_post(ct->ext)) {
nf_ct_kill(ct);
- NF_CT_STAT_INC(net, drop);
+ NF_CT_STAT_INC_ATOMIC(net, drop);
return NF_DROP;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d71150a40fb0..1286ae7d4609 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -328,8 +328,13 @@ nla_put_failure:
}
#ifdef CONFIG_NF_CONNTRACK_MARK
-static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark)
+static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
{
+ u32 mark = READ_ONCE(ct->mark);
+
+ if (!mark)
+ return 0;
+
if (nla_put_be32(skb, CTA_MARK, htonl(mark)))
goto nla_put_failure;
return 0;
@@ -543,7 +548,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb,
static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
{
if (ctnetlink_dump_status(skb, ct) < 0 ||
- ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 ||
+ ctnetlink_dump_mark(skb, ct) < 0 ||
ctnetlink_dump_secctx(skb, ct) < 0 ||
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
@@ -722,7 +727,6 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
struct sk_buff *skb;
unsigned int type;
unsigned int flags = 0, group;
- u32 mark;
int err;
if (events & (1 << IPCT_DESTROY)) {
@@ -827,9 +831,8 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
}
#ifdef CONFIG_NF_CONNTRACK_MARK
- mark = READ_ONCE(ct->mark);
- if ((events & (1 << IPCT_MARK) || mark) &&
- ctnetlink_dump_mark(skb, mark) < 0)
+ if (events & (1 << IPCT_MARK) &&
+ ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
nlmsg_end(skb, nlh);
@@ -2671,7 +2674,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
{
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
- u32 mark;
zone = nf_ct_zone(ct);
@@ -2733,8 +2735,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
goto nla_put_failure;
#ifdef CONFIG_NF_CONNTRACK_MARK
- mark = READ_ONCE(ct->mark);
- if (mark && ctnetlink_dump_mark(skb, mark) < 0)
+ if (ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
if (ctnetlink_dump_labels(skb, ct) < 0)
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 00b522890d77..0fdcdb2c9ae4 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -997,13 +997,13 @@ static void flow_offload_queue_work(struct flow_offload_work *offload)
struct net *net = read_pnet(&offload->flowtable->net);
if (offload->cmd == FLOW_CLS_REPLACE) {
- NF_FLOW_TABLE_STAT_INC(net, count_wq_add);
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_add);
queue_work(nf_flow_offload_add_wq, &offload->work);
} else if (offload->cmd == FLOW_CLS_DESTROY) {
- NF_FLOW_TABLE_STAT_INC(net, count_wq_del);
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_del);
queue_work(nf_flow_offload_del_wq, &offload->work);
} else {
- NF_FLOW_TABLE_STAT_INC(net, count_wq_stats);
+ NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_stats);
queue_work(nf_flow_offload_stats_wq, &offload->work);
}
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 4f9299b9dcdd..06d46d182634 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1162,6 +1162,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_field *f;
+ const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
@@ -1202,9 +1203,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
}
/* Validate */
+ start_p = start;
+ end_p = end;
nft_pipapo_for_each_field(f, i, m) {
- const u8 *start_p = start, *end_p = end;
-
if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX)
return -ENOSPC;
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 282c51051dcc..994a0a1efb58 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -240,6 +240,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
target->sens_res = nfca_poll->sens_res;
target->sel_res = nfca_poll->sel_res;
target->nfcid1_len = nfca_poll->nfcid1_len;
+ if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1))
+ return -EPROTO;
if (target->nfcid1_len > 0) {
memcpy(target->nfcid1, nfca_poll->nfcid1,
target->nfcid1_len);
@@ -248,6 +250,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
nfcb_poll = (struct rf_tech_specific_params_nfcb_poll *)params;
target->sensb_res_len = nfcb_poll->sensb_res_len;
+ if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res))
+ return -EPROTO;
if (target->sensb_res_len > 0) {
memcpy(target->sensb_res, nfcb_poll->sensb_res,
target->sensb_res_len);
@@ -256,6 +260,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params;
target->sensf_res_len = nfcf_poll->sensf_res_len;
+ if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res))
+ return -EPROTO;
if (target->sensf_res_len > 0) {
memcpy(target->sensf_res, nfcf_poll->sensf_res,
target->sensf_res_len);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6ce8dd19f33c..1ab65f7f2a0a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2293,8 +2293,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (skb->ip_summed == CHECKSUM_PARTIAL)
status |= TP_STATUS_CSUMNOTREADY;
else if (skb->pkt_type != PACKET_OUTGOING &&
- (skb->ip_summed == CHECKSUM_COMPLETE ||
- skb_csum_unnecessary(skb)))
+ skb_csum_unnecessary(skb))
status |= TP_STATUS_CSUM_VALID;
if (snaplen > res)
@@ -3520,8 +3519,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (skb->ip_summed == CHECKSUM_PARTIAL)
aux.tp_status |= TP_STATUS_CSUMNOTREADY;
else if (skb->pkt_type != PACKET_OUTGOING &&
- (skb->ip_summed == CHECKSUM_COMPLETE ||
- skb_csum_unnecessary(skb)))
+ skb_csum_unnecessary(skb))
aux.tp_status |= TP_STATUS_CSUM_VALID;
aux.tp_len = origlen;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index ef9fceadef8d..ee6514af830f 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -52,6 +52,19 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt)
}
}
+static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid)
+{
+ struct sctp_sched_ops *sched;
+
+ if (!SCTP_SO(stream, sid)->ext)
+ return;
+
+ sched = sctp_sched_ops_from_stream(stream);
+ sched->free_sid(stream, sid);
+ kfree(SCTP_SO(stream, sid)->ext);
+ SCTP_SO(stream, sid)->ext = NULL;
+}
+
/* Migrates chunks from stream queues to new stream queues if needed,
* but not across associations. Also, removes those chunks to streams
* higher than the new max.
@@ -70,16 +83,14 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
* sctp_stream_update will swap ->out pointers.
*/
for (i = 0; i < outcnt; i++) {
- kfree(SCTP_SO(new, i)->ext);
+ sctp_stream_free_ext(new, i);
SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext;
SCTP_SO(stream, i)->ext = NULL;
}
}
- for (i = outcnt; i < stream->outcnt; i++) {
- kfree(SCTP_SO(stream, i)->ext);
- SCTP_SO(stream, i)->ext = NULL;
- }
+ for (i = outcnt; i < stream->outcnt; i++)
+ sctp_stream_free_ext(stream, i);
}
static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
@@ -174,9 +185,9 @@ void sctp_stream_free(struct sctp_stream *stream)
struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
int i;
- sched->free(stream);
+ sched->unsched_all(stream);
for (i = 0; i < stream->outcnt; i++)
- kfree(SCTP_SO(stream, i)->ext);
+ sctp_stream_free_ext(stream, i);
genradix_free(&stream->out);
genradix_free(&stream->in);
}
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 1ad565ed5627..7c8f9d89e16a 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -46,6 +46,10 @@ static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid,
return 0;
}
+static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid)
+{
+}
+
static void sctp_sched_fcfs_free(struct sctp_stream *stream)
{
}
@@ -96,6 +100,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = {
.get = sctp_sched_fcfs_get,
.init = sctp_sched_fcfs_init,
.init_sid = sctp_sched_fcfs_init_sid,
+ .free_sid = sctp_sched_fcfs_free_sid,
.free = sctp_sched_fcfs_free,
.enqueue = sctp_sched_fcfs_enqueue,
.dequeue = sctp_sched_fcfs_dequeue,
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
index 80b5a2c4cbc7..4fc9f2923ed1 100644
--- a/net/sctp/stream_sched_prio.c
+++ b/net/sctp/stream_sched_prio.c
@@ -204,6 +204,24 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
return sctp_sched_prio_set(stream, sid, 0, gfp);
}
+static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid)
+{
+ struct sctp_stream_priorities *prio = SCTP_SO(stream, sid)->ext->prio_head;
+ int i;
+
+ if (!prio)
+ return;
+
+ SCTP_SO(stream, sid)->ext->prio_head = NULL;
+ for (i = 0; i < stream->outcnt; i++) {
+ if (SCTP_SO(stream, i)->ext &&
+ SCTP_SO(stream, i)->ext->prio_head == prio)
+ return;
+ }
+
+ kfree(prio);
+}
+
static void sctp_sched_prio_free(struct sctp_stream *stream)
{
struct sctp_stream_priorities *prio, *n;
@@ -323,6 +341,7 @@ static struct sctp_sched_ops sctp_sched_prio = {
.get = sctp_sched_prio_get,
.init = sctp_sched_prio_init,
.init_sid = sctp_sched_prio_init_sid,
+ .free_sid = sctp_sched_prio_free_sid,
.free = sctp_sched_prio_free,
.enqueue = sctp_sched_prio_enqueue,
.dequeue = sctp_sched_prio_dequeue,
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
index ff425aed62c7..cc444fe0d67c 100644
--- a/net/sctp/stream_sched_rr.c
+++ b/net/sctp/stream_sched_rr.c
@@ -90,6 +90,10 @@ static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid,
return 0;
}
+static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid)
+{
+}
+
static void sctp_sched_rr_free(struct sctp_stream *stream)
{
sctp_sched_rr_unsched_all(stream);
@@ -177,6 +181,7 @@ static struct sctp_sched_ops sctp_sched_rr = {
.get = sctp_sched_rr_get,
.init = sctp_sched_rr_init,
.init_sid = sctp_sched_rr_init_sid,
+ .free_sid = sctp_sched_rr_free_sid,
.free = sctp_sched_rr_free,
.enqueue = sctp_sched_rr_enqueue,
.dequeue = sctp_sched_rr_dequeue,
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index f09316a9035f..d67440de011e 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1971,6 +1971,9 @@ rcv:
/* Ok, everything's fine, try to synch own keys according to peers' */
tipc_crypto_key_synch(rx, *skb);
+ /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */
+ skb_cb = TIPC_SKB_CB(*skb);
+
/* Mark skb decrypted */
skb_cb->decrypted = 1;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index e260c0d557f5..b3ce24823f50 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2224,7 +2224,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (tipc_own_addr(l->net) > msg_prevnode(hdr))
l->net_plane = msg_net_plane(hdr);
- skb_linearize(skb);
+ if (skb_linearize(skb))
+ goto exit;
+
hdr = buf_msg(skb);
data = msg_data(hdr);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b48d97cbbe29..49ddc484c4fe 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1689,6 +1689,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
struct tipc_node *n;
struct sk_buff_head xmitq;
bool node_up = false;
+ struct net *peer_net;
int bearer_id;
int rc;
@@ -1705,18 +1706,23 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
return -EHOSTUNREACH;
}
+ rcu_read_lock();
tipc_node_read_lock(n);
node_up = node_is_up(n);
- if (node_up && n->peer_net && check_net(n->peer_net)) {
+ peer_net = n->peer_net;
+ tipc_node_read_unlock(n);
+ if (node_up && peer_net && check_net(peer_net)) {
/* xmit inner linux container */
- tipc_lxc_xmit(n->peer_net, list);
+ tipc_lxc_xmit(peer_net, list);
if (likely(skb_queue_empty(list))) {
- tipc_node_read_unlock(n);
+ rcu_read_unlock();
tipc_node_put(n);
return 0;
}
}
+ rcu_read_unlock();
+ tipc_node_read_lock(n);
bearer_id = n->active_links[selector & 1];
if (unlikely(bearer_id == INVALID_BEARER_ID)) {
tipc_node_read_unlock(n);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 105f522a89fe..616b55c5b890 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -114,14 +114,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql);
}
-static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb)
+static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb,
+ struct user_namespace *user_ns)
{
- uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk));
+ uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk));
return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid);
}
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
- u32 portid, u32 seq, u32 flags, int sk_ino)
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u32 flags, int sk_ino)
{
struct nlmsghdr *nlh;
struct unix_diag_msg *rep;
@@ -167,7 +169,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
goto out_nlmsg_trim;
if ((req->udiag_show & UDIAG_SHOW_UID) &&
- sk_diag_dump_uid(sk, skb))
+ sk_diag_dump_uid(sk, skb, user_ns))
goto out_nlmsg_trim;
nlmsg_end(skb, nlh);
@@ -179,7 +181,8 @@ out_nlmsg_trim:
}
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
- u32 portid, u32 seq, u32 flags)
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u32 flags)
{
int sk_ino;
@@ -190,7 +193,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
if (!sk_ino)
return 0;
- return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino);
+ return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino);
}
static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -214,7 +217,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto next;
if (!(req->udiag_states & (1 << sk->sk_state)))
goto next;
- if (sk_diag_dump(sk, skb, req,
+ if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI) < 0) {
@@ -282,7 +285,8 @@ again:
if (!rep)
goto out;
- err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid,
+ err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk),
+ NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0, req->udiag_ino);
if (err < 0) {
nlmsg_free(rep);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index da752b0cc752..3d86482e83f5 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -330,7 +330,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
* determine if they are the same ie.
*/
if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) {
- if (!memcmp(tmp_old + 2, tmp + 2, 5)) {
+ if (tmp_old[1] >= 5 && tmp[1] >= 5 &&
+ !memcmp(tmp_old + 2, tmp + 2, 5)) {
/* same vendor ie, copy from
* subelement
*/
@@ -2526,10 +2527,15 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
const struct cfg80211_bss_ies *ies1, *ies2;
size_t ielen = len - offsetof(struct ieee80211_mgmt,
u.probe_resp.variable);
- struct cfg80211_non_tx_bss non_tx_data;
+ struct cfg80211_non_tx_bss non_tx_data = {};
res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt,
len, gfp);
+
+ /* don't do any further MBSSID handling for S1G */
+ if (ieee80211_is_s1g_beacon(mgmt->frame_control))
+ return res;
+
if (!res || !wiphy->support_mbssid ||
!cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
return res;
diff --git a/security/keys/trusted-keys/trusted_tee.c b/security/keys/trusted-keys/trusted_tee.c
index c8626686ee1b..ac3e270ade69 100644
--- a/security/keys/trusted-keys/trusted_tee.c
+++ b/security/keys/trusted-keys/trusted_tee.c
@@ -219,7 +219,8 @@ static int trusted_tee_get_random(unsigned char *key, size_t key_len)
static int optee_ctx_match(struct tee_ioctl_version_data *ver, const void *data)
{
- if (ver->impl_id == TEE_IMPL_ID_OPTEE)
+ if (ver->impl_id == TEE_IMPL_ID_OPTEE &&
+ ver->gen_caps & TEE_GEN_CAP_REG_MEM)
return 1;
else
return 0;
diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c
index f99e00083141..4c677c8546c7 100644
--- a/sound/firewire/dice/dice-stream.c
+++ b/sound/firewire/dice/dice-stream.c
@@ -59,7 +59,7 @@ int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate,
static int select_clock(struct snd_dice *dice, unsigned int rate)
{
- __be32 reg;
+ __be32 reg, new;
u32 data;
int i;
int err;
@@ -83,15 +83,17 @@ static int select_clock(struct snd_dice *dice, unsigned int rate)
if (completion_done(&dice->clock_accepted))
reinit_completion(&dice->clock_accepted);
- reg = cpu_to_be32(data);
+ new = cpu_to_be32(data);
err = snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT,
- &reg, sizeof(reg));
+ &new, sizeof(new));
if (err < 0)
return err;
if (wait_for_completion_timeout(&dice->clock_accepted,
- msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0)
- return -ETIMEDOUT;
+ msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) {
+ if (reg != new)
+ return -ETIMEDOUT;
+ }
return 0;
}
diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c
index 51721edd8f53..e88d9ff95cdf 100644
--- a/sound/soc/codecs/cs42l51.c
+++ b/sound/soc/codecs/cs42l51.c
@@ -143,7 +143,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = {
0, 0xA0, 96, adc_att_tlv),
SOC_DOUBLE_R_SX_TLV("PGA Volume",
CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL,
- 0, 0x19, 30, pga_tlv),
+ 0, 0x1A, 30, pga_tlv),
SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0),
SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0),
SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0),
diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c
index a969547708d4..52bb55724724 100644
--- a/sound/soc/codecs/tlv320adc3xxx.c
+++ b/sound/soc/codecs/tlv320adc3xxx.c
@@ -14,6 +14,7 @@
#include <dt-bindings/sound/tlv320adc3xxx.h>
#include <linux/clk.h>
+#include <linux/gpio/consumer.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/io.h>
@@ -1025,7 +1026,9 @@ static const struct gpio_chip adc3xxx_gpio_chip = {
static void adc3xxx_free_gpio(struct adc3xxx *adc3xxx)
{
+#ifdef CONFIG_GPIOLIB
gpiochip_remove(&adc3xxx->gpio_chip);
+#endif
}
static void adc3xxx_init_gpio(struct adc3xxx *adc3xxx)
diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c
index 79ef4e269bc9..4b86ef82fd93 100644
--- a/sound/soc/fsl/fsl_micfil.c
+++ b/sound/soc/fsl/fsl_micfil.c
@@ -194,6 +194,25 @@ static int fsl_micfil_reset(struct device *dev)
if (ret)
return ret;
+ /*
+ * SRES is a self-clearing bit, but REG_MICFIL_CTRL1 is defined
+ * as a non-volatile register, so SRES remains set in the regmap
+ * cache once written; every later update of REG_MICFIL_CTRL1
+ * would then retrigger the software reset, so clear it explicitly.
+ */
+ ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1,
+ MICFIL_CTRL1_SRES);
+ if (ret)
+ return ret;
+
+ /*
+ * Setting SRES should clear the CHnF flags, but even with an added
+ * delay here they are sometimes left set, so clear CHnF explicitly.
+ */
+ ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF);
+ if (ret)
+ return ret;
+
return 0;
}
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index bd88de056358..55b009d3c681 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -452,7 +452,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
val = ucontrol->value.integer.value[0];
if (mc->platform_max && val > mc->platform_max)
return -EINVAL;
- if (val > max - min)
+ if (val > max)
return -EINVAL;
val_mask = mask << shift;
val = (val + min) & mask;
@@ -464,10 +464,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol,
ret = err;
if (snd_soc_volsw_is_stereo(mc)) {
- unsigned int val2;
+ unsigned int val2 = ucontrol->value.integer.value[1];
+
+ if (mc->platform_max && val2 > mc->platform_max)
+ return -EINVAL;
+ if (val2 > max)
+ return -EINVAL;
val_mask = mask << rshift;
- val2 = (ucontrol->value.integer.value[1] + min) & mask;
+ val2 = (val2 + min) & mask;
val2 = val2 << rshift;
err = snd_soc_component_update_bits(component, reg2, val_mask,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 184ce1684dcd..91b7106a4a73 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -11169,7 +11169,7 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf
}
*link = bpf_program__attach_raw_tracepoint(prog, tp_name);
- return libbpf_get_error(link);
+ return libbpf_get_error(*link);
}
/* Common logic for all BPF program types that attach to a btf_id */
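
The libbpf.c one-liner is easy to misread: attach_raw_tp() gets a struct bpf_link ** out-parameter, and failures are encoded as an error-valued pointer stored at *link. Checking libbpf_get_error(link) therefore inspected the address of the out-parameter itself, which always looks like a valid pointer, so errors were reported as success. A small sketch of the encoded-error-pointer idiom, with get_error(), struct link and attach() as simplified illustrative names rather than the real libbpf implementation:

#include <errno.h>
#include <stdint.h>

/* Pointers in the top 4095 bytes of the address space encode -errno. */
static long get_error(const void *ptr)
{
	if ((uintptr_t)ptr >= (uintptr_t)-4095)
		return (long)(intptr_t)ptr;
	return 0;
}

struct link { int fd; };

static struct link *attach(void)
{
	return (struct link *)(intptr_t)-EINVAL;   /* attach failed */
}

static int do_attach(struct link **out)
{
	*out = attach();
	/* Must test *out: get_error(out) would test the out-parameter's
	 * own, perfectly valid, address and always return 0. */
	return (int)get_error(*out);
}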
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index f3a8e8e74eb8..d504d96adc83 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -234,7 +234,7 @@ static int probe_map_create(enum bpf_map_type map_type)
case BPF_MAP_TYPE_USER_RINGBUF:
key_size = 0;
value_size = 0;
- max_entries = 4096;
+ max_entries = sysconf(_SC_PAGE_SIZE);
break;
case BPF_MAP_TYPE_STRUCT_OPS:
/* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index d285171d4b69..6af142953a94 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -77,6 +77,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
__u32 len = sizeof(info);
struct epoll_event *e;
struct ring *r;
+ __u64 mmap_sz;
void *tmp;
int err;
@@ -115,8 +116,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
r->mask = info.max_entries - 1;
/* Map writable consumer page */
- tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
- map_fd, 0);
+ tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0);
if (tmp == MAP_FAILED) {
err = -errno;
pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
@@ -129,8 +129,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
* data size to allow simple reading of samples that wrap around the
* end of a ring buffer. See kernel implementation for details.
* */
- tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ,
- MAP_SHARED, map_fd, rb->page_size);
+ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
+ if (mmap_sz != (__u64)(size_t)mmap_sz) {
+ pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries);
+ return libbpf_err(-E2BIG);
+ }
+ tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size);
if (tmp == MAP_FAILED) {
err = -errno;
ringbuf_unmap_ring(rb, r);
@@ -348,6 +352,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd)
{
struct bpf_map_info info;
__u32 len = sizeof(info);
+ __u64 mmap_sz;
void *tmp;
struct epoll_event *rb_epoll;
int err;
@@ -384,8 +389,13 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd)
* simple reading and writing of samples that wrap around the end of
* the buffer. See the kernel implementation for details.
*/
- tmp = mmap(NULL, rb->page_size + 2 * info.max_entries,
- PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size);
+ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
+ if (mmap_sz != (__u64)(size_t)mmap_sz) {
+ pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries);
+ return -E2BIG;
+ }
+ tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+ map_fd, rb->page_size);
if (tmp == MAP_FAILED) {
err = -errno;
pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n",
@@ -476,6 +486,10 @@ void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size)
__u64 cons_pos, prod_pos;
struct ringbuf_hdr *hdr;
+ /* The top two bits are used as special flags */
+ if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT))
+ return errno = E2BIG, NULL;
+
/* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in
* the kernel.
*/
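
Both ringbuf.c hunks apply the same guard before mmap(): the mapping length is computed in a 64-bit variable and only used once it is shown to survive a round trip through size_t, so a ring whose size would overflow a 32-bit size_t is rejected with -E2BIG instead of being silently truncated. A compact sketch of that round-trip check, with check_mmap_sz() as an illustrative helper name:

#include <stdint.h>
#include <stddef.h>

/* Returns 0 and stores the length if page_size + 2 * max_entries fits
 * in size_t; returns -1 if the value would be truncated (32-bit hosts). */
static int check_mmap_sz(size_t page_size, uint32_t max_entries, size_t *out)
{
	uint64_t mmap_sz = (uint64_t)page_size + 2 * (uint64_t)max_entries;

	if (mmap_sz != (uint64_t)(size_t)mmap_sz)
		return -1;

	*out = (size_t)mmap_sz;
	return 0;
}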
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index ee819a402b69..11a1d2d4f681 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -464,9 +464,10 @@ to address dependencies, since the address of a location accessed
through a pointer will depend on the value read earlier from that
pointer.
-Finally, a read event and another memory access event are linked by a
-control dependency if the value obtained by the read affects whether
-the second event is executed at all. Simple example:
+Finally, a read event X and a write event Y are linked by a control
+dependency if Y syntactically lies within an arm of an if statement and
+X affects the evaluation of the if condition via a data or address
+dependency (or similarly for a switch statement). Simple example:
int x, y;
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 43ec14c29a60..a7f1e6c8bb0a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -999,6 +999,16 @@ static const char *uaccess_safe_builtin[] = {
"__tsan_read_write4",
"__tsan_read_write8",
"__tsan_read_write16",
+ "__tsan_volatile_read1",
+ "__tsan_volatile_read2",
+ "__tsan_volatile_read4",
+ "__tsan_volatile_read8",
+ "__tsan_volatile_read16",
+ "__tsan_volatile_write1",
+ "__tsan_volatile_write2",
+ "__tsan_volatile_write4",
+ "__tsan_volatile_write8",
+ "__tsan_volatile_write16",
"__tsan_atomic8_load",
"__tsan_atomic16_load",
"__tsan_atomic32_load",
diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
index 099eb4dfd4f7..18405c3b7cee 100644
--- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
+++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
@@ -458,7 +458,7 @@ static void test_sk_storage_map_basic(void)
struct {
int cnt;
int lock;
- } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value;
+ } value = { .cnt = 0xeB9f, .lock = 1, }, lookup_value;
struct bpf_map_create_opts bad_xattr;
int btf_fd, map_fd, sk_fd, err;
@@ -483,38 +483,41 @@ static void test_sk_storage_map_basic(void)
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value,
BPF_EXIST | BPF_F_LOCK);
CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Bump the cnt and update with BPF_EXIST */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST);
CHECK(err, "bpf_map_update_elem(BPF_EXIST)",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Update with BPF_NOEXIST */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value,
BPF_NOEXIST | BPF_F_LOCK);
CHECK(!err || errno != EEXIST,
@@ -526,22 +529,23 @@ static void test_sk_storage_map_basic(void)
value.cnt -= 1;
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Bump the cnt again and update with map_flags == 0 */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n",
err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Test delete elem */
err = bpf_map_delete_elem(map_fd, &sk_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index d457a55ff408..a4b4133d39e9 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -358,10 +358,12 @@ static int get_syms(char ***symsp, size_t *cntp)
* We attach to almost all kernel functions and some of them
* will cause 'suspicious RCU usage' when fprobe is attached
* to them. Filter out the current culprits - arch_cpu_idle
- * and rcu_* functions.
+ * default_idle and rcu_* functions.
*/
if (!strcmp(name, "arch_cpu_idle"))
continue;
+ if (!strcmp(name, "default_idle"))
+ continue;
if (!strncmp(name, "rcu_", 4))
continue;
if (!strcmp(name, "bpf_dispatcher_xdp_func"))
@@ -400,7 +402,7 @@ error:
return err;
}
-static void test_bench_attach(void)
+void serial_test_kprobe_multi_bench_attach(void)
{
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
struct kprobe_multi_empty *skel = NULL;
@@ -468,6 +470,4 @@ void test_kprobe_multi_test(void)
test_attach_api_syms();
if (test__start_subtest("attach_api_fails"))
test_attach_api_fails();
- if (test__start_subtest("bench_attach"))
- test_bench_attach();
}
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 22b31ebb3513..258ddc565deb 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -19,12 +19,12 @@
/*
- * Memory cgroup charging is performed using percpu batches 32 pages
+ * Memory cgroup charging is performed using percpu batches 64 pages
* big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So
* the maximum discrepancy between charge and vmstat entries is number
- * of cpus multiplied by 32 pages.
+ * of cpus multiplied by 64 pages.
*/
-#define MAX_VMSTAT_ERROR (4096 * 32 * get_nprocs())
+#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs())
static int alloc_dcache(const char *cgroup, void *arg)
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 3d7adee7a3e6..ff8fe93f679c 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
bind_bhash
cmsg_sender
+diag_uid
fin_ack_lat
gro
hwtstamp_config
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 969620ae9928..1e4b397cece6 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,3 +1,3 @@
-TEST_GEN_PROGS := test_unix_oob unix_connect
+TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c
new file mode 100644
index 000000000000..5b88f7129fea
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/diag_uid.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <unistd.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sock_diag.h>
+#include <linux/unix_diag.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(diag_uid)
+{
+ int netlink_fd;
+ int unix_fd;
+ __u32 inode;
+ __u64 cookie;
+};
+
+FIXTURE_VARIANT(diag_uid)
+{
+ int unshare;
+ int udiag_show;
+};
+
+FIXTURE_VARIANT_ADD(diag_uid, uid)
+{
+ .unshare = 0,
+ .udiag_show = UDIAG_SHOW_UID
+};
+
+FIXTURE_VARIANT_ADD(diag_uid, uid_unshare)
+{
+ .unshare = CLONE_NEWUSER,
+ .udiag_show = UDIAG_SHOW_UID
+};
+
+FIXTURE_SETUP(diag_uid)
+{
+ struct stat file_stat;
+ socklen_t optlen;
+ int ret;
+
+ if (variant->unshare)
+ ASSERT_EQ(unshare(variant->unshare), 0);
+
+ self->netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ ASSERT_NE(self->netlink_fd, -1);
+
+ self->unix_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_NE(self->unix_fd, -1);
+
+ ret = fstat(self->unix_fd, &file_stat);
+ ASSERT_EQ(ret, 0);
+
+ self->inode = file_stat.st_ino;
+
+ optlen = sizeof(self->cookie);
+ ret = getsockopt(self->unix_fd, SOL_SOCKET, SO_COOKIE, &self->cookie, &optlen);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(diag_uid)
+{
+ close(self->netlink_fd);
+ close(self->unix_fd);
+}
+
+int send_request(struct __test_metadata *_metadata,
+ FIXTURE_DATA(diag_uid) *self,
+ const FIXTURE_VARIANT(diag_uid) *variant)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct unix_diag_req udr;
+ } req = {
+ .nlh = {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST
+ },
+ .udr = {
+ .sdiag_family = AF_UNIX,
+ .udiag_ino = self->inode,
+ .udiag_cookie = {
+ (__u32)self->cookie,
+ (__u32)(self->cookie >> 32)
+ },
+ .udiag_show = variant->udiag_show
+ }
+ };
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct iovec iov = {
+ .iov_base = &req,
+ .iov_len = sizeof(req)
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+
+ return sendmsg(self->netlink_fd, &msg, 0);
+}
+
+void render_response(struct __test_metadata *_metadata,
+ struct unix_diag_req *udr, __u32 len)
+{
+ unsigned int rta_len = len - NLMSG_LENGTH(sizeof(*udr));
+ struct rtattr *attr;
+ uid_t uid;
+
+ ASSERT_GT(len, sizeof(*udr));
+ ASSERT_EQ(udr->sdiag_family, AF_UNIX);
+
+ attr = (struct rtattr *)(udr + 1);
+ ASSERT_NE(RTA_OK(attr, rta_len), 0);
+ ASSERT_EQ(attr->rta_type, UNIX_DIAG_UID);
+
+ uid = *(uid_t *)RTA_DATA(attr);
+ ASSERT_EQ(uid, getuid());
+}
+
+void receive_response(struct __test_metadata *_metadata,
+ FIXTURE_DATA(diag_uid) *self)
+{
+ long buf[8192 / sizeof(long)];
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf)
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+ struct unix_diag_req *udr;
+ struct nlmsghdr *nlh;
+ int ret;
+
+ ret = recvmsg(self->netlink_fd, &msg, 0);
+ ASSERT_GT(ret, 0);
+
+ nlh = (struct nlmsghdr *)buf;
+ ASSERT_NE(NLMSG_OK(nlh, ret), 0);
+ ASSERT_EQ(nlh->nlmsg_type, SOCK_DIAG_BY_FAMILY);
+
+ render_response(_metadata, NLMSG_DATA(nlh), nlh->nlmsg_len);
+
+ nlh = NLMSG_NEXT(nlh, ret);
+ ASSERT_EQ(NLMSG_OK(nlh, ret), 0);
+}
+
+TEST_F(diag_uid, 1)
+{
+ int ret;
+
+ ret = send_request(_metadata, self, variant);
+ ASSERT_GT(ret, 0);
+
+ receive_response(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index ead7963b9bf0..bd89198cd817 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -43,5 +43,5 @@ CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_BAREUDP=m
CONFIG_IPV6_IOAM6_LWTUNNEL=y
-CONFIG_CRYPTO_SM4=y
+CONFIG_CRYPTO_SM4_GENERIC=y
CONFIG_AMT=m
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index ee5e98204d3d..a47b26ab48f2 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1228,6 +1228,17 @@ ipv4_fcnal()
run_cmd "$IP ro add 172.16.101.0/24 nhid 21"
run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1"
log_test $? 2 "Delete multipath route with only nh id based entry"
+
+ run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1"
+ run_cmd "$IP ro add 172.16.102.0/24 nhid 22"
+ run_cmd "$IP ro del 172.16.102.0/24 dev veth1"
+ log_test $? 2 "Delete route when specifying only nexthop device"
+
+ run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6"
+ log_test $? 2 "Delete route when specifying only gateway"
+
+ run_cmd "$IP ro del 172.16.102.0/24"
+ log_test $? 0 "Delete route when not specifying nexthop attributes"
}
ipv4_grp_fcnal()
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 2271a8727f62..5637b5dadabd 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1711,13 +1711,21 @@ ipv4_del_addr_test()
$IP addr add dev dummy1 172.16.104.1/24
$IP addr add dev dummy1 172.16.104.11/24
+ $IP addr add dev dummy1 172.16.104.12/24
+ $IP addr add dev dummy1 172.16.104.13/24
$IP addr add dev dummy2 172.16.104.1/24
$IP addr add dev dummy2 172.16.104.11/24
+ $IP addr add dev dummy2 172.16.104.12/24
$IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+ $IP route add 172.16.106.0/24 dev lo src 172.16.104.12
+ $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13
$IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+ $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12
set +e
# removing address from device in vrf should only remove route from vrf table
+ echo " Regular FIB info"
+
$IP addr del dev dummy2 172.16.104.11/24
$IP ro ls vrf red | grep -q 172.16.105.0/24
log_test $? 1 "Route removed from VRF when source address deleted"
@@ -1735,6 +1743,35 @@ ipv4_del_addr_test()
$IP ro ls vrf red | grep -q 172.16.105.0/24
log_test $? 0 "Route in VRF is not removed by address delete"
+ # removing address from device in vrf should only remove route from vrf
+ # table even when the associated fib info only differs in table ID
+ echo " Identical FIB info with different table ID"
+
+ $IP addr del dev dummy2 172.16.104.12/24
+ $IP ro ls vrf red | grep -q 172.16.106.0/24
+ log_test $? 1 "Route removed from VRF when source address deleted"
+
+ $IP ro ls | grep -q 172.16.106.0/24
+ log_test $? 0 "Route in default VRF not removed"
+
+ $IP addr add dev dummy2 172.16.104.12/24
+ $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12
+
+ $IP addr del dev dummy1 172.16.104.12/24
+ $IP ro ls | grep -q 172.16.106.0/24
+ log_test $? 1 "Route removed in default VRF when source address deleted"
+
+ $IP ro ls vrf red | grep -q 172.16.106.0/24
+ log_test $? 0 "Route in VRF is not removed by address delete"
+
+ # removing address from device in default vrf should remove route from
+ # the default vrf even when route was inserted with a table ID of 0.
+ echo " Table ID 0"
+
+ $IP addr del dev dummy1 172.16.104.13/24
+ $IP ro ls | grep -q 172.16.107.0/24
+ log_test $? 1 "Route removed in default VRF when source address deleted"
+
$IP li del dummy1
$IP li del dummy2
cleanup
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 0900c5438fbb..275491be3da2 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -782,7 +782,7 @@ kci_test_ipsec_offload()
tmpl proto esp src $srcip dst $dstip spi 9 \
mode transport reqid 42
check_err $?
- ip x p add dir out src $dstip/24 dst $srcip/24 \
+ ip x p add dir in src $dstip/24 dst $srcip/24 \
tmpl proto esp src $dstip dst $srcip spi 9 \
mode transport reqid 42
check_err $?
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
index 0a49907cd4fe..da5bfd834eff 100755
--- a/tools/testing/selftests/net/toeplitz.sh
+++ b/tools/testing/selftests/net/toeplitz.sh
@@ -32,7 +32,7 @@ DEV="eth0"
# This is determined by reading the RSS indirection table using ethtool.
get_rss_cfg_num_rxqs() {
echo $(ethtool -x "${DEV}" |
- egrep [[:space:]]+[0-9]+:[[:space:]]+ |
+ grep -E [[:space:]]+[0-9]+:[[:space:]]+ |
cut -d: -f2- |
awk '{$1=$1};1' |
tr ' ' '\n' |
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 69ea659caca9..22f1e1d73fa8 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -95,6 +95,7 @@ all: run
sysroot: sysroot/$(ARCH)/include
sysroot/$(ARCH)/include:
+ $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
$(QUIET_MKDIR)mkdir -p sysroot
$(Q)$(MAKE) -C ../../../include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone
$(Q)mv sysroot/sysroot sysroot/$(ARCH)
@@ -133,3 +134,5 @@ clean:
$(Q)rm -rf initramfs
$(call QUIET_CLEAN, run.out)
$(Q)rm -rf run.out
+
+.PHONY: sysroot/$(ARCH)/include
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 78bced95ac63..f14f5076fb6d 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -565,6 +565,13 @@ int run_stdlib(int min, int max)
CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break;
CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break;
CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break;
+ CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break;
+ CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break;
+ CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break;
+ CASE_TEST(memcmp_20_e0); EXPECT_LT(1, memcmp("aaa\x20", "aaa\xe0", 4), 0); break;
+ CASE_TEST(memcmp_e0_20); EXPECT_GT(1, memcmp("aaa\xe0", "aaa\x20", 4), 0); break;
+ CASE_TEST(memcmp_80_e0); EXPECT_LT(1, memcmp("aaa\x80", "aaa\xe0", 4), 0); break;
+ CASE_TEST(memcmp_e0_80); EXPECT_GT(1, memcmp("aaa\xe0", "aaa\x80", 4), 0); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
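
The new memcmp test cases in nolibc-test.c pin down the sign of the result for bytes above 0x7f: the C standard requires memcmp() to compare the buffers as arrays of unsigned char, so 0xe0 must compare greater than 0x20 even though (signed char)0xe0 is negative. A minimal implementation of the property the tests check, offered as a sketch rather than the nolibc source:

#include <stddef.h>

static int memcmp_sketch(const void *s1, const void *s2, size_t n)
{
	const unsigned char *a = s1, *b = s2;	/* unsigned comparison required */
	size_t i;

	for (i = 0; i < n; i++) {
		if (a[i] != b[i])
			return a[i] < b[i] ? -1 : 1;
	}
	return 0;
}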
diff --git a/tools/testing/selftests/rcutorture/bin/config2csv.sh b/tools/testing/selftests/rcutorture/bin/config2csv.sh
index d5a16631b16e..0cf55f1bf654 100755
--- a/tools/testing/selftests/rcutorture/bin/config2csv.sh
+++ b/tools/testing/selftests/rcutorture/bin/config2csv.sh
@@ -30,9 +30,8 @@ else
fi
scenarios="`echo $scenariosarg | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`"
-T=/tmp/config2latex.sh.$$
+T=`mktemp -d /tmp/config2latex.sh.XXXXXX`
trap 'rm -rf $T' 0
-mkdir $T
cat << '---EOF---' >> $T/p.awk
END {
diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh
index 90016c359e83..b3d2e7efa40c 100755
--- a/tools/testing/selftests/rcutorture/bin/config_override.sh
+++ b/tools/testing/selftests/rcutorture/bin/config_override.sh
@@ -29,9 +29,8 @@ else
exit 1
fi
-T=${TMPDIR-/tmp}/config_override.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/config_override.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' |
awk '
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
index 31584cee84d7..83fac1852ab2 100755
--- a/tools/testing/selftests/rcutorture/bin/configcheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -7,9 +7,8 @@
#
# Authors: Paul E. McKenney <[email protected]>
-T=${TMPDIR-/tmp}/abat-chk-config.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
cat $1 > $T/.config
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index d6e5ce084b1c..28bdb3ac7ba6 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -15,9 +15,8 @@
#
# Authors: Paul E. McKenney <[email protected]>
-T=${TMPDIR-/tmp}/configinit.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/configinit.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
# Capture config spec file.
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 0941f1ddab65..8a968fbda02c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -12,9 +12,8 @@
scriptname=$0
args="$*"
-T=${TMPDIR-/tmp}/kvm-again.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-again.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
if ! test -d tools/testing/selftests/rcutorture/bin
then
@@ -51,27 +50,56 @@ RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
PATH=${RCUTORTURE}/bin:$PATH; export PATH
. functions.sh
+bootargs=
dryrun=
dur=
default_link="cp -R"
-rundir="`pwd`/tools/testing/selftests/rcutorture/res/`date +%Y.%m.%d-%H.%M.%S-again`"
+resdir="`pwd`/tools/testing/selftests/rcutorture/res"
+rundir="$resdir/`date +%Y.%m.%d-%H.%M.%S-again`"
+got_datestamp=
+got_rundir=
startdate="`date`"
starttime="`get_starttime`"
usage () {
echo "Usage: $scriptname $oldrun [ arguments ]:"
+ echo " --bootargs kernel-boot-arguments"
+ echo " --datestamp string"
echo " --dryrun"
echo " --duration minutes | <seconds>s | <hours>h | <days>d"
echo " --link hard|soft|copy"
echo " --remote"
echo " --rundir /new/res/path"
+ echo "Command line: $scriptname $args"
exit 1
}
while test $# -gt 0
do
case "$1" in
+ --bootargs|--bootarg)
+ checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
+ bootargs="$bootargs $2"
+ shift
+ ;;
+ --datestamp)
+ checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--'
+ if test -n "$got_rundir" || test -n "$got_datestamp"
+ then
+ echo Only one of --datestamp or --rundir may be specified
+ usage
+ fi
+ got_datestamp=y
+ ds=$2
+ rundir="$resdir/$ds"
+ if test -e "$rundir"
+ then
+ echo "--datestamp $2: Already exists."
+ usage
+ fi
+ shift
+ ;;
--dryrun)
dryrun=1
;;
@@ -113,6 +141,12 @@ do
;;
--rundir)
checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error'
+ if test -n "$got_rundir" || test -n "$got_datestamp"
+ then
+ echo Only one of --datestamp or --rundir may be specified
+ usage
+ fi
+ got_rundir=y
rundir=$2
if test -e "$rundir"
then
@@ -122,8 +156,11 @@ do
shift
;;
*)
- echo Unknown argument $1
- usage
+ if test -n "$1"
+ then
+ echo Unknown argument $1
+ usage
+ fi
;;
esac
shift
@@ -156,7 +193,7 @@ do
qemu_cmd_dir="`dirname "$i"`"
kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`"
jitter_dir="`dirname "$kernel_dir"`"
- kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur < $T/qemu-cmd > $i
+ kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur "$bootargs" < $T/qemu-cmd > $i
if test -n "$arg_remote"
then
echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
index f99b2c146f83..46b08cd16ba5 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
@@ -7,9 +7,8 @@
#
# Usage: kvm-assign-cpus.sh /path/to/sysfs
-T=/tmp/kvm-assign-cpus.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-assign-cpus.sh.XXXXXX`"
trap 'rm -rf $T' 0 2
-mkdir $T
sysfsdir=${1-/sys/devices/system/node}
if ! cd "$sysfsdir" > $T/msg 2>&1
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 5ad973dca820..e28a82851f7c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -23,9 +23,8 @@ then
fi
resdir=${2}
-T=${TMPDIR-/tmp}/test-linux.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-build.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
cp ${config_template} $T/config
cat << ___EOF___ >> $T/config
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
index ee886b40a5d2..2b56baceb05d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
@@ -18,9 +18,8 @@ then
exit 1
fi
-T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-end-run-stats.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
PATH=${RCUTORTURE}/bin:$PATH; export PATH
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 0789c5606d2a..1df7e695edf7 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -30,7 +30,7 @@ do
resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'`
head -1 $resdir/log
fi
- TORTURE_SUITE="`cat $i/../torture_suite`"
+ TORTURE_SUITE="`cat $i/../torture_suite`" ; export TORTURE_SUITE
configfile=`echo $i | sed -e 's,^.*/,,'`
rm -f $i/console.log.*.diags
case "${TORTURE_SUITE}" in
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 9f0a5d5ff2dd..a2328163eba1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -34,19 +34,18 @@ fi
shift
# Pathnames:
-# T: /tmp/kvm-remote.sh.$$
-# resdir: /tmp/kvm-remote.sh.$$/res
-# rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix)
+# T: /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp
+# resdir: /tmp/kvm-remote.sh.NNNNNN/res
+# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
# oldrun: `pwd`/tools/testing/.../res/$otherds
#
# Pathname segments:
-# TD: kvm-remote.sh.$$
+# TD: kvm-remote.sh.NNNNNN
# ds: yyyy.mm.dd-hh.mm.ss-remote
-TD=kvm-remote.sh.$$
-T=${TMPDIR-/tmp}/$TD
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
+TD="`basename "$T"`"
resdir="$T/res"
ds=`date +%Y.%m.%d-%H.%M.%S`-remote
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
index 1e29d656501b..c3808c490d92 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
@@ -13,9 +13,8 @@
#
# Authors: Paul E. McKenney <[email protected]>
-T=${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
echo ---- Running batch $*
# Check arguments
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
index 44280582c594..76f24cd5825b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
@@ -17,9 +17,8 @@
#
# Authors: Paul E. McKenney <[email protected]>
-T=${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
resdir="$1"
if ! test -d "$resdir"
@@ -109,7 +108,7 @@ do
if test $kruntime -lt $seconds
then
echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
- grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1
+ grep "^(qemu) qemu:" $resdir/kvm-test-1-run*.sh.out >> $resdir/Warnings 2>&1
killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
if test -n "$killpid"
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index f4c8055dbf7a..d2a3710a5f2a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -25,9 +25,8 @@
#
# Authors: Paul E. McKenney <[email protected]>
-T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
. functions.sh
. $CONFIGFRAG/ver_functions.sh
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
index d40b4e60a50c..75a2610a27f3 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
@@ -3,10 +3,14 @@
#
# Transform a qemu-cmd file to allow reuse.
#
-# Usage: kvm-transform.sh bzImage console.log jitter_dir [ seconds ] < qemu-cmd-in > qemu-cmd-out
+# Usage: kvm-transform.sh bzImage console.log jitter_dir seconds [ bootargs ] < qemu-cmd-in > qemu-cmd-out
#
# bzImage: Kernel and initrd from the same prior kvm.sh run.
# console.log: File into which to place console output.
+# jitter_dir: Jitter directory for TORTURE_JITTER_START and
+# TORTURE_JITTER_STOP environment variables.
+# seconds: Run duration for the *.shutdown_secs module parameter.
+# bootargs: New kernel boot parameters. Beware of Robert Tables.
#
# The original qemu-cmd file is provided on standard input.
# The transformed qemu-cmd file is on standard output.
@@ -17,6 +21,9 @@
#
# Authors: Paul E. McKenney <[email protected]>
+T=`mktemp -d /tmp/kvm-transform.sh.XXXXXXXXXX`
+trap 'rm -rf $T' 0 2
+
image="$1"
if test -z "$image"
then
@@ -41,9 +48,17 @@ then
echo "Invalid duration, should be numeric in seconds: '$seconds'"
exit 1
fi
+bootargs="$5"
+
+# Build awk program.
+echo "BEGIN {" > $T/bootarg.awk
+echo $bootargs | tr -s ' ' '\012' |
+ awk -v dq='"' '/./ { print "\tbootarg[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk
+echo $bootargs | tr -s ' ' '\012' | sed -e 's/=.*$//' |
+ awk -v dq='"' '/./ { print "\tbootpar[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk
+cat >> $T/bootarg.awk << '___EOF___'
+}
-awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
- -v seconds="$seconds" '
/^# seconds=/ {
if (seconds == "")
print $0;
@@ -70,13 +85,7 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
{
line = "";
for (i = 1; i <= NF; i++) {
- if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) {
- sub(/[0-9]*$/, seconds, $i);
- if (line == "")
- line = $i;
- else
- line = line " " $i;
- } else if (line == "") {
+ if (line == "") {
line = $i;
} else {
line = line " " $i;
@@ -87,7 +96,44 @@ awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
} else if ($i == "-kernel") {
i++;
line = line " " image;
+ } else if ($i == "-append") {
+ for (i++; i <= NF; i++) {
+ arg = $i;
+ lq = "";
+ rq = "";
+ if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/)
+ sub(/[0-9]*$/, seconds, arg);
+ if (arg ~ /^"/) {
+ lq = substr(arg, 1, 1);
+ arg = substr(arg, 2);
+ }
+ if (arg ~ /"$/) {
+ rq = substr(arg, length($i), 1);
+ arg = substr(arg, 1, length($i) - 1);
+ }
+ par = arg;
+ gsub(/=.*$/, "", par);
+ j = 1;
+ while (bootpar[j] != "") {
+ if (bootpar[j] == par) {
+ arg = "";
+ break;
+ }
+ j++;
+ }
+ if (line == "")
+ line = lq arg;
+ else
+ line = line " " lq arg;
+ }
+ for (j in bootarg)
+ line = line " " bootarg[j];
+ line = line rq;
}
}
print line;
-}'
+}
+___EOF___
+
+awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
+ -v seconds="$seconds" -f $T/bootarg.awk
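With the extra argument, the usage shown in the header comment above might be exercised as follows; every path and the boot parameter below are made-up examples, not values taken from this patch:

    kvm-transform.sh res/2022.06.01-00.00.00/TREE01/bzImage \
            res/2022.06.01-00.00.00-again/TREE01/console.log \
            /tmp/jitter 600 "rcutorture.fwd_progress=0" \
            < res/2022.06.01-00.00.00/TREE01/qemu-cmd \
            > res/2022.06.01-00.00.00-again/TREE01/qemu-cmd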
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 6c734818a875..7710b1e1cdda 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -14,9 +14,8 @@
scriptname=$0
args="$*"
-T=${TMPDIR-/tmp}/kvm.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
cd `dirname $scriptname`/../../../../../
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
index 2dbfca3589b1..5a0b7ffcf047 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -15,9 +15,8 @@
F=$1
title=$2
-T=${TMPDIR-/tmp}/parse-build.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/parse-build.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
. functions.sh
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index d477618e7261..130d0de4c3bb 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -206,9 +206,8 @@ ds="`date +%Y.%m.%d-%H.%M.%S`-torture"
startdate="`date`"
starttime="`get_starttime`"
-T=/tmp/torture.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/torture.sh.XXXXXX`"
trap 'rm -rf $T' 0 2
-mkdir $T
echo " --- " $scriptname $args | tee -a $T/log
echo " --- Results directory: " $ds | tee -a $T/log
@@ -278,6 +277,8 @@ function torture_one {
then
cat $T/$curflavor.out | tee -a $T/log
echo retcode=$retcode | tee -a $T/log
+ else
+ echo $resdir > $T/last-resdir
fi
if test "$retcode" == 0
then
@@ -303,10 +304,12 @@ function torture_set {
shift
curflavor=$flavor
torture_one "$@"
+ mv $T/last-resdir $T/last-resdir-nodebug || :
if test "$do_kasan" = "yes"
then
curflavor=${flavor}-kasan
torture_one "$@" --kasan
+ mv $T/last-resdir $T/last-resdir-kasan || :
fi
if test "$do_kcsan" = "yes"
then
@@ -317,6 +320,7 @@ function torture_set {
cur_kcsan_kmake_args="$kcsan_kmake_args"
fi
torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
+ mv $T/last-resdir $T/last-resdir-kcsan || :
fi
}
@@ -326,20 +330,34 @@ then
echo " --- allmodconfig:" Start `date` | tee -a $T/log
amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig"
mkdir -p "$amcdir"
- echo " --- make clean" > "$amcdir/Make.out" 2>&1
+ echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1
make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1
- echo " --- make allmodconfig" >> "$amcdir/Make.out" 2>&1
- cp .config $amcdir
- make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
- echo " --- make " >> "$amcdir/Make.out" 2>&1
- make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
- retcode="$?"
- echo $retcode > "$amcdir/Make.exitcode"
- if test "$retcode" == 0
+ retcode=$?
+ buildphase='"make clean"'
+ if test "$retcode" -eq 0
+ then
+ echo " --- make allmodconfig" | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1
+ cp .config $amcdir
+ make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
+ retcode=$?
+ buildphase='"make allmodconfig"'
+ fi
+ if test "$retcode" -eq 0
+ then
+ echo " --- make " | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1
+ make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
+ retcode="$?"
+ echo $retcode > "$amcdir/Make.exitcode"
+ buildphase='"make"'
+ fi
+ if test "$retcode" -eq 0
then
echo "allmodconfig($retcode)" $amcdir >> $T/successes
+ echo Success >> $amcdir/log
else
echo "allmodconfig($retcode)" $amcdir >> $T/failures
+ echo " --- allmodconfig Test summary:" >> $amcdir/log
+ echo " --- Summary: Exit code $retcode from $buildphase, see Make.out" >> $amcdir/log
fi
fi
@@ -379,11 +397,48 @@ then
else
primlist=
fi
+firsttime=1
+do_kasan_save="$do_kasan"
+do_kcsan_save="$do_kcsan"
for prim in $primlist
do
- torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
- torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+ if test -n "$firsttime"
+ then
+ torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
+ torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+ mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
+ if test -f "$T/last-resdir-kasan"
+ then
+ mv $T/last-resdir-kasan $T/first-resdir-kasan || :
+ fi
+ if test -f "$T/last-resdir-kcsan"
+ then
+ mv $T/last-resdir-kcsan $T/first-resdir-kcsan || :
+ fi
+ firsttime=
+ do_kasan=
+ do_kcsan=
+ else
+ torture_bootargs=
+ for i in $T/first-resdir-*
+ do
+ case "$i" in
+ *-nodebug)
+ torture_suffix=
+ ;;
+ *-kasan)
+ torture_suffix="-kasan"
+ ;;
+ *-kcsan)
+ torture_suffix="-kcsan"
+ ;;
+ esac
+ torture_set "refscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "refscale.scale_type=$prim"
+ done
+ fi
done
+do_kasan="$do_kasan_save"
+do_kcsan="$do_kcsan_save"
if test "$do_rcuscale" = yes
then
@@ -391,11 +446,48 @@ then
else
primlist=
fi
+firsttime=1
+do_kasan_save="$do_kasan"
+do_kcsan_save="$do_kcsan"
for prim in $primlist
do
- torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
- torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+ if test -n "$firsttime"
+ then
+ torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
+ torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+ mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
+ if test -f "$T/last-resdir-kasan"
+ then
+ mv $T/last-resdir-kasan $T/first-resdir-kasan || :
+ fi
+ if test -f "$T/last-resdir-kcsan"
+ then
+ mv $T/last-resdir-kcsan $T/first-resdir-kcsan || :
+ fi
+ firsttime=
+ do_kasan=
+ do_kcsan=
+ else
+ torture_bootargs=
+ for i in $T/first-resdir-*
+ do
+ case "$i" in
+ *-nodebug)
+ torture_suffix=
+ ;;
+ *-kasan)
+ torture_suffix="-kasan"
+ ;;
+ *-kcsan)
+ torture_suffix="-kcsan"
+ ;;
+ esac
+ torture_set "rcuscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "rcuscale.scale_type=$prim"
+ done
+ fi
done
+do_kasan="$do_kasan_save"
+do_kcsan="$do_kcsan_save"
if test "$do_kvfree" = "yes"
then
@@ -458,7 +550,10 @@ if test -n "$tdir" && test $compress_concurrency -gt 0
then
# KASAN vmlinux files can approach 1GB in size, so compress them.
echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1
- find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo
+ find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo-all
+ find "$tdir" -type f -name 're-run' -print | sed -e 's,/re-run,,' |
+ grep -e '-k[ac]san$' > $T/xz-todo-copy
+ sort $T/xz-todo-all $T/xz-todo-copy | uniq -u > $T/xz-todo
ncompresses=0
batchno=1
if test -s $T/xz-todo
@@ -490,6 +585,24 @@ then
echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log
fi
wait
+ if test -s $T/xz-todo-copy
+ then
+ # The trick here is that we need corresponding
+ # vmlinux files from corresponding scenarios.
+ echo Linking vmlinux.xz files to re-use scenarios `date` | tee -a "$tdir/log-xz" | tee -a $T/log
+ dirstash="`pwd`"
+ for i in `cat $T/xz-todo-copy`
+ do
+ cd $i
+ find . -name vmlinux -print > $T/xz-todo-copy-vmlinux
+ for v in `cat $T/xz-todo-copy-vmlinux`
+ do
+ rm -f "$v"
+ cp -l `cat $i/re-run`/"$i/$v".xz "`dirname "$v"`"
+ done
+ cd "$dirstash"
+ done
+ fi
echo Size after compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log
echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log
else
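The vmlinux.xz linking step above relies on each re-used scenario directory carrying a "re-run" file that records the original results tree. A minimal sketch of what the cp -l accomplishes, with hypothetical directory names:

    orig="`cat TREE01-kasan/re-run`"                       # original results tree
    rm -f TREE01-kasan/vmlinux                             # drop the uncompressed copy
    cp -l "$orig/TREE01-kasan/vmlinux.xz" TREE01-kasan/    # hard link, no extra disk space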
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 4ae6c8991307..9c2f448bb3a9 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -392,6 +392,8 @@ TEST(mode_filter_without_nnp)
.filter = filter,
};
long ret;
+ cap_t cap = cap_get_proc();
+ cap_flag_value_t is_cap_sys_admin = 0;
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
ASSERT_LE(0, ret) {
@@ -400,8 +402,8 @@ TEST(mode_filter_without_nnp)
errno = 0;
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
/* Succeeds with CAP_SYS_ADMIN, fails without */
- /* TODO(wad) check caps not euid */
- if (geteuid()) {
+ cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin);
+ if (!is_cap_sys_admin) {
EXPECT_EQ(-1, ret);
EXPECT_EQ(EACCES, errno);
} else {
diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore
index fe1eb8271b35..cae8dca0fbff 100644
--- a/tools/testing/selftests/timens/.gitignore
+++ b/tools/testing/selftests/timens/.gitignore
@@ -8,3 +8,4 @@ procfs
timens
timer
timerfd
+vfork_exec
diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile
index 3a5936cc10ab..f0d51d4d2c87 100644
--- a/tools/testing/selftests/timens/Makefile
+++ b/tools/testing/selftests/timens/Makefile
@@ -1,4 +1,4 @@
-TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex
+TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex vfork_exec
TEST_GEN_PROGS_EXTENDED := gettime_perf
CFLAGS := -Wall -Werror -pthread
diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c
new file mode 100644
index 000000000000..beb7614941fb
--- /dev/null
+++ b/tools/testing/selftests/timens/vfork_exec.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "log.h"
+#include "timens.h"
+
+#define OFFSET (36000)
+
+struct thread_args {
+ char *tst_name;
+ struct timespec *now;
+};
+
+static void *tcheck(void *_args)
+{
+ struct thread_args *args = _args;
+ struct timespec *now = args->now, tst;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now->tv_sec) > 5) {
+ pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n",
+ args->tst_name, tst.tv_sec, now->tv_sec);
+ return (void *)1UL;
+ }
+ }
+ return NULL;
+}
+
+static int check_in_thread(char *tst_name, struct timespec *now)
+{
+ struct thread_args args = {
+ .tst_name = tst_name,
+ .now = now,
+ };
+ pthread_t th;
+ void *retval;
+
+ if (pthread_create(&th, NULL, tcheck, &args))
+ return pr_perror("thread");
+ if (pthread_join(th, &retval))
+ return pr_perror("pthread_join");
+ return !(retval == NULL);
+}
+
+static int check(char *tst_name, struct timespec *now)
+{
+ struct timespec tst;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now->tv_sec) > 5)
+ return pr_fail("%s: unexpected value: %ld (%ld)\n",
+ tst_name, tst.tv_sec, now->tv_sec);
+ }
+ if (check_in_thread(tst_name, now))
+ return 1;
+ ksft_test_result_pass("%s\n", tst_name);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct timespec now;
+ int status;
+ pid_t pid;
+
+ if (argc > 1) {
+ char *endptr;
+
+ ksft_cnt.ksft_pass = 1;
+ now.tv_sec = strtoul(argv[1], &endptr, 0);
+ if (*endptr != 0)
+ return pr_perror("strtoul");
+
+ return check("child after exec", &now);
+ }
+
+ nscheck();
+
+ ksft_set_plan(4);
+
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+ if (unshare_timens())
+ return 1;
+
+ if (_settime(CLOCK_MONOTONIC, OFFSET))
+ return 1;
+
+ if (check("parent before vfork", &now))
+ return 1;
+
+ pid = vfork();
+ if (pid < 0)
+ return pr_perror("vfork");
+
+ if (pid == 0) {
+ char now_str[64];
+ char *cargv[] = {"exec", now_str, NULL};
+ char *cenv[] = {NULL};
+
+ /* Check for proper vvar offsets after execve. */
+ snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET);
+ execve("/proc/self/exe", cargv, cenv);
+ pr_perror("execve");
+ _exit(1);
+ }
+
+ if (waitpid(pid, &status, 0) != pid)
+ return pr_perror("waitpid");
+
+ if (status)
+ ksft_exit_fail();
+ ksft_inc_pass_cnt();
+ ksft_test_result_pass("wait for child\n");
+
+ /* Check that we are still in the source timens. */
+ if (check("parent after vfork", &now))
+ return 1;
+
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh
index 26e193ffd2a2..873a892147e5 100644
--- a/tools/vm/slabinfo-gnuplot.sh
+++ b/tools/vm/slabinfo-gnuplot.sh
@@ -150,7 +150,7 @@ do_preprocess()
let lines=3
out=`basename "$in"`"-slabs-by-loss"
`cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\
- egrep -iv '\-\-|Name|Slabs'\
+ grep -E -iv '\-\-|Name|Slabs'\
| awk '{print $1" "$4+$2*$3" "$4}' > "$out"`
if [ $? -eq 0 ]; then
do_slabs_plotting "$out"
@@ -159,7 +159,7 @@ do_preprocess()
let lines=3
out=`basename "$in"`"-slabs-by-size"
`cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\
- egrep -iv '\-\-|Name|Slabs'\
+ grep -E -iv '\-\-|Name|Slabs'\
| awk '{print $1" "$4" "$4-$2*$3}' > "$out"`
if [ $? -eq 0 ]; then
do_slabs_plotting "$out"
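egrep and grep -E select the same extended-regexp matches; the switch simply avoids the obsolescence warning that recent GNU grep releases print for the egrep alias. A two-line illustration with a simplified pattern, not taken from this patch:

    printf 'Name x\nfoo 1 2 3\n' | egrep -iv 'name|slabs'      # may warn: egrep is obsolescent
    printf 'Name x\nfoo 1 2 3\n' | grep -E -iv 'name|slabs'    # same matches, no warning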
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index 0fffaeedee76..cfaeaea71042 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -157,9 +157,11 @@ static unsigned long read_obj(const char *name)
{
FILE *f = fopen(name, "r");
- if (!f)
+ if (!f) {
buffer[0] = 0;
- else {
+ if (errno == EACCES)
+ fatal("%s, Try using superuser\n", strerror(errno));
+ } else {
if (!fgets(buffer, sizeof(buffer), f))
buffer[0] = 0;
fclose(f);